Skip to content
Snippets Groups Projects
Commit b3a1cd8b authored by maberet
Browse files

modif de la formule q learn

parent 22b6e2fe
No related branches found
No related tags found
No related merge requests found
...@@ -6,11 +6,13 @@ float ***** Q; ...@@ -6,11 +6,13 @@ float ***** Q;
int main(){ int main(){
Q = allocateAndInitiateQ(); Q = allocateAndInitiateQ();
readQFromFile(Q); readQFromFile(Q);
int i= 10; int i= 50;
srand ( time(NULL)); srand ( time(NULL));
while (i>0){ while (i>0){
traningAgent(1000000,30, Q); traningAgent(1000000,30, Q);
printf("%d \n ", i);
writeQ(Q);
i--;} i--;}
writeQ(Q); writeQ(Q);
......
...@@ -400,7 +400,6 @@ void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir ...@@ -400,7 +400,6 @@ void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir
line.angleFZone=zoneAngleF; line.angleFZone=zoneAngleF;
line.action= action; line.action= action;
moveAgent(agent, action); moveAgent(agent, action);
actionStack( stack , line);
agentZone= convertIntoZone( agent->x,agent->y ); agentZone= convertIntoZone( agent->x,agent->y );
reward = (agentZone==dropZone); reward = (agentZone==dropZone);
...@@ -412,16 +411,21 @@ void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir ...@@ -412,16 +411,21 @@ void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir
while ( !emptyStack(stack)){ while ( !emptyStack(stack)){
reward=line.reward; reward=line.reward;
maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]); maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]);
agentZone= line.receiverZone;
canonZone= line.shooterZone;
zoneAngleH= line.angleHZone;
zoneAngleF=line.angleFZone;
line=unStack(stack); line=unStack(stack);
Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] += Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=
DISCOUNT*(reward + DISCOUNT*(reward +
LEARN_RATE* Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][maxAction] LEARN_RATE* Q[agentZone][canonZone][zoneAngleH][zoneAngleF][maxAction]
-Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action]); -Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action]);
} }
//if ( numberRun%1000000==1){printf (" %d \n ", numberRun);} //if ( numberRun%1000000==1){printf (" %d \n ", numberRun);}
greedy = greedy - 1/numberRun; //greedy = greedy - 1/numberRun;
numberRun--; numberRun--;
} }
freeStack(stack); freeStack(stack);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment