diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/main.c b/travail_de_groupe/jeu_appren_par_renfo/src/main.c
index a9db2c8d81bc3a35e1da1a49fdaa6773f092ffbb..94fa8008c72fe413115db2e2bbe80174443cce69 100644
--- a/travail_de_groupe/jeu_appren_par_renfo/src/main.c
+++ b/travail_de_groupe/jeu_appren_par_renfo/src/main.c
@@ -8,15 +8,17 @@ int main(){
 
     float ***** Q = allocateAndInitiateQ();
     srand ( time(NULL));
+
+    traningAgent(1000000, 15, Q);
 
-    writeQ(Q);
-    running = 1;
-    game_state = GAME;
-    readMapFromFile("map.txt");
-    initPlayer();
-    initKeys();
+    writeQ(Q);
+    // running = 1;
+    // game_state = GAME;
+    // readMapFromFile("map.txt");
+    // initPlayer();
+    // initKeys();
 
-    mainLoop();
+    // mainLoop();
 
 
 }
\ No newline at end of file
diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c
index b481d7f4df25c3f186caf9a69ceded6d11ad8b91..38fce48ff63247084ab18bdcb4034cc993ec67c6 100644
--- a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c
+++ b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c
@@ -133,7 +133,7 @@ int convertIntoZone(int xAgent,int yAgent){
     yAgent=yAgent/BLOCK_SIZE;
     if(xAgent<23 && yAgent<=4){zone=0;}
     else if(xAgent<31 && yAgent<=4){zone=1;}
-    else if(xAgent<23 && yAgent<9){zone=2;}
+    else if(xAgent<23 && yAgent<9){zone=2;}
     else if(xAgent< 31&& yAgent<9){zone=3;}
 
     return zone ;
@@ -153,7 +153,6 @@ int convertIntoZoneCanon(int xCanon,int yCanon){
 int converterIntoAngleF(float angleF){
     int angleZone=0;
     long angleFd=(long)(angleF*1000000);
-    printf ("%ld \n ", angleFd);
     if( ((long)(-M_PI/2*1000000)<=angleFd)&&(angleFd<=(long)((-M_PI/2+M_PI/5)*1000000))){angleZone=4;}
     else if( ((long)((-(M_PI/2)+(M_PI/5))*1000000)<angleFd)&&(angleFd<=(long)((-M_PI/2+2*M_PI/5)*1000000))){angleZone=3;}
     else if( (angleFd>(long)((-M_PI/2+2*M_PI/5)*1000000))&&(angleFd<=(long)((-M_PI/2+3*M_PI/5)*1000000))){angleZone=2;}
@@ -332,3 +331,113 @@ void freeStack(stack_t *stack)
 
 
 
+/*
+ * Plays one step of an episode: chooses an action for the agent's current
+ * position, pushes the resulting transition on the stack, then moves the agent.
+ * Returns the transition that was pushed; its reward field is always 0.
+ */
+static line_t playStep(agent_t *agent, stack_t *stack, float *****Q,
+                       int canonZone, int zoneAngleH, int zoneAngleF, float greedy)
+{
+    line_t line = {0};
+
+    /* Choose the action first, then record the zone the agent is about to leave. */
+    line.action = takeAction(agent->x, agent->y, Q, canonZone, zoneAngleH, zoneAngleF, greedy);
+    line.receiverZone = convertIntoZone(agent->x, agent->y);
+    line.shooterZone = canonZone;
+    line.angleHZone = zoneAngleH;
+    line.angleFZone = zoneAngleF;
+    line.reward = 0;
+    actionStack(stack, line);
+    moveAgent(agent, line.action);
+    return line;
+}
+
+/*
+ * Trains the Q table by playing numberRun episodes and propagating each
+ * episode's final reward back through the actions that were taken.
+ *
+ * numberRun  : number of episodes; nothing is done when it is <= 0.
+ * numberStep : actions per episode (at least one action is always played).
+ * Q          : table indexed [agent zone][canon zone][angleH zone][angleF zone][action],
+ *              updated in place.
+ *
+ * The reward is 1 when the agent finishes the episode in the drop point's zone,
+ * 0 otherwise. `greedy` starts at 1 and shrinks linearly with the remaining runs;
+ * it is handed to takeAction() and also scales every Q update.
+ *
+ * Original note, translated: positions must be divided by the block size to get
+ * the right scale.
+ */
+void traningAgent(int numberRun, int numberStep, float *****Q)
+{
+    const int totalRuns = numberRun;
+    point_t canon = {0};      /* zeroed: both points are passed by value to their init function */
+    point_t dropPoint = {0};
+    float greedy = 1;
+    stack_t *stack;
+
+    if (numberRun <= 0 || !Q) {
+        return;
+    }
+    stack = initStack(1000);
+    if (!stack) {
+        return;
+    }
+
+    while (numberRun > 0) {
+        agent_t *agent = initAgent();
+        line_t line;
+        float *qLine;
+        float *qNext;
+        int zoneAngleF;
+        int zoneAngleH;
+        int dropZone;
+        int canonZone;
+        int maxAction;
+        int reward;
+        int i;
+
+        if (!agent) {
+            break;
+        }
+        canon = initCanon(canon);
+        dropPoint = initDropPoint(dropPoint);
+        /* Both canon coordinates are passed, matching defineAngleH() and convertIntoZoneCanon() below. */
+        zoneAngleF = converterIntoAngleF(defineAngleF(canon.x, canon.y, dropPoint.x, dropPoint.y));
+        zoneAngleH = converterIntoAngleH(defineAngleH(canon.x, canon.y));
+        dropZone = convertIntoZone(dropPoint.x, dropPoint.y);
+        canonZone = convertIntoZoneCanon(canon.x, canon.y);
+
+        for (i = 0; i < numberStep - 1; i++) {
+            playStep(agent, stack, Q, canonZone, zoneAngleH, zoneAngleF, greedy);
+        }
+        line = playStep(agent, stack, Q, canonZone, zoneAngleH, zoneAngleF, greedy);
+
+        /* Judge the episode on where the last move actually left the agent. */
+        reward = (convertIntoZone(agent->x, agent->y) == dropZone) ? 1 : 0;
+
+        /* Terminal transition: there is no successor state, so no bootstrap term. */
+        qNext = Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone];
+        qNext[line.action] += greedy * (reward - qNext[line.action]);
+        if (!emptyStack(stack)) {
+            unStack(stack); /* drop its stacked copy so it is not updated a second time */
+        }
+
+        /* Walk back through the episode; qNext is always the successor state's row. */
+        while (!emptyStack(stack)) {
+            line = unStack(stack);
+            qLine = Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone];
+            maxAction = argmax(qNext);
+            qLine[line.action] += greedy * (line.reward + LEARN_RATE * qNext[maxAction] - qLine[line.action]);
+            qNext = qLine;
+        }
+
+        /* NOTE(review): initAgent() is called for every run and its result is never
+         * released here; confirm how the agent is allocated and free it accordingly. */
+        numberRun--;
+        /* Linear decay over the remaining runs: stays within [0, 1] and never divides by zero. */
+        greedy = (float)numberRun / (float)totalRuns;
+    }
+    freeStack(stack);
+}
\ No newline at end of file
diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h
index 875f3149a710513ca1edc68456275fbf2e6cd00f..5d4fc877bd5cb18eb4850e341d3626e5362c55f6 100644
--- a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h
+++ b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h
@@ -67,5 +67,5 @@ int fullStack(stack_t *stack);
 void actionStack(stack_t *stack, line_t line);
 line_t unStack(stack_t *stack);
 void freeStack(stack_t *stack);
-
+void traningAgent( int numberRun, int numberStep, float *****Q);
 #endif
\ No newline at end of file