From 58a68fe24668cb2b53dc9857b8dae32d0ec17440 Mon Sep 17 00:00:00 2001 From: maberet <maberet@ada.local.isima.fr> Date: Thu, 30 Jun 2022 14:28:13 +0200 Subject: [PATCH] commit pour recup qlearn --- .../jeu_appren_par_renfo/src/main.c | 4 +- .../jeu_appren_par_renfo/src/qlearn.c | 186 +++++++----------- 2 files changed, 73 insertions(+), 117 deletions(-) diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/main.c b/travail_de_groupe/jeu_appren_par_renfo/src/main.c index fc78b9b..52cd0d3 100644 --- a/travail_de_groupe/jeu_appren_par_renfo/src/main.c +++ b/travail_de_groupe/jeu_appren_par_renfo/src/main.c @@ -6,11 +6,11 @@ int game_state; int main(){ float ***** Q = allocateAndInitiateQ(); - int i= 10; + int i= 1; srand ( time(NULL)); while (i>0){ - traningAgent(1000000,30, Q); + traningAgent(10000,30, Q); i--;} writeQ(Q); diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c index 5f67460..68d1e56 100644 --- a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c +++ b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c @@ -113,7 +113,7 @@ void writeQ(float *****Q){ int argmax(float * arr){ int i; - float max = arr[0]; + float max = arr[rand()%5]; int maxIndex = 0; //printf("argmax: %f ", arr[0]); for(i = 1; i < NUMBER_ACTION; i++){ @@ -321,7 +321,7 @@ void freeStack(stack_t *stack) { free(stack->base); //on libère le tableau dynamique free(stack); //on libère la tête de la stack - printf("Pile libérée\n"); + //printf("Pile libérée\n"); } else { @@ -334,12 +334,12 @@ void freeStack(stack_t *stack) void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir la bonne taille il faut diviser par block size int i ; int action; - point_t canon ; + // point_t canon ; point_t dropPoint ; int canonZone; int dropZone; - float angleH; - float angleF; + // float angleH; + // float angleF; int zoneAngleH; int zoneAngleF; int agentZone; @@ -351,120 +351,76 @@ void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir int maxAction; stack= initStack(6000); + int j ,k,l,m; + while (numberRun>0){ agent=initAgent(); - canon=initCanon(canon); - dropPoint= initDropPoint(dropPoint); - angleF=defineAngleF( canon.x, canon.x, dropPoint.x, dropPoint.y ); - angleH=defineAngleH( canon.x, canon.y ); - zoneAngleF=converterIntoAngleF(angleF); - zoneAngleH=converterIntoAngleH(angleH); - dropZone=convertIntoZone(dropPoint.x,dropPoint.y); - canonZone= convertIntoZoneCanon(canon.x,canon.y); - reward=0; - - for ( i=0; i< numberStep - 1; i++ ){ - action =takeAction(agent->x ,agent->y , Q, canonZone, zoneAngleH, zoneAngleF, greedy); - agentZone= convertIntoZone( agent->x,agent->y ); - line.receiverZone= agentZone; - line.shooterZone= canonZone; - line.angleHZone=zoneAngleH; - line.angleFZone=zoneAngleF; - line.action= action; - line.reward= 0; - actionStack( stack , line); - moveAgent(agent, action); - } - action =takeAction(agent->x ,agent->y , Q, canonZone, zoneAngleH, zoneAngleF, greedy); - agentZone= convertIntoZone( agent->x,agent->y ); - line.receiverZone= agentZone; - line.shooterZone= canonZone; - line.angleHZone=zoneAngleH; - line.angleFZone=zoneAngleF; - line.action= action; - moveAgent(agent, action); - actionStack( stack , line); - agentZone= convertIntoZone( agent->x,agent->y ); - - reward = (agentZone==dropZone); - - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] += - LEARN_RATE* ( reward - - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] ); - - while ( !emptyStack(stack)){ - reward=line.reward; - maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]); - line=unStack(stack); - - - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] += - DISCOUNT*(reward + - LEARN_RATE* Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][maxAction] - -Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action]); - } - //if ( numberRun%1000000==1){printf (" %d \n ", numberRun);} + for( j=0; j<4;j++){ + for (k=0;k<3;k++){ + for (l=0; l<5;l++){ + m=100; + while(m>0){ + i= numberStep; + agent = initAgent(); + zoneAngleF=l; + zoneAngleH=k; + dropPoint=initDropPoint(dropPoint); + dropZone=convertIntoZone(dropPoint.x,dropPoint.y); + canonZone= j; + reward=0; + //printf (" %d \n ", m); + while(i>0){ + action =takeAction(agent->x ,agent->y , Q, canonZone, zoneAngleH, zoneAngleF, greedy); + agentZone= convertIntoZone( agent->x,agent->y ); + line.receiverZone= agentZone; + line.shooterZone= canonZone; + line.angleHZone=zoneAngleH; + line.angleFZone=zoneAngleF; + line.action= action; + line.reward= (agentZone==dropZone); + actionStack( stack , line); + moveAgent(agent, action); + if ((agentZone==dropZone)){break;} + } + if ( (agentZone!=dropZone) ){ } + action =takeAction(agent->x ,agent->y , Q, canonZone, zoneAngleH, zoneAngleF, greedy); + agentZone= convertIntoZone( agent->x,agent->y ); + line.receiverZone= agentZone; + line.shooterZone= canonZone; + line.angleHZone=zoneAngleH; + line.angleFZone=zoneAngleF; + line.action= action; + moveAgent(agent, action); + actionStack( stack , line); + agentZone= convertIntoZone( agent->x,agent->y ); + + reward = (agentZone==dropZone); + + Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] += + LEARN_RATE* ( reward - + Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] ); + + while ( !emptyStack(stack)){ + reward=line.reward; + maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]); + line=unStack(stack); + + + Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] += + DISCOUNT*(reward + + LEARN_RATE* Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][maxAction] + -Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action]); + } + m--; + } + + + } + } + } writeQ(Q); + if ( numberRun%10000==1){printf (" %d \n ", numberRun);} greedy = greedy - 1/numberRun; numberRun--; } freeStack(stack); - - - - - - - - - // printf("%d %d %d %d \n",dropZone, canonZone,zoneAngleH,zoneAngleF); - // printf("%d %d \n",agent->x, agent->y); - - // for (i=0; i<numberStep-1;i++){ - // action = takeAction(agent->x,agent->y,Q,canonZone,zoneAngleH,zoneAngleF,greedy); - // agentZone = convertIntoZone(agent->x, agent->y); - // line.receiverZone=agentZone; - // line.shooterZone =canonZone; - // line.angleHZone= zoneAngleH; - // line.angleFZone= zoneAngleF; - // line.action= action; - // line.reward= reward ; - // actionStack(stack,line); - // moveAgent(agent, action); - - // } - // action = takeAction(agent->x, agent->y,Q,canonZone,zoneAngleH,zoneAngleF,greedy); - // agentZone = convertIntoZone(agent->x, agent->y); - - // line.receiverZone=agentZone; - // line.shooterZone =canonZone; - // line.angleHZone= zoneAngleH; - // line.angleFZone= zoneAngleF; - // line.action= action; - // line.reward = 0; - // // actionStack(stack,line); - // moveAgent(agent, action); - // if (agentZone==dropZone){ - // reward=1; - // } - // else{reward= 0;} - - - // Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] += - // + LEARN_RATE* ( reward - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] ); - - // while (!emptyStack(stack)){ - // maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]); - // reward=line.reward; - // line=unStack(stack); - - // Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] += - // + LEARN_RATE* ( reward + DISCOUNT*Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][maxAction] - // - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] ); - // } - // numberRun--; - // greedy=greedy-1/((float)numberRun); - - // if ( numberRun%1000000==1){printf (" %d \n ", numberRun);} - // } - // freeStack(stack); } \ No newline at end of file -- GitLab