From 58a68fe24668cb2b53dc9857b8dae32d0ec17440 Mon Sep 17 00:00:00 2001
From: maberet <maberet@ada.local.isima.fr>
Date: Thu, 30 Jun 2022 14:28:13 +0200
Subject: [PATCH]  commit pour recup qlearn

---
 .../jeu_appren_par_renfo/src/main.c           |   4 +-
 .../jeu_appren_par_renfo/src/qlearn.c         | 186 +++++++-----------
 2 files changed, 73 insertions(+), 117 deletions(-)

diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/main.c b/travail_de_groupe/jeu_appren_par_renfo/src/main.c
index fc78b9b..52cd0d3 100644
--- a/travail_de_groupe/jeu_appren_par_renfo/src/main.c
+++ b/travail_de_groupe/jeu_appren_par_renfo/src/main.c
@@ -6,11 +6,11 @@ int game_state;
 
 int main(){ 
     float ***** Q = allocateAndInitiateQ();
-    int i= 10;
+    int i= 1;
     
     srand ( time(NULL));
     while (i>0){
-    traningAgent(1000000,30, Q);
+    traningAgent(10000,30, Q);
     i--;} 
     
     writeQ(Q);
diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c
index 5f67460..68d1e56 100644
--- a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c
+++ b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c
@@ -113,7 +113,7 @@ void writeQ(float *****Q){
 
 int argmax(float * arr){
     int i;
-    float max = arr[0];
+    float max = arr[rand()%5];
     int maxIndex = 0;
     //printf("argmax:  %f ", arr[0]);
     for(i = 1; i < NUMBER_ACTION; i++){
@@ -321,7 +321,7 @@ void freeStack(stack_t *stack)
     {
         free(stack->base); //on libère le tableau dynamique
         free(stack);    //on libère la tête de la stack
-        printf("Pile libérée\n");
+        //printf("Pile libérée\n");
     }
     else
     {
@@ -334,12 +334,12 @@ void freeStack(stack_t *stack)
 void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir la bonne taille il faut diviser par block size 
     int i ; 
     int action;
-    point_t canon ; 
+    // point_t canon ; 
     point_t dropPoint ; 
     int canonZone; 
     int dropZone; 
-    float angleH;
-    float angleF;
+    // float angleH;
+    // float angleF;
     int zoneAngleH;
     int zoneAngleF; 
     int agentZone; 
@@ -351,120 +351,76 @@ void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir
     int maxAction;
     stack= initStack(6000); 
     
+    int j ,k,l,m; 
+    
     while (numberRun>0){
         agent=initAgent(); 
-        canon=initCanon(canon); 
-        dropPoint= initDropPoint(dropPoint);
-        angleF=defineAngleF( canon.x, canon.x, dropPoint.x, dropPoint.y );
-        angleH=defineAngleH( canon.x, canon.y ); 
-        zoneAngleF=converterIntoAngleF(angleF);
-        zoneAngleH=converterIntoAngleH(angleH);
-        dropZone=convertIntoZone(dropPoint.x,dropPoint.y); 
-        canonZone= convertIntoZoneCanon(canon.x,canon.y); 
-        reward=0; 
-
-        for ( i=0; i< numberStep - 1; i++ ){
-            action =takeAction(agent->x ,agent->y , Q, canonZone, zoneAngleH, zoneAngleF, greedy);
-            agentZone= convertIntoZone( agent->x,agent->y ); 
-            line.receiverZone= agentZone; 
-            line.shooterZone= canonZone; 
-            line.angleHZone=zoneAngleH; 
-            line.angleFZone=zoneAngleF; 
-            line.action= action; 
-            line.reward= 0; 
-            actionStack( stack , line); 
-            moveAgent(agent, action); 
-        } 
-        action =takeAction(agent->x ,agent->y , Q, canonZone, zoneAngleH, zoneAngleF, greedy);
-        agentZone= convertIntoZone( agent->x,agent->y ); 
-        line.receiverZone= agentZone; 
-        line.shooterZone= canonZone; 
-        line.angleHZone=zoneAngleH; 
-        line.angleFZone=zoneAngleF; 
-        line.action= action; 
-        moveAgent(agent, action); 
-        actionStack( stack , line); 
-        agentZone= convertIntoZone( agent->x,agent->y ); 
-
-        reward = (agentZone==dropZone); 
-
-        Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] += 
-            LEARN_RATE* ( reward -
-             Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] );
-
-        while ( !emptyStack(stack)){
-            reward=line.reward; 
-            maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]); 
-            line=unStack(stack); 
-
-
-            Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=
-                    DISCOUNT*(reward +
-                    LEARN_RATE* Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][maxAction]
-                    -Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action]);
-        }
-        //if ( numberRun%1000000==1){printf (" %d \n  ", numberRun);}  
+        for( j=0; j<4;j++){ // j sweeps the values used as canonZone
+            for (k=0;k<3;k++){ // k sweeps the values used as zoneAngleH
+                    for (l=0; l<5;l++){ // l sweeps the values used as zoneAngleF
+                        m=100; // number of episodes played for each (j,k,l) combination
+                        while(m>0){  
+                            // NOTE(review): the agent returned by initAgent() is never released in this function; confirm whether it must be freed per episode.
+                            agent = initAgent();
+                            zoneAngleF=l;
+                            zoneAngleH=k;
+                            dropPoint=initDropPoint(dropPoint);
+                            dropZone=convertIntoZone(dropPoint.x,dropPoint.y); 
+                            canonZone= j; 
+                            reward=0; 
+                            // Exploration phase: at most numberStep moves (the counter used to never decrease), stopping early once the agent is in the drop zone.
+                            for (i = numberStep; i > 0; i--){
+                                action =takeAction(agent->x ,agent->y , Q, canonZone, zoneAngleH, zoneAngleF, greedy);
+                                agentZone= convertIntoZone( agent->x,agent->y ); 
+                                line.receiverZone= agentZone; 
+                                line.shooterZone= canonZone; 
+                                line.angleHZone=zoneAngleH; 
+                                line.angleFZone=zoneAngleF; 
+                                line.action= action; 
+                                line.reward= (agentZone==dropZone); 
+                                actionStack( stack , line); 
+                                moveAgent(agent, action);
+                                if (agentZone==dropZone){break;} 
+                            } 
+                            // One extra move is always played and recorded (also after an early break); the zone reached by it decides the terminal reward.
+                            action =takeAction(agent->x ,agent->y , Q, canonZone, zoneAngleH, zoneAngleF, greedy);
+                            agentZone= convertIntoZone( agent->x,agent->y ); 
+                            line.receiverZone= agentZone; 
+                            line.shooterZone= canonZone; 
+                            line.angleHZone=zoneAngleH; 
+                            line.angleFZone=zoneAngleF; 
+                            line.action= action; 
+                            moveAgent(agent, action); 
+                            actionStack( stack , line); 
+                            agentZone= convertIntoZone( agent->x,agent->y ); 
+
+                            reward = (agentZone==dropZone); 
+
+                            Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] += 
+                                LEARN_RATE* ( reward -
+                                Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] );
+
+                            while ( !emptyStack(stack)){
+                                reward=line.reward; 
+                                maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]); 
+                                line=unStack(stack); 
+                                // NOTE(review): DISCOUNT scales the whole correction and LEARN_RATE the look-ahead term, the reverse of the usual Q-learning form; confirm.
+                                // NOTE(review): maxAction comes from the row of the entry held before unStack(), yet it indexes the row of the entry just popped; confirm.
+                                Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=
+                                        DISCOUNT*(reward +
+                                        LEARN_RATE* Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][maxAction]
+                                        -Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action]);
+                            }
+                            m--;
+                        } 
+
+
+                    } 
+                }
+            } writeQ(Q); // Q is written out after every complete sweep, i.e. once per run
+        if ( numberRun%10000==1){printf (" %d \n  ", numberRun);} 
         greedy = greedy - 1/numberRun;
         numberRun--; 
     }
     freeStack(stack); 
-
-
-
-
-
-
-
-
-    //     printf("%d %d %d %d \n",dropZone, canonZone,zoneAngleH,zoneAngleF);
-    //     printf("%d %d  \n",agent->x, agent->y);
-
-    //     for (i=0; i<numberStep-1;i++){ 
-    //         action = takeAction(agent->x,agent->y,Q,canonZone,zoneAngleH,zoneAngleF,greedy); 
-    //         agentZone = convertIntoZone(agent->x, agent->y); 
-    //         line.receiverZone=agentZone; 
-    //         line.shooterZone =canonZone; 
-    //         line.angleHZone= zoneAngleH; 
-    //         line.angleFZone= zoneAngleF; 
-    //         line.action= action;
-    //         line.reward= reward ; 
-    //         actionStack(stack,line);
-    //         moveAgent(agent, action);
-
-    //     }
-    //     action = takeAction(agent->x, agent->y,Q,canonZone,zoneAngleH,zoneAngleF,greedy); 
-    //     agentZone = convertIntoZone(agent->x, agent->y); 
-       
-    //     line.receiverZone=agentZone; 
-    //     line.shooterZone =canonZone; 
-    //     line.angleHZone= zoneAngleH; 
-    //     line.angleFZone= zoneAngleF; 
-    //     line.action= action;
-    //     line.reward = 0; 
-    //    // actionStack(stack,line);
-    //     moveAgent(agent, action);
-    //      if (agentZone==dropZone){ 
-    //                reward=1; 
-    //             }
-    //             else{reward= 0;}
-        
-
-    //     Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=  
-    //                 + LEARN_RATE* ( reward - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] );
-       
-    //     while (!emptyStack(stack)){
-    //         maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]);
-    //         reward=line.reward;
-    //         line=unStack(stack);
-
-    //         Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=  
-    //                 + LEARN_RATE* ( reward +  DISCOUNT*Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][maxAction]
-    //                 - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] );
-    //     }  
-    //     numberRun--; 
-    //     greedy=greedy-1/((float)numberRun);
-
-    //     if ( numberRun%1000000==1){printf (" %d \n  ", numberRun);} 
-    // } 
-    // freeStack(stack);
 } 
\ No newline at end of file
-- 
GitLab