diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/main.c b/travail_de_groupe/jeu_appren_par_renfo/src/main.c
index 4466e44e362314f55775631628d5987c5cfb3c8b..fc78b9bfa87cf68d1270b0e98a7b362870ba6407 100644
--- a/travail_de_groupe/jeu_appren_par_renfo/src/main.c
+++ b/travail_de_groupe/jeu_appren_par_renfo/src/main.c
@@ -6,11 +6,11 @@ int game_state;
 
 int main(){ 
     float ***** Q = allocateAndInitiateQ();
-    int i= 5;
+    int i= 10;
     
     srand ( time(NULL));
     while (i>0){
-    traningAgent(10,1, Q);
+    traningAgent(1000000,30, Q);
     i--;} 
     
     writeQ(Q);
diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c
index 0830c3b252d5d2dffef1533b5744f2ee97a2ee83..5f67460c97b9d913378d99fdd1fa4ef6c9a07e3f 100644
--- a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c
+++ b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c
@@ -321,7 +321,7 @@ void freeStack(stack_t *stack)
     {
         free(stack->base); //on libère le tableau dynamique
         free(stack);    //on libère la tête de la stack
-        //printf("Pile libérée\n");
+        printf("Pile libérée\n");
     }
     else
     {
@@ -362,55 +362,109 @@ void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir
         dropZone=convertIntoZone(dropPoint.x,dropPoint.y); 
         canonZone= convertIntoZoneCanon(canon.x,canon.y); 
         reward=0; 
-        printf("%d %d %d %d \n",dropZone, canonZone,zoneAngleH,zoneAngleF);
-        printf("%d %d  \n",agent->x, agent->y);
-
-        for (i=0; i<numberStep-1;i++){ 
-            action = takeAction(agent->x,agent->y,Q,canonZone,zoneAngleH,zoneAngleF,greedy); 
-            agentZone = convertIntoZone(agent->x, agent->y); 
-            line.receiverZone=agentZone; 
-            line.shooterZone =canonZone; 
-            line.angleHZone= zoneAngleH; 
-            line.angleFZone= zoneAngleF; 
-            line.action= action;
-            line.reward= reward ; 
-            actionStack(stack,line);
-            moveAgent(agent, action);
 
+        for ( i=0; i< numberStep - 1; i++ ){
+            action =takeAction(agent->x ,agent->y , Q, canonZone, zoneAngleH, zoneAngleF, greedy);
+            agentZone= convertIntoZone( agent->x,agent->y ); 
+            line.receiverZone= agentZone; 
+            line.shooterZone= canonZone; 
+            line.angleHZone=zoneAngleH; 
+            line.angleFZone=zoneAngleF; 
+            line.action= action; 
+            line.reward= 0; 
+            actionStack( stack , line); 
+            moveAgent(agent, action); 
+        } 
+        action =takeAction(agent->x ,agent->y , Q, canonZone, zoneAngleH, zoneAngleF, greedy);
+        agentZone= convertIntoZone( agent->x,agent->y ); 
+        line.receiverZone= agentZone; 
+        line.shooterZone= canonZone; 
+        line.angleHZone=zoneAngleH; 
+        line.angleFZone=zoneAngleF; 
+        line.action= action; 
+        moveAgent(agent, action); 
+        actionStack( stack , line); 
+        agentZone= convertIntoZone( agent->x,agent->y ); 
+
+        reward = (agentZone==dropZone); 
+
+        Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] += 
+            LEARN_RATE* ( reward -
+             Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] );
+
+        while ( !emptyStack(stack)){
+            reward=line.reward; 
+            maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]); 
+            line=unStack(stack); 
+
+
+            Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=
+                    DISCOUNT*(reward +
+                    LEARN_RATE* Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][maxAction]
+                    -Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action]);
         }
-        action = takeAction(agent->x, agent->y,Q,canonZone,zoneAngleH,zoneAngleF,greedy); 
-        agentZone = convertIntoZone(agent->x, agent->y); 
+        //if ( numberRun%1000000==1){printf (" %d \n  ", numberRun);}  
+        greedy = greedy - 1/numberRun;
+        numberRun--; 
+    }
+    freeStack(stack); 
+
+
+
+
+
+
+
+
+    //     printf("%d %d %d %d \n",dropZone, canonZone,zoneAngleH,zoneAngleF);
+    //     printf("%d %d  \n",agent->x, agent->y);
+
+    //     for (i=0; i<numberStep-1;i++){ 
+    //         action = takeAction(agent->x,agent->y,Q,canonZone,zoneAngleH,zoneAngleF,greedy); 
+    //         agentZone = convertIntoZone(agent->x, agent->y); 
+    //         line.receiverZone=agentZone; 
+    //         line.shooterZone =canonZone; 
+    //         line.angleHZone= zoneAngleH; 
+    //         line.angleFZone= zoneAngleF; 
+    //         line.action= action;
+    //         line.reward= reward ; 
+    //         actionStack(stack,line);
+    //         moveAgent(agent, action);
+
+    //     }
+    //     action = takeAction(agent->x, agent->y,Q,canonZone,zoneAngleH,zoneAngleF,greedy); 
+    //     agentZone = convertIntoZone(agent->x, agent->y); 
        
-        line.receiverZone=agentZone; 
-        line.shooterZone =canonZone; 
-        line.angleHZone= zoneAngleH; 
-        line.angleFZone= zoneAngleF; 
-        line.action= action;
-        line.reward = 0; 
-       // actionStack(stack,line);
-        moveAgent(agent, action);
-         if (agentZone==dropZone){ 
-                   reward=1; 
-                }
-                else{reward= 0;}
+    //     line.receiverZone=agentZone; 
+    //     line.shooterZone =canonZone; 
+    //     line.angleHZone= zoneAngleH; 
+    //     line.angleFZone= zoneAngleF; 
+    //     line.action= action;
+    //     line.reward = 0; 
+    //    // actionStack(stack,line);
+    //     moveAgent(agent, action);
+    //      if (agentZone==dropZone){ 
+    //                reward=1; 
+    //             }
+    //             else{reward= 0;}
         
 
-        Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=  
-                    + LEARN_RATE* ( reward - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] );
+    //     Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=  
+    //                 + LEARN_RATE* ( reward - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] );
        
-        while (!emptyStack(stack)){
-            maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]);
-            reward=line.reward;
-            line=unStack(stack);
-
-            Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=  
-                    + LEARN_RATE* ( reward +  DISCOUNT*Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][maxAction]
-                    - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] );
-        }  
-        numberRun--; 
-        greedy=greedy-1/((float)numberRun);
-
-        if ( numberRun%1000000==1){printf (" %d \n  ", numberRun);} 
-    } 
-    freeStack(stack);
+    //     while (!emptyStack(stack)){
+    //         maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]);
+    //         reward=line.reward;
+    //         line=unStack(stack);
+
+    //         Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=  
+    //                 + LEARN_RATE* ( reward +  DISCOUNT*Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][maxAction]
+    //                 - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] );
+    //     }  
+    //     numberRun--; 
+    //     greedy=greedy-1/((float)numberRun);
+
+    //     if ( numberRun%1000000==1){printf (" %d \n  ", numberRun);} 
+    // } 
+    // freeStack(stack);
 } 
\ No newline at end of file