diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/main.c b/travail_de_groupe/jeu_appren_par_renfo/src/main.c
index a9db2c8d81bc3a35e1da1a49fdaa6773f092ffbb..94fa8008c72fe413115db2e2bbe80174443cce69 100644
--- a/travail_de_groupe/jeu_appren_par_renfo/src/main.c
+++ b/travail_de_groupe/jeu_appren_par_renfo/src/main.c
@@ -8,15 +8,17 @@ int main(){
     float ***** Q = allocateAndInitiateQ();
     
     srand ( time(NULL));
+
+    traningAgent(1000000, 15, Q);
     
 
-    writeQ(Q);
-    running = 1;
-    game_state = GAME;
-    readMapFromFile("map.txt");
-    initPlayer();
-    initKeys();
+     writeQ(Q);
+    // running = 1;
+    // game_state = GAME;
+    // readMapFromFile("map.txt");
+    // initPlayer();
+    // initKeys();
     
-    mainLoop();
+    // mainLoop();
 
 } 
\ No newline at end of file
diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c
index b481d7f4df25c3f186caf9a69ceded6d11ad8b91..38fce48ff63247084ab18bdcb4034cc993ec67c6 100644
--- a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c
+++ b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c
@@ -133,7 +133,7 @@ int convertIntoZone(int xAgent,int yAgent){
     yAgent=yAgent/BLOCK_SIZE;
     if(xAgent<23 && yAgent<=4){zone=0;} 
     else if(xAgent<31 && yAgent<=4){zone=1;} 
-    else if(xAgent<23 && yAgent<9){zone=2;} 
+    else if(xAgent<23 && yAgent<9){zone=2;}
     else if(xAgent< 31&& yAgent<9){zone=3;}
      
     return zone ; 
@@ -153,7 +153,6 @@ int convertIntoZoneCanon(int xCanon,int yCanon){
 int converterIntoAngleF(float angleF){
     int angleZone=0;
     long angleFd=(long)(angleF*1000000); 
-    printf ("%ld \n ", angleFd);
     if( ((long)(-M_PI/2*1000000)<=angleFd)&&(angleFd<=(long)((-M_PI/2+M_PI/5)*1000000))){angleZone=4;} 
     else if( ((long)((-(M_PI/2)+(M_PI/5))*1000000)<angleFd)&&(angleFd<=(long)((-M_PI/2+2*M_PI/5)*1000000))){angleZone=3;} 
     else if( (angleFd>(long)((-M_PI/2+2*M_PI/5)*1000000))&&(angleFd<=(long)((-M_PI/2+3*M_PI/5)*1000000))){angleZone=2;}
@@ -332,3 +331,84 @@ void freeStack(stack_t *stack)
 
 
 
+/* Plays one move: chooses an action with takeAction(), records the agent's zone,
+ * the cannon zone, both angle zones and that action in a line_t (reward 0), then
+ * moves the agent. Returns the recorded line. */
+static line_t playStep(agent_t *agent, float *****Q, int canonZone,
+                       int zoneAngleH, int zoneAngleF, float greedy) {
+    line_t line;
+    line.action = takeAction(agent->x, agent->y, Q, canonZone, zoneAngleH, zoneAngleF, greedy);
+    line.receiverZone = convertIntoZone(agent->x, agent->y);
+    line.shooterZone = canonZone;
+    line.angleHZone = zoneAngleH;
+    line.angleFZone = zoneAngleF;
+    line.reward = 0;
+    moveAgent(agent, line.action);
+    return line;
+}
+
+/*
+ * Trains the Q-table over numberRun episodes of numberStep moves each.
+ *
+ * Every episode draws a cannon and a drop point, plays the moves while stacking
+ * each non-final one, scores the final move 1 if the agent ends in the drop
+ * point's zone (0 otherwise), then unstacks the earlier moves and updates each
+ * of them from the move that followed it.
+ *
+ * numberRun  : number of episodes; nothing is trained when it is <= 0.
+ * numberStep : moves per episode; one move is always played, even when <= 1.
+ * Q          : table indexed [receiverZone][shooterZone][angleHZone][angleFZone][action],
+ *              updated in place.
+ *
+ * Positions are in pixels: convertIntoZone() divides them by BLOCK_SIZE.
+ * `greedy` is handed to takeAction() and is also the update step size; it decays
+ * linearly from 1 to 0 over the episodes. LEARN_RATE weights the successor value.
+ */
+void traningAgent ( int numberRun, int numberStep, float *****Q) {
+    const float decay = (numberRun > 0) ? 1.0f / (float)numberRun : 0.0f;
+    float greedy = 1;
+    float *row, *nextRow;
+    point_t canon = {0}, dropPoint = {0};
+    agent_t *agent;
+    line_t line, next;
+    int canonZone, dropZone, zoneAngleH, zoneAngleF, reward, i;
+    stack_t *stack = initStack(1000);
+
+    if (stack == NULL) {
+        return;
+    }
+    while (numberRun > 0) {
+        /* NOTE(review): the agent returned here is never released; if initAgent()
+         * allocates it, it should be freed at the end of each episode. */
+        agent = initAgent();
+        canon = initCanon(canon);
+        dropPoint = initDropPoint(dropPoint);
+        /* The cannon's y was previously passed as canon.x (copy/paste slip). */
+        zoneAngleF = converterIntoAngleF(defineAngleF(canon.x, canon.y, dropPoint.x, dropPoint.y));
+        zoneAngleH = converterIntoAngleH(defineAngleH(canon.x, canon.y));
+        dropZone = convertIntoZone(dropPoint.x, dropPoint.y);
+        canonZone = convertIntoZoneCanon(canon.x, canon.y);
+
+        for (i = 0; i < numberStep - 1; i++) {
+            actionStack(stack, playStep(agent, Q, canonZone, zoneAngleH, zoneAngleF, greedy));
+        }
+        /* Final move: kept off the stack so it is updated once, from the reward,
+         * which is judged on where the agent stands after moving. */
+        next = playStep(agent, Q, canonZone, zoneAngleH, zoneAngleF, greedy);
+        reward = (convertIntoZone(agent->x, agent->y) == dropZone) ? 1 : 0;
+        row = Q[next.receiverZone][next.shooterZone][next.angleHZone][next.angleFZone];
+        row[next.action] += greedy * (reward - row[next.action]);
+
+        /* Walk back through the episode; `next` is always the move played after `line`. */
+        while (!emptyStack(stack)) {
+            line = unStack(stack);
+            nextRow = Q[next.receiverZone][next.shooterZone][next.angleHZone][next.angleFZone];
+            row = Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone];
+            row[line.action] += greedy * (line.reward + LEARN_RATE * nextRow[argmax(nextRow)] - row[line.action]);
+            next = line;
+        }
+        numberRun--;
+        greedy = (greedy > decay) ? greedy - decay : 0.0f;
+    }
+    freeStack(stack);
+}
\ No newline at end of file
diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h
index 875f3149a710513ca1edc68456275fbf2e6cd00f..5d4fc877bd5cb18eb4850e341d3626e5362c55f6 100644
--- a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h
+++ b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h
@@ -67,5 +67,5 @@ int fullStack(stack_t *stack);
 void actionStack(stack_t *stack, line_t line);
 line_t unStack(stack_t *stack);
 void freeStack(stack_t *stack);
-
+void traningAgent( int numberRun, int numberStep, float *****Q);
 #endif
\ No newline at end of file