diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/ball.c b/travail_de_groupe/jeu_appren_par_renfo/src/ball.c
index 5c73891b50def542e68ea0d99a4605d9871effb2..93bd07f91b5cb19346708f8c5cda0d15a990b8b9 100644
--- a/travail_de_groupe/jeu_appren_par_renfo/src/ball.c
+++ b/travail_de_groupe/jeu_appren_par_renfo/src/ball.c
@@ -15,16 +15,49 @@ void initBall()
     ball.speed = 0;
 
 }
+//ball_t ball;
+int trajectoireAntoine[NUMBERPOINT_TRAJEC][2];
+
+point_t initCanon (point_t canon){ /* Puts canon on a random cell: column 1..14, row 1..8. */
+    canon.x = (rand() % 14 + 1) * BLOCK_SIZE; /* the column is drawn first, then the row */
+    canon.y = (rand() % 8 + 1) * BLOCK_SIZE;
+    return canon;
+}
 
-float defineAngle(int canonX, int canonY, int xDropPoint, int yDropPoint)
+point_t initDropPoint (point_t dropPoint){ /* Puts dropPoint on a random cell: column 16..29, row 1..8. */
+    dropPoint.x = (rand() % 14 + 16) * BLOCK_SIZE; /* the column is drawn first, then the row */
+    dropPoint.y = (rand() % 8 + 1) * BLOCK_SIZE;
+    return dropPoint;
+}
+
+float defineAngleF(int canonX, int canonY, int xDropPoint, int yDropPoint) /* returns the asinf() angle, in radians, between the two points */
 {
     float distance;
     float angleSin;
+    canonX=canonX/BLOCK_SIZE; /* all four inputs are rescaled by BLOCK_SIZE (integer division) */
+    canonY=canonY/BLOCK_SIZE; 
+    xDropPoint= xDropPoint/BLOCK_SIZE; 
+    yDropPoint= yDropPoint/BLOCK_SIZE; 
+
     distance = sqrtf(powf((float)(xDropPoint - canonX), 2) + powf((float)(yDropPoint - canonY), 2));
-    angleSin = asinf(distance / (xDropPoint - canonX));
+    angleSin = asinf(((float) (yDropPoint - canonY))/distance); /* sine = vertical offset / straight-line distance; NOTE(review): distance is 0 when both points share a cell */
     return angleSin;
 }
 
+float defineAngleH(int xCanon, int xDropPoint){ /* Angle (radians, from atanf) derived from a Lagrange interpolation through three points. */
+    int xNet =15 ; /* middle interpolation point is (xNet, yNet) */
+    int yNet= 4; 
+    float angle ; 
+    float yTemp; 
+    xCanon= xCanon/BLOCK_SIZE; /* both abscissas are rescaled by BLOCK_SIZE (integer division) */
+    xDropPoint=xDropPoint/BLOCK_SIZE;
+
+    yTemp = lagrangeInterpolation((float)(xCanon+(xDropPoint-xCanon)/4), xCanon , 2,xNet , yNet , xDropPoint, 0 ); /* curve through (xCanon,2), (xNet,yNet), (xDropPoint,0), sampled a quarter of the way (integer division) */
+    angle= atanf( (float)(yTemp/ (xCanon+(xDropPoint-xCanon)/4) )) ;  // possible mistake? NOTE(review): the ratio uses the absolute sample abscissa and height, not offsets from the cannon point; confirm
+
+    return angle; 
+} 
+
 /*
  * Fonction qui prend une valeur de x et 3 points. Elle
  * renvoie la coordonnée y liée à la valeur de x sur la
diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/ball.h b/travail_de_groupe/jeu_appren_par_renfo/src/ball.h
index 2f22f9a567d89511f581ef1eba2db832f0666554..393bd56723adcbadb1f1381290f64c89a1dbfc90 100644
--- a/travail_de_groupe/jeu_appren_par_renfo/src/ball.h
+++ b/travail_de_groupe/jeu_appren_par_renfo/src/ball.h
@@ -4,7 +4,10 @@
 #include <stdlib.h>
 #include <time.h>
 #include <stdio.h>
-#include <math.h>
+#include <math.h> 
+#include "render.h"
+
+
 
 #include "render.h"
 
@@ -12,8 +15,8 @@
 
 typedef enum
 {
-    false,
-    true
+    false, true /* enumerators take the values 0 and 1, in this order */
+    
 } booleen_t;
 
 typedef struct ball
@@ -29,10 +32,18 @@ typedef struct ball
 } ball_t;
 
 extern ball_t ball;
+typedef struct point {
+    int x; /* horizontal coordinate */
+    int y; /* vertical coordinate */
+} point_t;
+
+//extern ball_t ball;
 extern int trajectoireAntoine[NUMBERPOINT_TRAJEC][2];
 
-void initBall();
-float defineAngle(int, int, int, int);
+point_t  initCanon (point_t canon);
+point_t initDropPoint (point_t dropPoint);
+float defineAngleF(int, int, int, int);
+float defineAngleH(int xCanon, int xDropPoint);
 
 float lagrangeInterpolation(float, int, int, int, int, int, int);
 void calculTrajectoireAntoine2(int, int, int, int, int, int);
diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/main.c b/travail_de_groupe/jeu_appren_par_renfo/src/main.c
index f8f4d3c992e87b253f43c34e9e9159ad9ba52258..43710c0a0d8b5903bdf9333352587dddccd5733c 100644
--- a/travail_de_groupe/jeu_appren_par_renfo/src/main.c
+++ b/travail_de_groupe/jeu_appren_par_renfo/src/main.c
@@ -3,18 +3,23 @@
 int running;
 int game_state;
 
-int main(){
 
-    //float ***** Q = allocateAndInitiateQ();
-    //writeQ(Q);
-    running = 1;
-    game_state = GAME;
-    readMapFromFile("map.txt");
-    //printMap();
-    initPlayer();
-    initKeys();
-    initBall();
-
-    mainLoop();
+int main(){
+    /* Training driver: runs ten training sessions on a fresh Q-table, then writes the table out. */
+    float *****Q = allocateAndInitiateQ();
+    int session;
+
+    srand(time(NULL)); /* seed the generator once from the clock */
+    for (session = 0; session < 10; session++) {
+        traningAgent(10000, 5000, Q);
+    }
+    writeQ(Q);
+    // Interactive game start-up, left disabled in this training build:
+    // running = 1;
+    // game_state = GAME;
+    // readMapFromFile("map.txt");
+    // initPlayer();
+    // initKeys();
+    // mainLoop();
+
+}
\ No newline at end of file
diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/map.h b/travail_de_groupe/jeu_appren_par_renfo/src/map.h
index 13c7cf8bc4996d7009095775e2211542cfd65918..a617f9bde3267d440112874a0751514e4ef30b37 100644
--- a/travail_de_groupe/jeu_appren_par_renfo/src/map.h
+++ b/travail_de_groupe/jeu_appren_par_renfo/src/map.h
@@ -5,7 +5,11 @@
 
 
 #define MAP_WIDTH 31
+/* Number of rows of the map grid. */
 #define MAP_HEIGHT 10
+/* NOTE(review): an unresolved merge with branch `qlearn` proposed 11 here.
+ * 10 is kept: initAgent() draws rows 1..8 and takeAction() treats row
+ * MAP_HEIGHT-2 as the bottom edge case. Confirm against map.txt. */
 
 
 extern int map[MAP_HEIGHT][MAP_WIDTH];
diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c
index 65f18f155cf08a52bbfafe3aa1e19b44ab7bceca..3b5084014b908dca1e3fd7d054a62b589ead47c6 100644
--- a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c
+++ b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c
@@ -1,5 +1,20 @@
 #include "qlearn.h"
 
+agent_t * initAgent (){ /* Allocates an agent on a random cell (column 16..29, row 1..8); exits with status 1 if allocation fails. */
+    agent_t *fresh = malloc(sizeof *fresh);
+
+    if (fresh == NULL) {
+        printf("erreur alloc\n  ");
+        exit(1);
+    }
+    fresh->x = (rand() % 14 + 16) * BLOCK_SIZE; /* the column is drawn first, then the row */
+    fresh->y = (rand() % 8 + 1) * BLOCK_SIZE;
+    fresh->high = 2 * BLOCK_SIZE;
+    fresh->weight = 2 * BLOCK_SIZE;
+    fresh->speed = 1; /* changing the speed means changing the collision cases in takeAction */
+    return fresh;
+}
+
 void moveAgent(agent_t * agent, int choice){
     switch (choice)
     {
@@ -36,7 +51,7 @@ float ***** allocateAndInitiateQ(){
     for(i = 0; i <  NUMBER_ZONE_RECEIVER; i++){  
         q[i] = malloc(sizeof(float ***) * NUMBER_ZONE_SHOOTER ); // alloc shooter zone 
         if (q[i]==NULL)
-        {
+        { 
             printf("problème d'allocation \n");            
             exit(1);
         }
@@ -77,7 +92,6 @@ float ***** allocateAndInitiateQ(){
     return q;
 }
 
-
 void writeQ(float *****Q){
     int i, j, k, l, m ;
     FILE * fp = fopen("q.txt", "w+");
@@ -87,13 +101,315 @@ void writeQ(float *****Q){
                 for(l= 0; l < 5; l++){
                     for(m= 0; m <5; m++){
                          fprintf(fp, "%f ", Q[i][j][k][l][m]);
-                    }
+                    }fprintf(fp, "\n");
                 }
             }
-            fprintf(fp, "\n");
         }
-        fprintf(fp, "\n");
-    }
+    }fprintf(fp, "\n");
     fflush(fp);
     fclose(fp);
-}
\ No newline at end of file
+}
+
+
+int argmax(float * arr){
+    /*
+     * Returns the index of the largest of the first NUMBER_ACTION values of
+     * arr. Ties keep the lowest index because the comparison is strict.
+     */
+    int best = 0;
+    int candidate;
+
+    for (candidate = 1; candidate < NUMBER_ACTION; ++candidate) {
+        if (arr[candidate] > arr[best]) {
+            best = candidate;
+        }
+    }
+    return best;
+}
+
+int convertIntoZone(int xAgent,int yAgent){ /* Maps a position to a receiver zone 0..3; 0 is also the result when no zone matches. */
+    const int col = xAgent / BLOCK_SIZE;
+    const int row = yAgent / BLOCK_SIZE;
+    const int lowColumns = (col > 15) && (col < 23); /* columns 16..22 */
+
+    if (lowColumns && row <= 4) { return 0; }
+    if (col > 22 && col < 31 && row <= 4) { return 1; }
+    if (lowColumns && row <= 9) { return 2; }
+    if (col > 22 && col <= 31 && row <= 9) { return 3; }
+    return 0;
+}
+int convertIntoZoneCanon(int xCanon,int yCanon){
+    /* Maps a cannon position to a shooter zone 0..3; 0 is also the result when no zone matches. */
+    const int col = xCanon / BLOCK_SIZE;
+    const int row = yCanon / BLOCK_SIZE;
+
+    if (col < 9 && row <= 4) { return 0; }
+    if (col < 15 && row <= 4) { return 1; }
+    if (col < 9 && row < 9) { return 2; }
+    if (col < 15 && row < 9) { return 3; }
+    return 0;
+}
+
+int converterIntoAngleF(float angleF){ /* Buckets an angle of [-pi/2, pi/2] into five bands of width pi/5: 4 for the lowest band down to 0 for the highest. */
+    int angleZone=0; /* also the result when no band matches */
+    long angleFd=(long)(angleF*1000000); /* compared as truncated micro-radians */
+    if( ((long)(-M_PI/2*1000000)<=angleFd)&&(angleFd<=(long)((-M_PI/2+M_PI/5)*1000000))){angleZone=4;} /* [-pi/2, -3pi/10] */
+    else if( ((long)((-(M_PI/2)+(M_PI/5))*1000000)<angleFd)&&(angleFd<=(long)((-M_PI/2+2*M_PI/5)*1000000))){angleZone=3;} /* (-3pi/10, -pi/10] */
+    else if( (angleFd>(long)((-M_PI/2+2*M_PI/5)*1000000))&&(angleFd<=(long)((-M_PI/2+3*M_PI/5)*1000000))){angleZone=2;} /* (-pi/10, pi/10] */
+    else if( (angleFd>(long)((-M_PI/2+3*M_PI/5)*1000000))&&(angleFd<=(long)((-M_PI/2+4*M_PI/5)*1000000))){angleZone=1;} /* (pi/10, 3pi/10] */
+    else if( (angleFd>(long)((-M_PI/2+4*M_PI/5)*1000000))&&(angleFd<=(long)(M_PI/2*1000000))){angleZone=0;}   /* (3pi/10, pi/2] */
+    
+    return(angleZone);    
+} 
+
+int converterIntoAngleH(float angleH){ /* Buckets an angle into three bands of width pi/6 over [0, pi/2]: returns 0, 1 or 2. */
+    const long sixth = (long)((M_PI/6)*1000000); /* the cast wraps the scaled bound; it used to truncate pi/6 to 0 before scaling */
+    const long third = (long)((M_PI/3)*1000000);
+    const long half = (long)((M_PI/2)*1000000);
+    const long angleHd = (long)(angleH*1000000); /* compared as truncated micro-radians */
+    if (angleHd > sixth && angleHd <= third) { return 1; }
+    if (angleHd > third && angleHd <= half) { return 2; }
+    return 0; /* [0, pi/6] and every angle outside [0, pi/2] */
+}
+
+int takeAction(int xAgent, int yAgent, float ***** Q, int canonZone, int angleHZone, int angleFZone, float eps){
+    /* Epsilon-greedy choice: with probability eps a random action restricted by the edge the agent is on, otherwise the best action in Q. */
+    int action;
+    int proba = rand() % 10000;
+    const int col = xAgent/BLOCK_SIZE, row = yAgent/BLOCK_SIZE; /* agent position in blocks */
+    const int firstCol = (MAP_WIDTH-1)/2+1, lastCol = MAP_WIDTH-2, lastRow = MAP_HEIGHT-2; /* edge columns/row singled out below */
+    if (proba < eps * 10000){
+        if (col > firstCol && col < lastCol && row > 1 && row < lastRow){
+            action = rand() % 5; // interior cell: every action allowed
+        }
+        else if (col == firstCol && row > 1 && row < lastRow){
+            int possibleActions[4] = {1, 2, 3, 4};
+            action = possibleActions[rand() % 4]; // column next to the net
+        }
+        else if (col == firstCol && row == 1){
+            int possibleActions[3] = {1, 3, 4};
+            action = possibleActions[rand() % 3]; // top-left corner
+        }
+        else if (col == firstCol && row == lastRow){
+            int possibleActions[3] = {1, 2, 4};
+            action = possibleActions[rand() % 3]; // bottom-left corner
+        }
+        else if (row == 1 && col > firstCol && col < lastCol){
+            int possibleActions[4] = {0, 1, 3, 4};
+            action = possibleActions[rand() % 4]; // top row, between the corners
+        }
+        else if (col == lastCol && row == 1){
+            int possibleActions[3] = {0, 3, 4};
+            action = possibleActions[rand() % 3]; // top-right corner
+        }
+        else if (col == lastCol && row < lastRow && row > 1){
+            int possibleActions[4] = {0, 2, 3, 4};
+            action = possibleActions[rand() % 4]; // right column, between the corners
+        }
+        else if (col == lastCol && row == lastRow){
+            int possibleActions[3] = {0, 2, 4};
+            action = possibleActions[rand() % 3]; // bottom-right corner
+        }
+        else if (col > firstCol && col < lastCol && row == lastRow){
+            int possibleActions[4] = {0, 1, 2, 4};
+            action = possibleActions[rand() % 4]; // bottom row, between the corners
+        }
+        else{
+            action = rand() % 5; // position matching none of the cases above
+        }
+    }
+    else{
+        /* convertIntoZone() divides by BLOCK_SIZE itself; passing pre-divided values always gave zone 0 */
+        action = argmax(Q[convertIntoZone(xAgent, yAgent)][canonZone][angleHZone][angleFZone]);
+    }
+    return action;
+}
+
+
+int setReward(int xAgent, int yAgent, int dropZone){
+    /*
+     * Binary reward: 1 when the zone computed by convertIntoZone() for the
+     * agent position equals dropZone, 0 otherwise.
+     */
+    if (convertIntoZone(xAgent, yAgent) == dropZone) {
+        return 1;
+    }
+    return 0;
+}
+
+
+stack_t* initStack(int numberelt){
+    /*
+     * Allocates an empty stack able to hold numberelt entries.
+     * The process exits with status 1 if either allocation fails.
+     * Release the result with freeStack().
+     */
+    stack_t *created = malloc(sizeof *created);
+
+    if (created == NULL) {
+        printf("problème d'allocation\n");
+        exit(1);
+    }
+    created->base = malloc(numberelt * sizeof *created->base);
+    if (created->base == NULL) {
+        printf("problème d'allocation\n");
+        exit(1);
+    }
+
+    created->numberelt = numberelt; /* capacity, in entries */
+    created->top = -1;              /* -1 marks the empty stack */
+    return created;
+}
+
+
+int emptyStack(stack_t *stack){
+    /*
+     * Tells whether the stack holds no element.
+     *
+     * stack:   stack to inspect, must not be NULL.
+     * returns: 1 when the top index is -1 (nothing stored), 0 otherwise.
+     */
+    const int isEmpty = (stack->top == -1);
+
+    return isEmpty;
+}
+
+int fullStack(stack_t *stack)
+{
+    /*
+     * Tells whether the stack has reached its capacity.
+     *
+     * stack:   stack to inspect, must not be NULL.
+     * returns: 1 when top + 1 equals numberelt, 0 otherwise.
+     */
+    const int used = stack->top + 1;
+    return used == stack->numberelt;
+}
+
+void actionStack(stack_t *stack, line_t element)
+{
+    /* Pushes element on top of the stack.
+     * A full stack is left untouched and "Pile pleine" is printed instead. */
+    if (fullStack(stack)) {
+        printf("Pile pleine\n" );
+        return;
+    }
+
+    stack->top += 1;                   /* move to the next free slot... */
+    stack->base[stack->top] = element; /* ...and store the element there */
+}
+
+line_t unStack(stack_t *stack)
+{
+    /*
+     * Pops and returns the top element of the stack.
+     * On an empty stack nothing is removed, "Pile vide" is printed and a
+     * zero-filled line_t is returned (it used to be an uninitialized value).
+     */
+    line_t popped = {0};
+    if (emptyStack(stack)) {
+        printf("Pile vide");
+        return popped;
+    }
+    popped = stack->base[stack->top]; /* copy the top entry out... */
+    stack->top -= 1;                  /* ...then drop it from the stack */
+    return popped;
+}
+
+void freeStack(stack_t *stack)
+{
+    /*
+     * Releases the element array and then the stack header.
+     * A NULL stack is only reported on stdout.
+     */
+    if (stack == NULL) {
+        printf("Libération impossible, stack == NULL\n");
+        return;
+    }
+    free(stack->base);
+    free(stack);
+}
+
+
+
+void traningAgent ( int numberRun, int numberStep, float *****Q) {
+    /* Runs numberRun training episodes of numberStep decisions each and updates Q in place. */
+    int i;
+    int action;
+    point_t canon = {0, 0};      /* initialised because initCanon() takes it by value */
+    point_t dropPoint = {0, 0};  /* same for initDropPoint() */
+    int canonZone;
+    int dropZone;
+    float angleH;
+    float angleF;
+    int zoneAngleH;
+    int zoneAngleF;
+    int agentZone;
+    int reward;
+    agent_t *agent;
+    stack_t *stack;
+    line_t line;
+    float greedy = 1;  /* exploration probability handed to takeAction() */
+    float nextBest;    /* best Q-value of the state that follows the one being updated */
+    /* One episode pushes numberStep-1 entries, so the history is sized on numberStep (was a fixed 6000). */
+    stack = initStack(numberStep > 0 ? numberStep : 1);
+
+    while (numberRun > 0) {
+        agent = initAgent();
+        canon = initCanon(canon);
+        dropPoint = initDropPoint(dropPoint);
+        angleF = defineAngleF(canon.x, canon.y, dropPoint.x, dropPoint.y);  /* canon.x used to be passed twice */
+        angleH = defineAngleH(canon.x, dropPoint.x);  /* the second argument used to be canon.y */
+        zoneAngleF = converterIntoAngleF(angleF);
+        zoneAngleH = converterIntoAngleH(angleH);
+        dropZone = convertIntoZone(dropPoint.x, dropPoint.y);
+        canonZone = convertIntoZoneCanon(canon.x, canon.y);
+        reward = 0;
+        printf("%d %d %d %d \n",dropZone, canonZone,zoneAngleH,zoneAngleF);
+        /* Play all but the last decision, recording each (state, action) with a zero reward. */
+        for (i = 0; i < numberStep-1; i++) {
+            action = takeAction(agent->x, agent->y, Q, canonZone, zoneAngleH, zoneAngleF, greedy);
+            agentZone = convertIntoZone(agent->x, agent->y);
+            line.receiverZone = agentZone;
+            line.shooterZone = canonZone;
+            line.angleHZone = zoneAngleH;
+            line.angleFZone = zoneAngleF;
+            line.action = action;
+            line.reward = reward;
+            actionStack(stack, line);
+            moveAgent(agent, action);
+        }
+        /* Last decision: rewarded with 1 when the agent stands in the drop zone before moving. */
+        action = takeAction(agent->x, agent->y, Q, canonZone, zoneAngleH, zoneAngleF, greedy);
+        agentZone = convertIntoZone(agent->x, agent->y);
+        reward = (agentZone == dropZone) ? 1 : 0;
+        line.receiverZone = agentZone;
+        line.shooterZone = canonZone;
+        line.angleHZone = zoneAngleH;
+        line.angleFZone = zoneAngleF;
+        line.action = action;
+        line.reward = reward;
+        moveAgent(agent, action);
+        free(agent);  /* a new agent is allocated every episode; it used to leak */
+
+        /* Terminal update: there is no successor state, hence no discounted term. */
+        Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=
+                    LEARN_RATE * (reward - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action]);
+
+        /* Replay the episode backwards; before each pop, `line` holds the successor of the entry about to be popped. */
+        while (!emptyStack(stack)) {
+            nextBest = Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]
+                        [argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone])];
+            reward = line.reward;
+            line = unStack(stack);
+            /* The discounted value comes from the successor state (it used to be read from the popped state). */
+            Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=
+                    LEARN_RATE * (reward + DISCOUNT * nextBest
+                    - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action]);
+        }
+        numberRun--;
+        if (numberRun > 0) { greedy = greedy - 1/((float)numberRun); }  /* no division by zero after the last episode */
+
+        if ( numberRun%1000000==1){printf (" %d \n  ", numberRun);}
+    }
+    freeStack(stack);
+}
\ No newline at end of file
diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h
index deffca1532aaaecc4667a94e10fbd8b6eda02fa2..60fe308715821fa9a24e68b723829f1d0e5b9c93 100644
--- a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h
+++ b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h
@@ -4,12 +4,21 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <time.h>   
-//#include "ball.h"
+#include "ball.h"
 #include "math.h"
 #include "map.h"
 
+#ifndef M_PI /* <math.h> only provides M_PI as an extension, so define it when absent */
+#define M_PI 3.14159265358979323846
+#endif
+
+#define LEARN_RATE 0.8 /* factor applied to each correction of a Q-value */
+#define DISCOUNT 0.2   /* factor applied to the best-action Q-value inside that correction */
+
+
 #define NUMBER_ZONE_SHOOTER 4
 #define NUMBER_ZONE_RECEIVER 4
+#define NUMBER_ACTION 5 
 
 #define FOWARD 0//<--
 #define BACK 1 //-->
@@ -17,16 +26,50 @@
 #define DOWN 3
 #define WAIT 4 
 
+
 typedef struct agent {
     int x;
     int y;
-    int heigth;
+    int high; /* replaces the former `heigth` field; initAgent() sets it to 2*BLOCK_SIZE */
     int weight;
     int speed; 
 } agent_t;
 
+typedef struct line {
+    int receiverZone; /* zone index of the agent, from convertIntoZone() */
+    int shooterZone;  /* zone index of the cannon, from convertIntoZoneCanon() */
+    int angleHZone;   /* bucket from converterIntoAngleH() */
+    int angleFZone;   /* bucket from converterIntoAngleF() */
+    int action;       /* action chosen for this step */
+    int reward;       /* reward recorded with this step */
+} line_t;
+
+typedef struct stack
+{
+    line_t *base;      /* heap array holding the stored entries */
+    int     numberelt; /* capacity of base, in entries */
+    int     top;       /* index of the most recent entry, -1 when empty */
+
+} stack_t;
+
+
 
+agent_t* initAgent ( );
 void moveAgent(agent_t * agent, int choice);
 float ***** allocateAndInitiateQ();
-void writeQ(float *****Q);
+void writeQ(float *****);
+int argmax(float * );
+int convertIntoZone(int ,int y);
+int convertIntoZoneCanon(int xCanon,int yCanon);
+int converterIntoAngleF(float);
+int converterIntoAngleH(float);
+int takeAction(int ,int , float ***** , int , int, int, float );
+int setReward(int , int , int );
+stack_t* initStack (int nbelt);
+int emptyStack (stack_t *stack);
+int fullStack(stack_t *stack);
+void actionStack(stack_t *stack, line_t line);
+line_t unStack(stack_t *stack);
+void freeStack(stack_t *stack);
+void traningAgent( int numberRun, int numberStep, float *****Q);
 #endif
\ No newline at end of file