diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/ball.c b/travail_de_groupe/jeu_appren_par_renfo/src/ball.c index 5c73891b50def542e68ea0d99a4605d9871effb2..93bd07f91b5cb19346708f8c5cda0d15a990b8b9 100644 --- a/travail_de_groupe/jeu_appren_par_renfo/src/ball.c +++ b/travail_de_groupe/jeu_appren_par_renfo/src/ball.c @@ -15,16 +15,49 @@ void initBall() ball.speed = 0; } +//ball_t ball; +int trajectoireAntoine[NUMBERPOINT_TRAJEC][2]; + +point_t initCanon (point_t canon){ + canon.x=(1+rand()%14)*BLOCK_SIZE; + canon.y=(1+rand()%8)*BLOCK_SIZE; + return canon; +} -float defineAngle(int canonX, int canonY, int xDropPoint, int yDropPoint) +point_t initDropPoint (point_t dropPoint){ + dropPoint.x=(16+rand()%14)*BLOCK_SIZE; + dropPoint.y=(1+rand()%8)*BLOCK_SIZE; + return dropPoint; +} + +float defineAngleF(int canonX, int canonY, int xDropPoint, int yDropPoint) { float distance; float angleSin; + canonX=canonX/BLOCK_SIZE; + canonY=canonY/BLOCK_SIZE; + xDropPoint= xDropPoint/BLOCK_SIZE; + yDropPoint= yDropPoint/BLOCK_SIZE; + distance = sqrtf(powf((float)(xDropPoint - canonX), 2) + powf((float)(yDropPoint - canonY), 2)); - angleSin = asinf(distance / (xDropPoint - canonX)); + angleSin = asinf(((float) (yDropPoint - canonY))/distance); return angleSin; } +float defineAngleH(int xCanon, int xDropPoint){ + int xNet =15 ; + int yNet= 4; + float angle ; + float yTemp; + xCanon= xCanon/BLOCK_SIZE; + xDropPoint=xDropPoint/BLOCK_SIZE; + + yTemp = lagrangeInterpolation((float)(xCanon+(xDropPoint-xCanon)/4), xCanon , 2,xNet , yNet , xDropPoint, 0 ); + angle= atanf( (float)(yTemp/ (xCanon+(xDropPoint-xCanon)/4) )) ; // possible faute ? + + return angle; +} + /* * Fonction qui prend une valeur de x et 3 points. Elle * renvoie la coordonnée y liée à la valeur de x sur la diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/ball.h b/travail_de_groupe/jeu_appren_par_renfo/src/ball.h index 2f22f9a567d89511f581ef1eba2db832f0666554..393bd56723adcbadb1f1381290f64c89a1dbfc90 100644 --- a/travail_de_groupe/jeu_appren_par_renfo/src/ball.h +++ b/travail_de_groupe/jeu_appren_par_renfo/src/ball.h @@ -4,7 +4,10 @@ #include <stdlib.h> #include <time.h> #include <stdio.h> -#include <math.h> +#include <math.h> +#include "render.h" + + #include "render.h" @@ -12,8 +15,8 @@ typedef enum { - false, - true + false, true + } booleen_t; typedef struct ball @@ -29,10 +32,18 @@ typedef struct ball } ball_t; extern ball_t ball; +typedef struct point{ + int x; + int y; +} point_t ; + +//extern ball_t ball; extern int trajectoireAntoine[NUMBERPOINT_TRAJEC][2]; -void initBall(); -float defineAngle(int, int, int, int); +point_t initCanon (point_t canon); +point_t initDropPoint (point_t dropPoint); +float defineAngleF(int, int, int, int); +float defineAngleH(int xCanon, int xDropPoint); float lagrangeInterpolation(float, int, int, int, int, int, int); void calculTrajectoireAntoine2(int, int, int, int, int, int); diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/main.c b/travail_de_groupe/jeu_appren_par_renfo/src/main.c index f8f4d3c992e87b253f43c34e9e9159ad9ba52258..43710c0a0d8b5903bdf9333352587dddccd5733c 100644 --- a/travail_de_groupe/jeu_appren_par_renfo/src/main.c +++ b/travail_de_groupe/jeu_appren_par_renfo/src/main.c @@ -3,18 +3,23 @@ int running; int game_state; -int main(){ - //float ***** Q = allocateAndInitiateQ(); - //writeQ(Q); - running = 1; - game_state = GAME; - readMapFromFile("map.txt"); - //printMap(); - initPlayer(); - initKeys(); - initBall(); - - mainLoop(); +int main(){ + float ***** Q = allocateAndInitiateQ(); + int i= 10; + + srand ( time(NULL)); + while (i>0){ + traningAgent(10000,5000, Q); + i--;} + + writeQ(Q); + // running = 1; + // game_state = GAME; + // readMapFromFile("map.txt"); + // initPlayer(); + // initKeys(); + + // mainLoop(); } \ No newline at end of file diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/map.h b/travail_de_groupe/jeu_appren_par_renfo/src/map.h index 13c7cf8bc4996d7009095775e2211542cfd65918..a617f9bde3267d440112874a0751514e4ef30b37 100644 --- a/travail_de_groupe/jeu_appren_par_renfo/src/map.h +++ b/travail_de_groupe/jeu_appren_par_renfo/src/map.h @@ -5,7 +5,11 @@ #define MAP_WIDTH 31 +<<<<<<< HEAD #define MAP_HEIGHT 10 +======= +#define MAP_HEIGHT 11 +>>>>>>> qlearn extern int map[MAP_HEIGHT][MAP_WIDTH]; diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c index 65f18f155cf08a52bbfafe3aa1e19b44ab7bceca..3b5084014b908dca1e3fd7d054a62b589ead47c6 100644 --- a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c +++ b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c @@ -1,5 +1,20 @@ #include "qlearn.h" +agent_t * initAgent (){ + agent_t * agent =(agent_t*)malloc(sizeof(agent_t)); + if (agent ==NULL){ + printf("erreur alloc\n "); + exit (1); + } + agent->x=(16+rand()%14)*BLOCK_SIZE; + agent->y=(1+rand()%8)*BLOCK_SIZE; + agent->high=2*BLOCK_SIZE; + agent->weight=2*BLOCK_SIZE; + agent->speed = 1; + // si changement de speed => changement de collisiosn dans le takeaction + return(agent); +} + void moveAgent(agent_t * agent, int choice){ switch (choice) { @@ -36,7 +51,7 @@ float ***** allocateAndInitiateQ(){ for(i = 0; i < NUMBER_ZONE_RECEIVER; i++){ q[i] = malloc(sizeof(float ***) * NUMBER_ZONE_SHOOTER ); // alloc shooter zone if (q[i]==NULL) - { + { printf("problème d'allocation \n"); exit(1); } @@ -77,7 +92,6 @@ float ***** allocateAndInitiateQ(){ return q; } - void writeQ(float *****Q){ int i, j, k, l, m ; FILE * fp = fopen("q.txt", "w+"); @@ -87,13 +101,315 @@ void writeQ(float *****Q){ for(l= 0; l < 5; l++){ for(m= 0; m <5; m++){ fprintf(fp, "%f ", Q[i][j][k][l][m]); - } + }fprintf(fp, "\n"); } } - fprintf(fp, "\n"); } - fprintf(fp, "\n"); - } + }fprintf(fp, "\n"); fflush(fp); fclose(fp); -} \ No newline at end of file +} + + +int argmax(float * arr){ + int i; + float max = arr[0]; + int maxIndex = 0; + //printf("argmax: %f ", arr[0]); + for(i = 1; i < NUMBER_ACTION; i++){ + //printf("%f ", arr[i]); + if (arr[i] > max){ + max = arr[i]; + maxIndex = i; + } + } + //printf("\n"); + return maxIndex; +} + +int convertIntoZone(int xAgent,int yAgent){ + int zone=0; + xAgent=xAgent/BLOCK_SIZE; + yAgent=yAgent/BLOCK_SIZE; + if(xAgent> 15 && xAgent<23 && yAgent<=4){zone=0;} + else if(xAgent>22 && xAgent<31 && yAgent<=4){zone=1;} + else if(xAgent> 15 && xAgent<23 && yAgent<=9){zone=2;} + else if(xAgent> 22 && xAgent<= 31&& yAgent<=9){zone=3;} + + return zone ; +} +int convertIntoZoneCanon(int xCanon,int yCanon){ + int zone=0; + xCanon=xCanon/BLOCK_SIZE; + yCanon=yCanon/BLOCK_SIZE; + if(xCanon<9 && yCanon<=4){zone=0;} + else if(xCanon<15 && yCanon<=4){zone=1;} + else if(xCanon<9 && yCanon<9){zone=2;} + else if(xCanon<15&& yCanon<9){zone=3;} + + return zone ; +} + +int converterIntoAngleF(float angleF){ + int angleZone=0; + long angleFd=(long)(angleF*1000000); + if( ((long)(-M_PI/2*1000000)<=angleFd)&&(angleFd<=(long)((-M_PI/2+M_PI/5)*1000000))){angleZone=4;} + else if( ((long)((-(M_PI/2)+(M_PI/5))*1000000)<angleFd)&&(angleFd<=(long)((-M_PI/2+2*M_PI/5)*1000000))){angleZone=3;} + else if( (angleFd>(long)((-M_PI/2+2*M_PI/5)*1000000))&&(angleFd<=(long)((-M_PI/2+3*M_PI/5)*1000000))){angleZone=2;} + else if( (angleFd>(long)((-M_PI/2+3*M_PI/5)*1000000))&&(angleFd<=(long)((-M_PI/2+4*M_PI/5)*1000000))){angleZone=1;} + else if( (angleFd>(long)((-M_PI/2+4*M_PI/5)*1000000))&&(angleFd<=(long)(M_PI/2*1000000))){angleZone=0;} + + return(angleZone); +} + +int converterIntoAngleH(float angleH){ + int angleZone=0; + long angleHd=(long)(angleH*1000000); + if( (0<=angleHd)&&(angleHd<=(long)((M_PI/6)*1000000))){angleZone=0;} + else if(( ((long)((M_PI/6))*1000000)<angleHd)&&(angleHd<=(long)((M_PI/3)*1000000))){angleZone=1;} + else if( (angleHd>(long)((M_PI/3)*1000000))&&(angleHd<=(long)((M_PI/2)*1000000))){angleZone=2;} + + return(angleZone); +} + +int takeAction(int xAgent, int yAgent, float ***** Q, int canonZone, int angleHZone, int angleFZone, float eps){ + int action; + int proba = rand() % 10000; + int receiverZone=0; + if (proba < eps * 10000){ + if (xAgent/BLOCK_SIZE > (MAP_WIDTH-1)/2+1 && xAgent/BLOCK_SIZE < MAP_WIDTH- 2 && yAgent/BLOCK_SIZE > 1 && yAgent/BLOCK_SIZE < MAP_HEIGHT - 2){ + action = rand() % 5;// OK cas au centre + } + else if (xAgent/BLOCK_SIZE == (MAP_WIDTH-1)/2+1 && yAgent/BLOCK_SIZE > 1 && yAgent/BLOCK_SIZE < MAP_HEIGHT - 2){ + int possibleActions[4] = {1, 2, 3,4}; + action = possibleActions[rand() % 4];// OK cas filet + } + else if (xAgent/BLOCK_SIZE == (MAP_WIDTH-1)/2+1 && yAgent/BLOCK_SIZE== 1){ + int possibleActions[3] = {1, 3, 4}; + action = possibleActions[rand() % 3];// cas en haut a gauche + } + else if (xAgent/BLOCK_SIZE == (MAP_WIDTH-1)/2+1 && yAgent/BLOCK_SIZE==MAP_HEIGHT - 2){ + int possibleActions[3] = {1, 2, 4}; + action = possibleActions[rand() % 3];// cas en bas a gauche + } + else if (yAgent/BLOCK_SIZE ==1 && xAgent/BLOCK_SIZE > (MAP_WIDTH-1)/2+1 && xAgent/BLOCK_SIZE < MAP_WIDTH- 2){ + int possibleActions[4] = {0, 1,3,4}; + action = possibleActions[rand() % 4];// cas en haut au milieu + } + else if (xAgent/BLOCK_SIZE == MAP_WIDTH- 2 && yAgent/BLOCK_SIZE == 1){ + int possibleActions[3] = {0, 3,4}; + action = possibleActions[rand() % 3];// cas en haut a droite + } + else if (xAgent/BLOCK_SIZE == MAP_WIDTH-2 && yAgent/BLOCK_SIZE <MAP_HEIGHT-2 && yAgent/BLOCK_SIZE>1){ + int possibleActions[4] = {0,2,3,4}; + action = possibleActions[rand() % 4];// cas a droite au milieu + } + else if (xAgent/BLOCK_SIZE== MAP_WIDTH-2 && yAgent/BLOCK_SIZE == MAP_HEIGHT-2){ + int possibleActions[3] = {0, 2,4}; + action = possibleActions[rand() % 3];// cas en bas a droite + } + else if (xAgent/BLOCK_SIZE > (MAP_WIDTH-1)/2+1 && xAgent/BLOCK_SIZE < MAP_WIDTH- 2 && yAgent/BLOCK_SIZE == MAP_HEIGHT-2){ + int possibleActions[4] = {0,1,2,4}; + action = possibleActions[rand() % 4]; + } + else{ + action = rand() % 5; + } + } + else{ + receiverZone= convertIntoZone(xAgent/BLOCK_SIZE,yAgent/BLOCK_SIZE); + action = argmax(Q[receiverZone][canonZone][angleHZone][angleFZone]); + //printf("wtf"); + } + return action; +} + + +int setReward(int xAgent, int yAgent, int dropZone){ + int zoneAgent; + int reward=0; + + zoneAgent= convertIntoZone( xAgent,yAgent); + if (zoneAgent==dropZone){ + reward=1; + } + return (reward); +} + + +stack_t* initStack(int numberelt){ + stack_t *stack=NULL; + + stack=(stack_t *)malloc(sizeof(stack_t)); // allocation du ptr de tête. + + if (stack==NULL){ // vérification de l'allocation. + printf("problème d'allocation\n"); + exit(1); + } + + stack->base = (line_t *) malloc(numberelt*sizeof(line_t));// allocation de la stack de longueur numberelt. + + if (stack->base==NULL){ + printf("problème d'allocation\n"); + exit(1); + } + + stack->numberelt=numberelt; //ajout du nombre d'élément insérable dans la file. + stack->top=-1; //initialisation de l'indice du dernier élément. + + return (stack); +} + + +int emptyStack(stack_t *stack){ + + int result =0; + + if (stack->top==-1){ // test de l'indice du top. + + result=1; // la stack est vide. + } + + return (result); +} + +int fullStack(stack_t *stack) +{ + int result = 0; + + if (stack->numberelt == (stack->top)+1) //si le top est l'indice du dernier élément élément de la stack + { + result = 1; + } + + return result; +} + +void actionStack(stack_t *stack, line_t element) +{ + if(!fullStack(stack)) //si la stack n'est pas pleine + { + stack->base[(stack->top)+1] = element; //on ajoute l'élément à l'indice top+1 + stack->top = stack->top+1; //on incrémente l'indice du top + } + else + { + printf("Pile pleine\n" ); + } +} + +line_t unStack(stack_t *stack) +{ + line_t top; + + if(!emptyStack(stack)) //si la stack n'est pas vide + { + top = stack->base[stack->top]; //on récupère le top + stack->top = stack->top-1; //on décrémente l'indice du top + } + else + { + printf("Pile vide"); + } + + return top; +} + +void freeStack(stack_t *stack) +{ + if(stack != NULL) + { + free(stack->base); //on libère le tableau dynamique + free(stack); //on libère la tête de la stack + //printf("Pile libérée\n"); + } + else + { + printf("Libération impossible, stack == NULL\n"); + } +} + + + +void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir la bonne taille il faut diviser par block size + int i ; + int action; + point_t canon ; + point_t dropPoint ; + int canonZone; + int dropZone; + float angleH; + float angleF; + int zoneAngleH; + int zoneAngleF; + int agentZone; + int reward; + agent_t *agent; + stack_t *stack; + line_t line; + float greedy=1; + int maxAction; + stack= initStack(6000); + + while (numberRun>0){ + agent=initAgent(); + canon=initCanon(canon); + dropPoint= initDropPoint(dropPoint); + angleF=defineAngleF( canon.x, canon.x, dropPoint.x, dropPoint.y ); + angleH=defineAngleH( canon.x, canon.y ); + zoneAngleF=converterIntoAngleF(angleF); + zoneAngleH=converterIntoAngleH(angleH); + dropZone=convertIntoZone(dropPoint.x,dropPoint.y); + canonZone= convertIntoZoneCanon(canon.x,canon.y); + reward=0; + printf("%d %d %d %d \n",dropZone, canonZone,zoneAngleH,zoneAngleF); + + for (i=0; i<numberStep-1;i++){ + action = takeAction(agent->x,agent->y,Q,canonZone,zoneAngleH,zoneAngleF,greedy); + agentZone = convertIntoZone(agent->x, agent->y); + line.receiverZone=agentZone; + line.shooterZone =canonZone; + line.angleHZone= zoneAngleH; + line.angleFZone= zoneAngleF; + line.action= action; + line.reward= reward ; + actionStack(stack,line); + moveAgent(agent, action); + + } + action = takeAction(agent->x, agent->y,Q,canonZone,zoneAngleH,zoneAngleF,greedy); + agentZone = convertIntoZone(agent->x, agent->y); + if (agentZone==dropZone){ + reward=1; + } + else{reward= 0;} + line.receiverZone=agentZone; + line.shooterZone =canonZone; + line.angleHZone= zoneAngleH; + line.angleFZone= zoneAngleF; + line.action= action; + line.reward = reward; + // actionStack(stack,line); + moveAgent(agent, action); + + + + Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] += + + LEARN_RATE* ( reward - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] ); + + while (!emptyStack(stack)){ + maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]); + reward=line.reward; + line=unStack(stack); + + Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] += + + LEARN_RATE* ( reward + DISCOUNT*Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][maxAction] + - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] ); + } + numberRun--; + greedy=greedy-1/((float)numberRun); + + if ( numberRun%1000000==1){printf (" %d \n ", numberRun);} + } + freeStack(stack); +} \ No newline at end of file diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h index deffca1532aaaecc4667a94e10fbd8b6eda02fa2..60fe308715821fa9a24e68b723829f1d0e5b9c93 100644 --- a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h +++ b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h @@ -4,12 +4,21 @@ #include <stdio.h> #include <stdlib.h> #include <time.h> -//#include "ball.h" +#include "ball.h" #include "math.h" #include "map.h" +#define M_PI 3.14159265358979323846 + +#define LEARN_RATE 0.8 +#define DISCOUNT 0.2 + + + + #define NUMBER_ZONE_SHOOTER 4 #define NUMBER_ZONE_RECEIVER 4 +#define NUMBER_ACTION 5 #define FOWARD 0//<-- #define BACK 1 //--> @@ -17,16 +26,50 @@ #define DOWN 3 #define WAIT 4 + typedef struct agent { int x; int y; - int heigth; + int high; int weight; int speed; } agent_t; +typedef struct line { + int receiverZone; + int shooterZone; + int angleHZone; + int angleFZone; + int action; + int reward; +}line_t; + +typedef struct stack +{ + line_t *base; + int numberelt; + int top; + +} stack_t; + + +agent_t* initAgent ( ); void moveAgent(agent_t * agent, int choice); float ***** allocateAndInitiateQ(); -void writeQ(float *****Q); +void writeQ(float *****); +int argmax(float * ); +int convertIntoZone(int ,int y); +int convertIntoZoneCanon(int xCanon,int yCanon); +int converterIntoAngleF(float); +int converterIntoAngleH(float); +int takeAction(int ,int , float ***** , int , int, int, float ); +int setReward(int , int , int ); +stack_t* initStack (int nbelt); +int emptyStack (stack_t *stack); +int fullStack(stack_t *stack); +void actionStack(stack_t *stack, line_t line); +line_t unStack(stack_t *stack); +void freeStack(stack_t *stack); +void traningAgent( int numberRun, int numberStep, float *****Q); #endif \ No newline at end of file