diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/ball.c b/travail_de_groupe/jeu_appren_par_renfo/src/ball.c index 19c7ff3ece1dc8f57e4d2b7e96589c61b2650275..4de5849e2c5c1fbf890b1372003632e90f2f4e30 100644 --- a/travail_de_groupe/jeu_appren_par_renfo/src/ball.c +++ b/travail_de_groupe/jeu_appren_par_renfo/src/ball.c @@ -14,16 +14,49 @@ void initBall() ball.angle = -pi; ball.speed = 0; } +//ball_t ball; +int trajectoireAntoine[NUMBERPOINT_TRAJEC][2]; + +point_t initCanon (point_t canon){ + canon.x=(1+rand()%14); + canon.y=(1+rand()%8); + return canon; +} -float defineAngle(int canonX, int canonY, int xDropPoint, int yDropPoint) +point_t initDropPoint (point_t dropPoint){ + dropPoint.x=(16+rand()%14); + dropPoint.y=(1+rand()%8); + return dropPoint; +} + +float defineAngleF(int canonX, int canonY, int xDropPoint, int yDropPoint) { float distance; float angleSin; + canonX=canonX; + canonY=canonY; + xDropPoint= xDropPoint; + yDropPoint= yDropPoint; + distance = sqrtf(powf((float)(xDropPoint - canonX), 2) + powf((float)(yDropPoint - canonY), 2)); - angleSin = asinf(distance / (xDropPoint - canonX)); + angleSin = asinf(((float) (yDropPoint - canonY))/distance); return angleSin; } +float defineAngleH(int xCanon, int xDropPoint){ + int xNet =15 ; + int yNet= 4; + float angle ; + float yTemp; + xCanon= xCanon; + xDropPoint=xDropPoint; + + yTemp = lagrangeInterpolation((float)(xCanon+(xDropPoint-xCanon)/4), xCanon , 2,xNet , yNet , xDropPoint, 0 ); + angle= atanf( (float)(yTemp/ (xCanon+(xDropPoint-xCanon)/4) )) ; // possible faute ? + + return angle; +} + /* * Fonction qui prend une valeur de x et 3 points. Elle * renvoie la coordonnée y liée à la valeur de x sur la diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/ball.h b/travail_de_groupe/jeu_appren_par_renfo/src/ball.h index 61f10d9fdcb323f697e3f84fc375ffc3f35dd892..bf59edcff3b1c00de7da928d4f0c4ef21540497f 100644 --- a/travail_de_groupe/jeu_appren_par_renfo/src/ball.h +++ b/travail_de_groupe/jeu_appren_par_renfo/src/ball.h @@ -4,7 +4,10 @@ #include <stdlib.h> #include <time.h> #include <stdio.h> -#include <math.h> +#include <math.h> +#include "render.h" + + #include "render.h" @@ -12,8 +15,8 @@ typedef enum { - false, - true + false, true + } booleen_t; typedef struct ball @@ -29,11 +32,21 @@ typedef struct ball } ball_t; extern ball_t ball; +typedef struct point{ + int x; + int y; +} point_t ; + +//extern ball_t ball; extern int trajectoireAntoine[NUMBERPOINT_TRAJEC][2]; void initBall(); float defineAngle(int, int, int, int); void updateBall(); +point_t initCanon (point_t canon); +point_t initDropPoint (point_t dropPoint); +float defineAngleF(int, int, int, int); +float defineAngleH(int xCanon, int xDropPoint); float lagrangeInterpolation(float, int, int, int, int, int, int); void calculTrajectoireAntoine2(int, int, int, int, int, int); diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/main.c b/travail_de_groupe/jeu_appren_par_renfo/src/main.c index f8adb80f0f4020dc95969b76434c363ff8eacaa9..56b932890e41905d54b7ce10aa500344168181b2 100644 --- a/travail_de_groupe/jeu_appren_par_renfo/src/main.c +++ b/travail_de_groupe/jeu_appren_par_renfo/src/main.c @@ -15,4 +15,4 @@ int main() initBall(); mainLoop(); -} \ No newline at end of file +} diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/map.h b/travail_de_groupe/jeu_appren_par_renfo/src/map.h index e291f869f9b2356f715534295bc6374dc8ba51ca..20248bf32b3782ecbdce1d8c4407e0eba8ce9d40 100644 --- a/travail_de_groupe/jeu_appren_par_renfo/src/map.h +++ b/travail_de_groupe/jeu_appren_par_renfo/src/map.h @@ -8,7 +8,7 @@ //#include "main.h" #define MAP_WIDTH 31 -#define MAP_HEIGHT 10 +#define MAP_HEIGHT 11 extern int map[MAP_HEIGHT][MAP_WIDTH]; diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c new file mode 100644 index 0000000000000000000000000000000000000000..0830c3b252d5d2dffef1533b5744f2ee97a2ee83 --- /dev/null +++ b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c @@ -0,0 +1,416 @@ +#include "qlearn.h" + +agent_t * initAgent (){ + agent_t * agent =(agent_t*)malloc(sizeof(agent_t)); + if (agent ==NULL){ + printf("erreur alloc\n "); + exit (1); + } + agent->x=(16+rand()%14); + agent->y=(1+rand()%8); + agent->high=2; + agent->weight=2; + agent->speed = 1; + // si changement de speed => changement de collisiosn dans le takeaction + return(agent); +} + +void moveAgent(agent_t * agent, int choice){ + switch (choice) + { + case BACK: + agent->x += 1*agent->speed; //Avancer + break; + + case FOWARD: + agent->x -= 1*agent->speed; // reculer + break; + + case UP: + agent->y += 1*agent->speed; + break; + + case DOWN: + agent->y -= 1*agent->speed; + break; + case WAIT: + break; + } +} + +float ***** allocateAndInitiateQ(){ + int i,j,k,l,m; + + float ***** q = malloc(sizeof(float ****) * NUMBER_ZONE_RECEIVER); /// alloc player zone + if (q==NULL) + { + printf("problème d'allocation \n"); + exit(1); + } + + for(i = 0; i < NUMBER_ZONE_RECEIVER; i++){ + q[i] = malloc(sizeof(float ***) * NUMBER_ZONE_SHOOTER ); // alloc shooter zone + if (q[i]==NULL) + { + printf("problème d'allocation \n"); + exit(1); + } + + for(j = 0; j< NUMBER_ZONE_SHOOTER; j++){ + q[i][j] = malloc(sizeof(float **) * 3 ); // alloc angle hauteur + + if (q[i][j]==NULL) + { + printf("problème d'allocation \n"); + exit(1); + } + + for(k = 0; k <3 ; k++){ + q[i][j][k] = malloc(sizeof(float *) * 5 ); // alloc angle plat + + if (q[i][j][k]==NULL) + { + printf("problème d'allocation \n"); + exit(1); + } + for(l = 0; l<5 ; l++){ + q[i][j][k][l] = malloc(sizeof(float ) * 5); //alloc action + + if (q[i][j][k][l]==NULL) + { + printf("problème d'allocation \n"); + exit(1); + } + for (m=0;m <5;m++){ + q[i][j][k][l][m]=0; + } + + } + } + } + } + return q; +} + +void writeQ(float *****Q){ + int i, j, k, l, m ; + FILE * fp = fopen("q.txt", "w+"); + for(i = 0; i < NUMBER_ZONE_RECEIVER; i++){ + for(j = 0; j < NUMBER_ZONE_SHOOTER; j++){ + for(k = 0; k < 3; k++){ + for(l= 0; l < 5; l++){ + for(m= 0; m <5; m++){ + fprintf(fp, "%f ", Q[i][j][k][l][m]); + }fprintf(fp, "\n"); + } + } + } + }fprintf(fp, "\n"); + fflush(fp); + fclose(fp); +} + + +int argmax(float * arr){ + int i; + float max = arr[0]; + int maxIndex = 0; + //printf("argmax: %f ", arr[0]); + for(i = 1; i < NUMBER_ACTION; i++){ + //printf("%f ", arr[i]); + if (arr[i] > max){ + max = arr[i]; + maxIndex = i; + } + } + //printf("\n"); + return maxIndex; +} + +int convertIntoZone(int xAgent,int yAgent){ + int zone=0; + xAgent=xAgent; + yAgent=yAgent; + if(xAgent> 15 && xAgent<23 && yAgent<=4){zone=0;} + else if(xAgent>22 && xAgent<31 && yAgent<=4){zone=1;} + else if(xAgent> 15 && xAgent<23 && yAgent<9){zone=2;} + else if(xAgent> 22 && xAgent<= 31&& yAgent<9){zone=3;} + + return zone ; +} +int convertIntoZoneCanon(int xCanon,int yCanon){ + int zone=0; + xCanon=xCanon; + yCanon=yCanon; + if(xCanon<9 && yCanon<=4){zone=0;} + else if(xCanon<15 && yCanon<=4){zone=1;} + else if(xCanon<9 && yCanon<9){zone=2;} + else if(xCanon<15&& yCanon<9){zone=3;} + + return zone ; +} + +int converterIntoAngleF(float angleF){ + int angleZone=0; + long angleFd=(long)(angleF*1000000); + if( ((long)(-M_PI/2*1000000)<=angleFd)&&(angleFd<=(long)((-M_PI/2+M_PI/5)*1000000))){angleZone=4;} + else if( ((long)((-(M_PI/2)+(M_PI/5))*1000000)<angleFd)&&(angleFd<=(long)((-M_PI/2+2*M_PI/5)*1000000))){angleZone=3;} + else if( (angleFd>(long)((-M_PI/2+2*M_PI/5)*1000000))&&(angleFd<=(long)((-M_PI/2+3*M_PI/5)*1000000))){angleZone=2;} + else if( (angleFd>(long)((-M_PI/2+3*M_PI/5)*1000000))&&(angleFd<=(long)((-M_PI/2+4*M_PI/5)*1000000))){angleZone=1;} + else if( (angleFd>(long)((-M_PI/2+4*M_PI/5)*1000000))&&(angleFd<=(long)(M_PI/2*1000000))){angleZone=0;} + + return(angleZone); +} + +int converterIntoAngleH(float angleH){ + int angleZone=0; + long angleHd=(long)(angleH*1000000); + if( (0<=angleHd)&&(angleHd<=(long)((M_PI/6)*1000000))){angleZone=0;} + else if(( ((long)((M_PI/6))*1000000)<angleHd)&&(angleHd<=(long)((M_PI/3)*1000000))){angleZone=1;} + else if( (angleHd>(long)((M_PI/3)*1000000))&&(angleHd<=(long)((M_PI/2)*1000000))){angleZone=2;} + + return(angleZone); +} + +int takeAction(int xAgent, int yAgent, float ***** Q, int canonZone, int angleHZone, int angleFZone, float eps){ + int action; + int proba = rand() % 10000; + int receiverZone=0; + if (proba < eps * 10000){ + if (xAgent > (MAP_WIDTH-1)/2+1 && xAgent < MAP_WIDTH- 2 && yAgent > 1 && yAgent < MAP_HEIGHT - 2){ + action = rand() % 5;// OK cas au centre + } + else if (xAgent == (MAP_WIDTH-1)/2+1 && yAgent > 1 && yAgent < MAP_HEIGHT - 2){ + int possibleActions[4] = {1, 2, 3,4}; + action = possibleActions[rand() % 4];// OK cas filet + } + else if (xAgent == (MAP_WIDTH-1)/2+1 && yAgent== 1){ + int possibleActions[3] = {1, 3, 4}; + action = possibleActions[rand() % 3];// cas en haut a gauche + } + else if (xAgent == (MAP_WIDTH-1)/2+1 && yAgent==MAP_HEIGHT - 2){ + int possibleActions[3] = {1, 2, 4}; + action = possibleActions[rand() % 3];// cas en bas a gauche + } + else if (yAgent ==1 && xAgent > (MAP_WIDTH-1)/2+1 && xAgent < MAP_WIDTH- 2){ + int possibleActions[4] = {0, 1,3,4}; + action = possibleActions[rand() % 4];// cas en haut au milieu + } + else if (xAgent == MAP_WIDTH- 2 && yAgent == 1){ + int possibleActions[3] = {0, 3,4}; + action = possibleActions[rand() % 3];// cas en haut a droite + } + else if (xAgent == MAP_WIDTH-2 && yAgent <MAP_HEIGHT-2 && yAgent>1){ + int possibleActions[4] = {0,2,3,4}; + action = possibleActions[rand() % 4];// cas a droite au milieu + } + else if (xAgent== MAP_WIDTH-2 && yAgent == MAP_HEIGHT-2){ + int possibleActions[3] = {0, 2,4}; + action = possibleActions[rand() % 3];// cas en bas a droite + } + else if (xAgent > (MAP_WIDTH-1)/2+1 && xAgent < MAP_WIDTH- 2 && yAgent == MAP_HEIGHT-2){ + int possibleActions[4] = {0,1,2,4}; + action = possibleActions[rand() % 4]; + } + else{ + action = rand() % 5; + } + } + else{ + receiverZone= convertIntoZone(xAgent,yAgent); + action = argmax(Q[receiverZone][canonZone][angleHZone][angleFZone]); + //printf("wtf"); + } + return action; +} + + +int setReward(int xAgent, int yAgent, int dropZone){ + int zoneAgent; + int reward=0; + + zoneAgent= convertIntoZone( xAgent,yAgent); + if (zoneAgent==dropZone){ + reward=1; + } + return (reward); +} + + +stack_t* initStack(int numberelt){ + stack_t *stack=NULL; + + stack=(stack_t *)malloc(sizeof(stack_t)); // allocation du ptr de tête. + + if (stack==NULL){ // vérification de l'allocation. + printf("problème d'allocation\n"); + exit(1); + } + + stack->base = (line_t *) malloc(numberelt*sizeof(line_t));// allocation de la stack de longueur numberelt. + + if (stack->base==NULL){ + printf("problème d'allocation\n"); + exit(1); + } + + stack->numberelt=numberelt; //ajout du nombre d'élément insérable dans la file. + stack->top=-1; //initialisation de l'indice du dernier élément. + + return (stack); +} + + +int emptyStack(stack_t *stack){ + + int result =0; + + if (stack->top==-1){ // test de l'indice du top. + + result=1; // la stack est vide. + } + + return (result); +} + +int fullStack(stack_t *stack) +{ + int result = 0; + + if (stack->numberelt == (stack->top)+1) //si le top est l'indice du dernier élément élément de la stack + { + result = 1; + } + + return result; +} + +void actionStack(stack_t *stack, line_t element) +{ + if(!fullStack(stack)) //si la stack n'est pas pleine + { + stack->base[(stack->top)+1] = element; //on ajoute l'élément à l'indice top+1 + stack->top = stack->top+1; //on incrémente l'indice du top + } + else + { + printf("Pile pleine\n" ); + } +} + +line_t unStack(stack_t *stack) +{ + line_t top; + + if(!emptyStack(stack)) //si la stack n'est pas vide + { + top = stack->base[stack->top]; //on récupère le top + stack->top = stack->top-1; //on décrémente l'indice du top + } + else + { + printf("Pile vide"); + } + + return top; +} + +void freeStack(stack_t *stack) +{ + if(stack != NULL) + { + free(stack->base); //on libère le tableau dynamique + free(stack); //on libère la tête de la stack + //printf("Pile libérée\n"); + } + else + { + printf("Libération impossible, stack == NULL\n"); + } +} + + + +void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir la bonne taille il faut diviser par block size + int i ; + int action; + point_t canon ; + point_t dropPoint ; + int canonZone; + int dropZone; + float angleH; + float angleF; + int zoneAngleH; + int zoneAngleF; + int agentZone; + int reward; + agent_t *agent; + stack_t *stack; + line_t line; + float greedy=1; + int maxAction; + stack= initStack(6000); + + while (numberRun>0){ + agent=initAgent(); + canon=initCanon(canon); + dropPoint= initDropPoint(dropPoint); + angleF=defineAngleF( canon.x, canon.x, dropPoint.x, dropPoint.y ); + angleH=defineAngleH( canon.x, canon.y ); + zoneAngleF=converterIntoAngleF(angleF); + zoneAngleH=converterIntoAngleH(angleH); + dropZone=convertIntoZone(dropPoint.x,dropPoint.y); + canonZone= convertIntoZoneCanon(canon.x,canon.y); + reward=0; + printf("%d %d %d %d \n",dropZone, canonZone,zoneAngleH,zoneAngleF); + printf("%d %d \n",agent->x, agent->y); + + for (i=0; i<numberStep-1;i++){ + action = takeAction(agent->x,agent->y,Q,canonZone,zoneAngleH,zoneAngleF,greedy); + agentZone = convertIntoZone(agent->x, agent->y); + line.receiverZone=agentZone; + line.shooterZone =canonZone; + line.angleHZone= zoneAngleH; + line.angleFZone= zoneAngleF; + line.action= action; + line.reward= reward ; + actionStack(stack,line); + moveAgent(agent, action); + + } + action = takeAction(agent->x, agent->y,Q,canonZone,zoneAngleH,zoneAngleF,greedy); + agentZone = convertIntoZone(agent->x, agent->y); + + line.receiverZone=agentZone; + line.shooterZone =canonZone; + line.angleHZone= zoneAngleH; + line.angleFZone= zoneAngleF; + line.action= action; + line.reward = 0; + // actionStack(stack,line); + moveAgent(agent, action); + if (agentZone==dropZone){ + reward=1; + } + else{reward= 0;} + + + Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] += + + LEARN_RATE* ( reward - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] ); + + while (!emptyStack(stack)){ + maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]); + reward=line.reward; + line=unStack(stack); + + Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] += + + LEARN_RATE* ( reward + DISCOUNT*Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][maxAction] + - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] ); + } + numberRun--; + greedy=greedy-1/((float)numberRun); + + if ( numberRun%1000000==1){printf (" %d \n ", numberRun);} + } + freeStack(stack); +} \ No newline at end of file diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h new file mode 100644 index 0000000000000000000000000000000000000000..60fe308715821fa9a24e68b723829f1d0e5b9c93 --- /dev/null +++ b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.h @@ -0,0 +1,75 @@ +#ifndef Q_LEARN +#define Q_LEARN + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include "ball.h" +#include "math.h" +#include "map.h" + +#define M_PI 3.14159265358979323846 + +#define LEARN_RATE 0.8 +#define DISCOUNT 0.2 + + + + +#define NUMBER_ZONE_SHOOTER 4 +#define NUMBER_ZONE_RECEIVER 4 +#define NUMBER_ACTION 5 + +#define FOWARD 0//<-- +#define BACK 1 //--> +#define UP 2 +#define DOWN 3 +#define WAIT 4 + + +typedef struct agent { + int x; + int y; + int high; + int weight; + int speed; +} agent_t; + +typedef struct line { + int receiverZone; + int shooterZone; + int angleHZone; + int angleFZone; + int action; + int reward; +}line_t; + +typedef struct stack +{ + line_t *base; + int numberelt; + int top; + +} stack_t; + + + +agent_t* initAgent ( ); +void moveAgent(agent_t * agent, int choice); +float ***** allocateAndInitiateQ(); +void writeQ(float *****); +int argmax(float * ); +int convertIntoZone(int ,int y); +int convertIntoZoneCanon(int xCanon,int yCanon); +int converterIntoAngleF(float); +int converterIntoAngleH(float); +int takeAction(int ,int , float ***** , int , int, int, float ); +int setReward(int , int , int ); +stack_t* initStack (int nbelt); +int emptyStack (stack_t *stack); +int fullStack(stack_t *stack); +void actionStack(stack_t *stack, line_t line); +line_t unStack(stack_t *stack); +void freeStack(stack_t *stack); +void traningAgent( int numberRun, int numberStep, float *****Q); +#endif \ No newline at end of file