/* qlearn.c */
#include "qlearn.h"

/*
 * Allocate one agent on the heap and fill in its starting state.
 *
 * x is drawn with rand() from 16..29 and y from 1..8 (x is drawn first);
 * high and weight are set to 2 and speed to 1.  If the allocation fails a
 * message is printed and the process exits with status 1.
 *
 * Returns the newly allocated agent; nothing in this function releases it.
 */
agent_t * initAgent (){
    agent_t *created = malloc(sizeof(*created));

    if (!created) {
        printf("erreur alloc\n  ");
        exit(1);
    }

    created->x = rand() % 14 + 16;
    created->y = rand() % 8 + 1;
    created->high = 2;
    created->weight = 2;
    /* Changing the speed also requires changing the collision handling in takeAction. */
    created->speed = 1;

    return created;
}

/*
 * Apply one action to the agent's position.
 *
 *   BACK   : x increases by the agent's speed
 *   FOWARD : x decreases by the agent's speed
 *   UP     : y increases by the agent's speed
 *   DOWN   : y decreases by the agent's speed
 *
 * Any other value of choice (WAIT included) leaves the agent where it is.
 */
void moveAgent(agent_t * agent, int choice){
    if (choice == BACK) {
        agent->x += agent->speed;
    } else if (choice == FOWARD) {
        agent->x -= agent->speed;
    } else if (choice == UP) {
        agent->y += agent->speed;
    } else if (choice == DOWN) {
        agent->y -= agent->speed;
    }
    /* Nothing to do for the remaining choices. */
}

/* malloc() wrapper: prints the allocation error and exits with status 1 on failure. */
static void *allocQLevel(size_t bytes)
{
    void *mem = malloc(bytes);

    if (mem == NULL) {
        printf("problème d'allocation \n");
        exit(1);
    }
    return mem;
}

/*
 * Build the Q-table as five nested levels of heap arrays and set every
 * value to 0.
 *
 * Dimensions, outermost first:
 *   [NUMBER_ZONE_RECEIVER]  player (receiver) zone
 *   [NUMBER_ZONE_SHOOTER]   shooter zone
 *   [3]                     height-angle zone
 *   [5]                     flat-angle zone
 *   [5]                     action
 *
 * Any failed allocation prints a message and terminates the process.
 * Returns the root pointer of the table.
 */
float ***** allocateAndInitiateQ(){
    int receiver, shooter, heightAngle, flatAngle, act;
    float *****table = allocQLevel(sizeof(*table) * NUMBER_ZONE_RECEIVER);

    for (receiver = 0; receiver < NUMBER_ZONE_RECEIVER; receiver++) {
        float ****byShooter = allocQLevel(sizeof(*byShooter) * NUMBER_ZONE_SHOOTER);
        table[receiver] = byShooter;

        for (shooter = 0; shooter < NUMBER_ZONE_SHOOTER; shooter++) {
            float ***byHeight = allocQLevel(sizeof(*byHeight) * 3);
            byShooter[shooter] = byHeight;

            for (heightAngle = 0; heightAngle < 3; heightAngle++) {
                float **byFlat = allocQLevel(sizeof(*byFlat) * 5);
                byHeight[heightAngle] = byFlat;

                for (flatAngle = 0; flatAngle < 5; flatAngle++) {
                    float *values = allocQLevel(sizeof(*values) * 5);
                    byFlat[flatAngle] = values;

                    for (act = 0; act < 5; act++) {
                        values[act] = 0;
                    }
                }
            }
        }
    }

    return table;
}

/*
 * Write every value of the Q-table to the text file "q.txt".
 *
 * Values are printed with "%f " in index order; each innermost row of 5
 * values is followed by a newline, and one extra newline ends the file.
 *
 * Q is indexed as [NUMBER_ZONE_RECEIVER][NUMBER_ZONE_SHOOTER][3][5][5].
 *
 * If the file cannot be opened, nothing is written and the error is
 * reported with perror().  A failure reported by fclose() (which is where
 * buffered write errors surface) is reported the same way.
 */
void writeQ(float *****Q){
    int i, j, k, l, m ;
    FILE * fp = fopen("q.txt", "w+");

    if (fp == NULL) {
        /* Without this check every fprintf below would receive a NULL stream. */
        perror("q.txt");
        return;
    }

    for(i = 0; i < NUMBER_ZONE_RECEIVER; i++){
        for(j = 0; j < NUMBER_ZONE_SHOOTER; j++){
            for(k = 0; k < 3; k++){
                for(l= 0; l < 5; l++){
                    for(m= 0; m <5; m++){
                        fprintf(fp, "%f ", Q[i][j][k][l][m]);
                    }
                    fprintf(fp, "\n");
                }
            }
        }
    }
    fprintf(fp, "\n");

    /* fclose() flushes the stream, so no separate fflush() is needed. */
    if (fclose(fp) != 0) {
        perror("q.txt");
    }
}

/*
 * Fill the Q-table from the text file "q.txt".
 *
 * Values are read with "%f " in index order into
 * Q[NUMBER_ZONE_RECEIVER][NUMBER_ZONE_SHOOTER][3][5][5]; Q must already be
 * allocated with those dimensions.
 *
 * If the file cannot be opened, Q is left untouched and the error is
 * reported with perror().  If a value cannot be parsed (truncated or
 * malformed file), reading stops there, a message is written to stderr and
 * the remaining entries keep their previous contents.
 */
void readQFromFile(float *****Q){
    int i, j, k, l, m ;
    FILE * fp = fopen("q.txt", "r");

    if (fp == NULL) {
        /* Without this check fscanf/fclose would receive a NULL stream. */
        perror("q.txt");
        return;
    }

    for(i = 0; i < NUMBER_ZONE_RECEIVER; i++){
        for(j = 0; j < NUMBER_ZONE_SHOOTER; j++){
            for(k = 0; k < 3; k++){
                for(l= 0; l < 5; l++){
                    for(m= 0; m <5; m++){
                        if (fscanf(fp, "%f ", &Q[i][j][k][l][m]) != 1) {
                            fprintf(stderr, "readQFromFile: q.txt is truncated or malformed\n");
                            fclose(fp);
                            return;
                        }
                    }
                }
            }
        }
    }
    fclose(fp);
}


/*
 * Return the index of the largest of the first NUMBER_ACTION values of arr.
 * When several values tie for the maximum, the lowest index is returned.
 */
int argmax(float * arr){
    int best = 0;
    int candidate;

    for (candidate = 1; candidate < NUMBER_ACTION; candidate++) {
        /* Strict comparison: an equal value never replaces an earlier one. */
        if (arr[candidate] > arr[best]) {
            best = candidate;
        }
    }

    return best;
}

/*
 * Map a position to one of four zones:
 *
 *                 15 < x < 23    22 < x <= 31
 *   y <= 4             0              1
 *   4 < y < 9          2              3
 *
 * Any position outside these ranges yields 0.
 *
 * The column boundary at x == 31 is now the same for both rows; previously
 * zone 1 stopped at x < 31 while zone 3 accepted x <= 31, so a point with
 * x == 31 and y <= 4 was reported as zone 3.
 */
int convertIntoZone(int xAgent,int yAgent){
    int column;
    int row;

    if (xAgent > 15 && xAgent < 23) {
        column = 0;
    } else if (xAgent > 22 && xAgent <= 31) {
        column = 1;
    } else {
        return 0;
    }

    if (yAgent <= 4) {
        row = 0;
    } else if (yAgent < 9) {
        row = 1;
    } else {
        return 0;
    }

    return 2 * row + column;
}
/*
 * Map the cannon position to one of four zones:
 *
 *                 x < 9     9 <= x < 15
 *   y <= 4          0            1
 *   4 < y < 9       2            3
 *
 * Positions with x >= 15 or y >= 9 yield 0.
 */
int convertIntoZoneCanon(int xCanon,int yCanon){
    int column;
    int row;

    if (xCanon >= 15 || yCanon >= 9) {
        return 0;
    }

    column = (xCanon < 9) ? 0 : 1;
    row = (yCanon <= 4) ? 0 : 1;

    return 2 * row + column;
}

/*
 * Discretise an angle in [-pi/2, pi/2] (radians) into five bands of width
 * pi/5.  The band starting at -pi/2 is numbered 4 and the band ending at
 * pi/2 is numbered 0; each band includes its upper bound.
 *
 * The comparison is done on integers: the angle and every bound are scaled
 * by 1000000 and truncated to long.  Angles outside [-pi/2, pi/2] yield 0.
 */
int converterIntoAngleF(float angleF){
    const long scaled = (long)(angleF*1000000);
    const long lowest = (long)(-M_PI/2*1000000);
    /* Upper bound of each band, from the lowest band to the highest. */
    const long upper[5] = {
        (long)((-M_PI/2+M_PI/5)*1000000),
        (long)((-M_PI/2+2*M_PI/5)*1000000),
        (long)((-M_PI/2+3*M_PI/5)*1000000),
        (long)((-M_PI/2+4*M_PI/5)*1000000),
        (long)(M_PI/2*1000000)
    };
    int band;

    if (scaled < lowest) {
        return 0;
    }

    for (band = 0; band < 5; band++) {
        if (scaled <= upper[band]) {
            return 4 - band;
        }
    }

    return 0;
}

/*
 * Discretise an angle in [0, pi/2] (radians) into three bands of width
 * pi/6: [0, pi/6] -> 0, (pi/6, pi/3] -> 1, (pi/3, pi/2] -> 2.
 *
 * The comparison is done on integers: the angle and every bound are scaled
 * by 1000000 and truncated to long.  Angles outside [0, pi/2] yield 0.
 *
 * The lower bound of band 1 used to be written (long)((M_PI/6))*1000000,
 * where the cast truncated pi/6 to 0 before the scaling.  The result was
 * only correct because band 0 is tested first; the bounds are now computed
 * consistently and each is tested once.
 */
int converterIntoAngleH(float angleH){
    const long scaled = (long)(angleH*1000000);

    if (scaled < 0) {
        return 0;
    }
    if (scaled <= (long)((M_PI/6)*1000000)) {
        return 0;
    }
    if (scaled <= (long)((M_PI/3)*1000000)) {
        return 1;
    }
    if (scaled <= (long)((M_PI/2)*1000000)) {
        return 2;
    }

    return 0;
}

/*
 * Choose the agent's next action.
 *
 * A number in 0..9999 is drawn with rand(); when it is below eps * 10000
 * the action is picked at random, otherwise the action with the highest
 * value in Q[receiver zone][canonZone][angleHZone][angleFZone] is returned.
 *
 * For the random pick, the set of candidate action indices depends on where
 * (xAgent, yAgent) lies relative to the column (MAP_WIDTH-1)/2+1 (called the
 * net in the original comments), the column MAP_WIDTH-2, the row 1 and the
 * row MAP_HEIGHT-2.  The position tests are evaluated in a fixed order and
 * the first match wins; a position matching none of them may take any of the
 * five actions.
 */
int takeAction(int xAgent, int yAgent, float ***** Q, int canonZone, int angleHZone, int angleFZone, float eps){
    static const int anyAction[5]   = {0, 1, 2, 3, 4};
    static const int netMiddle[4]   = {1, 2, 3, 4};
    static const int netTop[3]      = {1, 3, 4};
    static const int netBottom[3]   = {1, 2, 4};
    static const int topMiddle[4]   = {0, 1, 3, 4};
    static const int rightTop[3]    = {0, 3, 4};
    static const int rightMiddle[4] = {0, 2, 3, 4};
    static const int rightBottom[3] = {0, 2, 4};
    static const int bottomMiddle[4] = {0, 1, 2, 4};

    const int *candidates = anyAction;
    int candidateCount = 5;
    int proba = rand() % 10000;

    /* Position predicates, written with the same bounds as the individual tests. */
    const int onNet    = (xAgent == (MAP_WIDTH-1)/2+1);
    const int onRight  = (xAgent == MAP_WIDTH- 2);
    const int insideX  = (xAgent > (MAP_WIDTH-1)/2+1 && xAgent < MAP_WIDTH- 2);
    const int onTop    = (yAgent == 1);
    const int onBottom = (yAgent == MAP_HEIGHT - 2);
    const int insideY  = (yAgent > 1 && yAgent < MAP_HEIGHT - 2);

    if (!(proba < eps * 10000)) {
        /* Exploit: best known action for the current state. */
        int receiverZone = convertIntoZone(xAgent, yAgent);
        return argmax(Q[receiverZone][canonZone][angleHZone][angleFZone]);
    }

    /* Explore: the order of these tests is significant. */
    if (insideX && insideY) {
        /* centre of the area: every action is a candidate */
    } else if (onNet && insideY) {
        candidates = netMiddle;     /* on the net column */
        candidateCount = 4;
    } else if (onNet && onTop) {
        candidates = netTop;        /* top-left corner */
        candidateCount = 3;
    } else if (onNet && onBottom) {
        candidates = netBottom;     /* bottom-left corner */
        candidateCount = 3;
    } else if (onTop && insideX) {
        candidates = topMiddle;     /* top row, between the columns */
        candidateCount = 4;
    } else if (onRight && onTop) {
        candidates = rightTop;      /* top-right corner */
        candidateCount = 3;
    } else if (onRight && insideY) {
        candidates = rightMiddle;   /* right column, between the rows */
        candidateCount = 4;
    } else if (onRight && onBottom) {
        candidates = rightBottom;   /* bottom-right corner */
        candidateCount = 3;
    } else if (insideX && onBottom) {
        candidates = bottomMiddle;  /* bottom row, between the columns */
        candidateCount = 4;
    }

    return candidates[rand() % candidateCount];
}


/*
 * Reward for the agent's position: 1 when convertIntoZone(xAgent, yAgent)
 * equals dropZone, 0 otherwise.
 */
int setReward(int xAgent, int yAgent, int dropZone){
    return (convertIntoZone(xAgent, yAgent) == dropZone) ? 1 : 0;
}


stack_t* initStack(int numberelt){
    stack_t          *stack=NULL; 

    stack=(stack_t *)malloc(sizeof(stack_t)); // allocation du ptr de tête. 

    if (stack==NULL){  // vérification de l'allocation.   
        printf("problème d'allocation\n");
        exit(1);
    }

    stack->base = (line_t *) malloc(numberelt*sizeof(line_t));// allocation de la stack de longueur numberelt. 

    if (stack->base==NULL){
        printf("problème d'allocation\n");
        exit(1);
    }

    stack->numberelt=numberelt; //ajout du nombre d'élément insérable dans la file.
    stack->top=-1; //initialisation de l'indice du dernier élément.
    
    return (stack);
}

/* Return 1 when the stack holds no element (top index is -1), 0 otherwise. */
int emptyStack(stack_t *stack){
    return stack->top == -1;
}

/*
 * Return 1 when the stack is full, i.e. when the top index is the last valid
 * index of the element array, 0 otherwise.
 */
int fullStack(stack_t *stack)
{
    return stack->numberelt == (stack->top) + 1;
}

/*
 * Push element on top of the stack.
 * When the stack is already full, a message is printed and the element is
 * not stored.
 */
void actionStack(stack_t *stack, line_t element)
{
    if (fullStack(stack)) {
        printf("Pile pleine\n" );
        return;
    }

    /* Advance the top index, then store the element in the new top slot. */
    stack->top += 1;
    stack->base[stack->top] = element;
}

/*
 * Pop and return the element on top of the stack.
 *
 * When the stack is empty, a message is printed and a zero-initialised
 * line_t is returned (previously the returned value was uninitialised).
 */
line_t unStack(stack_t *stack)
{
    line_t popped = {0};

    if (emptyStack(stack)) {
        printf("Pile vide");
        return popped;
    }

    popped = stack->base[stack->top]; /* read the top element */
    stack->top = stack->top - 1;      /* then drop it */

    return popped;
}

/*
 * Release a stack: first its element array, then the header itself.
 * A NULL stack only produces a message.
 */
void freeStack(stack_t *stack)
{
    if (stack == NULL) {
        printf("Libération impossible, stack == NULL\n");
        return;
    }

    free(stack->base);
    free(stack);
}


void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir la bonne taille il faut diviser par block size 
    int i ; 
    int action;
    point_t canon ; 
    point_t dropPoint ; 
    int canonZone; 
    int dropZone; 
    float angleH;
    float angleF;
    int zoneAngleH;
    int zoneAngleF; 
    int agentZone; 
    int reward;
    agent_t *agent; 
    stack_t *stack;
    line_t line; 
    float greedy=1; 
    int maxAction;
    stack= initStack(6000); 
    
    while (numberRun>0){
        agent=initAgent(); 
        canon=initCanon(canon); 
        dropPoint= initDropPoint(dropPoint);
        angleF=defineAngleF( canon.x, canon.x, dropPoint.x, dropPoint.y );
        angleH=defineAngleH( canon.x, canon.y ); 
        zoneAngleF=converterIntoAngleF(angleF);
        zoneAngleH=converterIntoAngleH(angleH);
        dropZone=convertIntoZone(dropPoint.x,dropPoint.y); 
        canonZone= convertIntoZoneCanon(canon.x,canon.y); 
        reward=0; 
        printf("%d %d %d %d \n",dropZone, canonZone,zoneAngleH,zoneAngleF);
        printf("%d %d  \n",agent->x, agent->y);

        for (i=0; i<numberStep-1;i++){ 
            action = takeAction(agent->x,agent->y,Q,canonZone,zoneAngleH,zoneAngleF,greedy); 
            agentZone = convertIntoZone(agent->x, agent->y); 
            line.receiverZone=agentZone; 
            line.shooterZone =canonZone; 
            line.angleHZone= zoneAngleH; 
            line.angleFZone= zoneAngleF; 
            line.action= action;
            line.reward= reward ; 
            actionStack(stack,line);
            moveAgent(agent, action);

        }
        action = takeAction(agent->x, agent->y,Q,canonZone,zoneAngleH,zoneAngleF,greedy); 
        agentZone = convertIntoZone(agent->x, agent->y); 
       
        line.receiverZone=agentZone; 
        line.shooterZone =canonZone; 
        line.angleHZone= zoneAngleH; 
        line.angleFZone= zoneAngleF; 
        line.action= action;
        line.reward = 0; 
       // actionStack(stack,line);
        moveAgent(agent, action);
         if (agentZone==dropZone){ 
                   reward=1; 
                }
                else{reward= 0;}
        

        Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=  
                    + LEARN_RATE* ( reward - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] );
       
        while (!emptyStack(stack)){
            maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]);
            reward=line.reward;
            line=unStack(stack);

            Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=  
                    + LEARN_RATE* ( reward +  DISCOUNT*Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][maxAction]
                    - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] );
        }  
        numberRun--; 
        greedy=greedy-1/((float)numberRun);

        if ( numberRun%1000000==1){printf (" %d \n  ", numberRun);} 
    } 
    freeStack(stack);
}