Skip to content
Snippets Groups Projects
Commit 58a68fe2 authored by maberet
Browse files

commit pour recup qlearn

parent 5977d61a
No related branches found
No related tags found
No related merge requests found
......@@ -6,11 +6,11 @@ int game_state;
int main(){
float ***** Q = allocateAndInitiateQ();
int i= 10;
int i= 1;
srand ( time(NULL));
while (i>0){
traningAgent(1000000,30, Q);
traningAgent(10000,30, Q);
i--;}
writeQ(Q);
......
......@@ -113,7 +113,7 @@ void writeQ(float *****Q){
int argmax(float * arr){
int i;
float max = arr[0];
float max = arr[rand()%5];
int maxIndex = 0;
//printf("argmax: %f ", arr[0]);
for(i = 1; i < NUMBER_ACTION; i++){
......@@ -321,7 +321,7 @@ void freeStack(stack_t *stack)
{
free(stack->base); //on libère le tableau dynamique
free(stack); //on libère la tête de la stack
printf("Pile libérée\n");
//printf("Pile libérée\n");
}
else
{
......@@ -334,12 +334,12 @@ void freeStack(stack_t *stack)
void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir la bonne taille il faut diviser par block size
int i ;
int action;
point_t canon ;
// point_t canon ;
point_t dropPoint ;
int canonZone;
int dropZone;
float angleH;
float angleF;
// float angleH;
// float angleF;
int zoneAngleH;
int zoneAngleF;
int agentZone;
......@@ -351,120 +351,76 @@ void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir
int maxAction;
stack= initStack(6000);
int j ,k,l,m;
while (numberRun>0){
agent=initAgent();
canon=initCanon(canon);
dropPoint= initDropPoint(dropPoint);
angleF=defineAngleF( canon.x, canon.x, dropPoint.x, dropPoint.y );
angleH=defineAngleH( canon.x, canon.y );
zoneAngleF=converterIntoAngleF(angleF);
zoneAngleH=converterIntoAngleH(angleH);
dropZone=convertIntoZone(dropPoint.x,dropPoint.y);
canonZone= convertIntoZoneCanon(canon.x,canon.y);
reward=0;
for ( i=0; i< numberStep - 1; i++ ){
action =takeAction(agent->x ,agent->y , Q, canonZone, zoneAngleH, zoneAngleF, greedy);
agentZone= convertIntoZone( agent->x,agent->y );
line.receiverZone= agentZone;
line.shooterZone= canonZone;
line.angleHZone=zoneAngleH;
line.angleFZone=zoneAngleF;
line.action= action;
line.reward= 0;
actionStack( stack , line);
moveAgent(agent, action);
}
action =takeAction(agent->x ,agent->y , Q, canonZone, zoneAngleH, zoneAngleF, greedy);
agentZone= convertIntoZone( agent->x,agent->y );
line.receiverZone= agentZone;
line.shooterZone= canonZone;
line.angleHZone=zoneAngleH;
line.angleFZone=zoneAngleF;
line.action= action;
moveAgent(agent, action);
actionStack( stack , line);
agentZone= convertIntoZone( agent->x,agent->y );
reward = (agentZone==dropZone);
Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=
LEARN_RATE* ( reward -
Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] );
while ( !emptyStack(stack)){
reward=line.reward;
maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]);
line=unStack(stack);
Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=
DISCOUNT*(reward +
LEARN_RATE* Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][maxAction]
-Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action]);
}
//if ( numberRun%1000000==1){printf (" %d \n ", numberRun);}
for( j=0; j<4;j++){
for (k=0;k<3;k++){
for (l=0; l<5;l++){
m=100;
while(m>0){
i= numberStep;
agent = initAgent();
zoneAngleF=l;
zoneAngleH=k;
dropPoint=initDropPoint(dropPoint);
dropZone=convertIntoZone(dropPoint.x,dropPoint.y);
canonZone= j;
reward=0;
//printf (" %d \n ", m);
while(i>0){
action =takeAction(agent->x ,agent->y , Q, canonZone, zoneAngleH, zoneAngleF, greedy);
agentZone= convertIntoZone( agent->x,agent->y );
line.receiverZone= agentZone;
line.shooterZone= canonZone;
line.angleHZone=zoneAngleH;
line.angleFZone=zoneAngleF;
line.action= action;
line.reward= (agentZone==dropZone);
actionStack( stack , line);
moveAgent(agent, action);
if ((agentZone==dropZone)){break;}
}
if ( (agentZone!=dropZone) ){ }
action =takeAction(agent->x ,agent->y , Q, canonZone, zoneAngleH, zoneAngleF, greedy);
agentZone= convertIntoZone( agent->x,agent->y );
line.receiverZone= agentZone;
line.shooterZone= canonZone;
line.angleHZone=zoneAngleH;
line.angleFZone=zoneAngleF;
line.action= action;
moveAgent(agent, action);
actionStack( stack , line);
agentZone= convertIntoZone( agent->x,agent->y );
reward = (agentZone==dropZone);
Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=
LEARN_RATE* ( reward -
Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] );
while ( !emptyStack(stack)){
reward=line.reward;
maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]);
line=unStack(stack);
Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=
DISCOUNT*(reward +
LEARN_RATE* Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][maxAction]
-Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action]);
}
m--;
}
}
}
} writeQ(Q);
if ( numberRun%10000==1){printf (" %d \n ", numberRun);}
greedy = greedy - 1/numberRun;
numberRun--;
}
freeStack(stack);
// printf("%d %d %d %d \n",dropZone, canonZone,zoneAngleH,zoneAngleF);
// printf("%d %d \n",agent->x, agent->y);
// for (i=0; i<numberStep-1;i++){
// action = takeAction(agent->x,agent->y,Q,canonZone,zoneAngleH,zoneAngleF,greedy);
// agentZone = convertIntoZone(agent->x, agent->y);
// line.receiverZone=agentZone;
// line.shooterZone =canonZone;
// line.angleHZone= zoneAngleH;
// line.angleFZone= zoneAngleF;
// line.action= action;
// line.reward= reward ;
// actionStack(stack,line);
// moveAgent(agent, action);
// }
// action = takeAction(agent->x, agent->y,Q,canonZone,zoneAngleH,zoneAngleF,greedy);
// agentZone = convertIntoZone(agent->x, agent->y);
// line.receiverZone=agentZone;
// line.shooterZone =canonZone;
// line.angleHZone= zoneAngleH;
// line.angleFZone= zoneAngleF;
// line.action= action;
// line.reward = 0;
// // actionStack(stack,line);
// moveAgent(agent, action);
// if (agentZone==dropZone){
// reward=1;
// }
// else{reward= 0;}
// Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=
// + LEARN_RATE* ( reward - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] );
// while (!emptyStack(stack)){
// maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]);
// reward=line.reward;
// line=unStack(stack);
// Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=
// + LEARN_RATE* ( reward + DISCOUNT*Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][maxAction]
// - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] );
// }
// numberRun--;
// greedy=greedy-1/((float)numberRun);
// if ( numberRun%1000000==1){printf (" %d \n ", numberRun);}
// }
// freeStack(stack);
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment