Skip to content
Snippets Groups Projects
Commit b3a1cd8b authored by maberet's avatar maberet
Browse files

Modification de la formule de Q-learning

parent 22b6e2fe
No related branches found
No related tags found
No related merge requests found
......@@ -6,11 +6,13 @@ float ***** Q;
int main(){
Q = allocateAndInitiateQ();
readQFromFile(Q);
int i= 10;
int i= 50;
srand ( time(NULL));
while (i>0){
traningAgent(1000000,30, Q);
printf("%d \n ", i);
writeQ(Q);
i--;}
writeQ(Q);
......
......@@ -400,7 +400,6 @@ void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir
line.angleFZone=zoneAngleF;
line.action= action;
moveAgent(agent, action);
actionStack( stack , line);
agentZone= convertIntoZone( agent->x,agent->y );
reward = (agentZone==dropZone);
......@@ -412,16 +411,21 @@ void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir
while ( !emptyStack(stack)){
reward=line.reward;
maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]);
agentZone= line.receiverZone;
canonZone= line.shooterZone;
zoneAngleH= line.angleHZone;
zoneAngleF=line.angleFZone;
line=unStack(stack);
Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] +=
DISCOUNT*(reward +
LEARN_RATE* Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][maxAction]
LEARN_RATE* Q[agentZone][canonZone][zoneAngleH][zoneAngleF][maxAction]
-Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action]);
}
//if ( numberRun%1000000==1){printf (" %d \n ", numberRun);}
greedy = greedy - 1/numberRun;
//greedy = greedy - 1/numberRun;
numberRun--;
}
freeStack(stack);
......
0% Loading Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment