From 1c660641c8725ffa7bf67dfe17e18182ab3fc260 Mon Sep 17 00:00:00 2001 From: maberet <maberet@ada.local.isima.fr> Date: Thu, 30 Jun 2022 16:06:27 +0200 Subject: [PATCH] =?UTF-8?q?=20commit=20de=20matrices=20et=20recup=C3=A9rat?= =?UTF-8?q?ion=20pour=20branch=20q=20learn?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ... decroissances et corr\303\251lation .txt" | 241 ++++++++++++++++++ .../jeu_appren_par_renfo/src/main.c | 4 +- .../jeu_appren_par_renfo/src/qlearn.c | 108 +++----- 3 files changed, 272 insertions(+), 81 deletions(-) create mode 100644 "travail_de_groupe/jeu_appren_par_renfo/matriceQ/matrice learn sans decroissances et corr\303\251lation .txt" diff --git "a/travail_de_groupe/jeu_appren_par_renfo/matriceQ/matrice learn sans decroissances et corr\303\251lation .txt" "b/travail_de_groupe/jeu_appren_par_renfo/matriceQ/matrice learn sans decroissances et corr\303\251lation .txt" new file mode 100644 index 0000000..0fe2b5b --- /dev/null +++ "b/travail_de_groupe/jeu_appren_par_renfo/matriceQ/matrice learn sans decroissances et corr\303\251lation .txt" @@ -0,0 +1,241 @@ +0.859472 0.374663 0.000045 0.000044 0.000040 +0.638730 0.900888 0.000038 0.000037 0.000041 +0.209128 0.215602 0.000032 0.000032 0.000027 +0.578615 0.415854 0.000111 0.000111 0.000129 +0.036245 0.805192 0.000038 0.000043 0.000030 +0.944203 0.000029 0.998729 0.000024 0.000029 +0.087830 0.815761 0.000099 0.000110 0.000080 +0.294968 0.879795 0.000061 0.000063 0.000066 +0.056187 0.000044 0.059691 0.000053 0.000043 +0.237420 0.505721 0.000041 0.000040 0.000048 +0.017198 0.187212 0.000035 0.000037 0.000037 +0.023171 0.000034 0.083911 0.000028 0.000036 +0.069218 0.032794 0.000080 0.000069 0.000079 +0.413235 0.892657 0.000179 0.000182 0.000155 +0.967233 0.178440 0.000229 0.000214 0.000210 +0.405528 0.261932 0.000339 0.000322 0.000308 +0.763722 0.576522 0.000059 0.000086 0.000071 +0.227889 0.844402 0.000056 0.000059 0.000051 +0.480519 0.597619 0.000048 0.000038 0.000047 +0.765609 0.550356 0.000047 0.000058 0.000080 +0.182967 0.730294 0.000150 0.000162 0.000154 +0.468006 0.832489 0.000052 0.000050 0.000055 +0.113326 0.081858 0.000048 0.000041 0.000056 +0.994994 0.407514 0.000056 0.000047 0.000057 +0.839681 0.835277 0.000083 0.000082 0.000062 +0.262988 0.234521 0.000149 0.000133 0.000133 +0.076771 0.293424 0.000076 0.000088 0.000081 +0.353078 0.938189 0.000233 0.000221 0.000245 +0.119717 0.964956 0.000030 0.000083 0.000095 +0.100657 0.000019 0.000018 0.202047 0.000016 +0.347076 0.875221 0.000048 0.000061 0.000063 +0.070941 0.114196 0.000240 0.000267 0.000268 +0.260930 0.323693 0.000271 0.000251 0.000298 +0.108177 0.992747 0.000154 0.000123 0.000137 +0.430552 0.611790 0.000046 0.000056 0.000053 +0.865532 0.000025 0.953190 0.000025 0.000029 +0.953845 0.952750 0.000035 0.000030 0.000036 +0.474282 0.000034 0.546885 0.000031 0.000031 +0.944099 0.000046 0.297283 0.000047 0.000043 +0.304277 0.000034 0.504273 0.000034 0.000029 +0.315889 0.430747 0.000089 0.000071 0.000084 +0.379640 0.351436 0.000015 0.000020 0.000025 +0.074477 0.190061 0.000257 0.000239 0.000265 +0.361961 0.894853 0.000054 0.000051 0.000050 +0.129798 0.117288 0.000066 0.000071 0.000054 +0.719365 0.000124 0.727454 0.000135 0.000123 +0.117384 0.852434 0.000064 0.000064 0.000059 +0.960656 0.823301 0.000070 0.000077 0.000083 +0.408601 0.731694 0.000041 0.000039 0.000034 +0.188708 0.308631 0.000084 0.000076 0.000064 +0.495610 0.146224 0.001351 0.001182 0.001330 +0.073430 0.071711 0.000079 0.000081 0.000087 +0.820206 0.000020 0.781166 0.000017 0.000015 +0.280513 0.309173 0.000155 0.000193 0.000158 +0.892248 0.886107 0.000092 0.000082 0.000077 +0.362828 0.882052 0.000068 0.000055 0.000069 +0.258293 0.976224 0.000116 0.000125 0.000114 +0.309898 0.357003 0.000031 0.000034 0.000034 +0.212145 0.969597 0.000105 0.000086 0.000098 +0.181404 0.227329 0.000025 0.000027 0.000025 +0.697985 0.032355 0.661617 0.030974 0.029701 +0.890255 0.021216 0.745476 0.016283 0.022304 +0.715205 0.026983 0.702801 0.023722 0.024369 +0.996494 0.011352 0.893517 0.014648 0.017001 +0.172980 0.019895 0.860332 0.020188 0.020209 +0.522614 0.011159 0.780478 0.011465 0.011264 +0.517683 0.029334 0.640197 0.023081 0.028953 +0.398802 0.020222 0.429286 0.018679 0.023214 +0.325923 0.027876 0.403023 0.032343 0.025299 +0.680309 0.022618 0.989308 0.018152 0.022633 +0.091655 0.024381 0.287640 0.020884 0.022923 +0.877671 0.027869 0.870531 0.029291 0.032490 +0.715108 0.011288 0.746392 0.004352 0.010709 +0.888560 0.015251 0.639395 0.016707 0.012226 +0.331413 0.012299 0.451058 0.010292 0.012052 +0.480146 0.010985 0.973399 0.007023 0.011132 +0.642852 0.026476 0.590350 0.023645 0.021682 +0.583832 0.019040 0.542210 0.018177 0.015375 +0.649101 0.021189 0.906142 0.025096 0.025038 +0.500496 0.019560 0.723469 0.024231 0.026638 +0.942451 0.020074 0.609231 0.017834 0.021829 +0.439322 0.012179 0.610385 0.008317 0.008498 +0.853214 0.022912 0.766186 0.015404 0.024048 +0.481305 0.019436 0.379524 0.019122 0.017216 +0.444509 0.023186 0.752372 0.014984 0.021328 +0.429078 0.013357 0.097166 0.009577 0.016615 +0.404657 0.006300 0.714776 0.010411 0.012517 +0.239072 0.021398 0.200224 0.026235 0.017862 +0.327942 0.021284 0.395492 0.020636 0.018440 +0.390151 0.020385 0.378459 0.019447 0.019699 +0.914536 0.020572 0.941833 0.021247 0.015585 +0.171885 0.020070 0.310007 0.016367 0.017214 +0.980053 0.015449 0.893198 0.011886 0.011647 +0.873043 0.025614 0.846775 0.028398 0.024295 +0.759108 0.010491 0.827022 0.034250 0.032344 +0.685408 0.009827 0.718882 0.012377 0.011277 +0.312818 0.011708 0.988908 0.011972 0.013108 +0.357900 0.013120 0.300279 0.013834 0.011882 +0.436795 0.019571 0.974825 0.024036 0.021490 +0.922309 0.024658 0.480993 0.024292 0.024686 +0.508533 0.023799 0.629982 0.025907 0.026196 +0.589025 0.613191 0.015459 0.015299 0.016756 +0.656659 0.010233 0.634675 0.014694 0.013088 +0.509232 0.012120 0.617589 0.014962 0.014356 +0.700150 0.022247 0.921749 0.027846 0.019750 +0.340017 0.025565 0.834488 0.028367 0.019393 +0.230825 0.010901 0.478043 0.025827 0.024044 +0.537069 0.016522 0.649418 0.010685 0.013431 +0.505943 0.023345 0.981644 0.022416 0.014262 +0.748176 0.022666 0.943690 0.024035 0.023134 +0.942774 0.033429 0.505842 0.030735 0.026375 +0.955558 0.026025 0.589300 0.026004 0.017539 +0.434479 0.037919 0.863742 0.036347 0.035896 +0.950855 0.018384 0.601599 0.014975 0.015688 +0.751051 0.007987 0.341084 0.008936 0.007102 +0.483948 0.022796 0.541835 0.027177 0.020401 +0.536088 0.018087 0.413181 0.014118 0.015194 +0.556027 0.029022 0.972398 0.025973 0.012029 +0.568842 0.009601 0.399771 0.010160 0.009039 +0.389182 0.018299 0.628144 0.018882 0.014106 +0.500685 0.012610 0.006422 0.684987 0.013034 +0.973654 0.010640 0.010973 0.718868 0.005607 +0.981758 0.012612 0.013928 0.783137 0.014450 +0.480860 0.010507 0.012730 0.435569 0.010182 +0.776943 0.006704 0.584571 0.011790 0.009674 +0.302005 0.832031 0.007803 0.008690 0.007846 +0.497749 0.035371 0.402178 0.032273 0.036565 +0.629004 0.009256 0.996815 0.010375 0.010554 +0.559054 0.015664 0.387339 0.011300 0.016194 +0.545005 0.009290 0.013128 0.432794 0.010863 +0.302384 0.012167 0.965774 0.012967 0.014854 +0.463310 0.012570 0.416391 0.009824 0.013324 +0.973308 0.013687 0.007065 0.584728 0.013649 +0.976101 0.012524 0.011967 0.942506 0.013469 +0.902345 0.013546 0.467462 0.013631 0.011800 +0.957009 0.011787 0.747080 0.017070 0.019876 +0.504182 0.017337 0.018804 0.980058 0.014647 +0.845695 0.015125 0.813845 0.014844 0.015038 +0.616836 0.020908 0.021029 0.519819 0.024319 +0.641671 0.020829 0.022036 0.565378 0.021803 +0.767606 0.019801 0.018875 0.682215 0.020657 +0.655487 0.938846 0.013609 0.016572 0.019365 +0.452370 0.016463 0.011541 0.606666 0.013870 +0.828997 0.010496 0.740451 0.012112 0.011771 +0.387554 0.013096 0.014802 0.615951 0.014140 +0.833810 0.018816 0.015416 0.739741 0.018441 +0.997943 0.017848 0.011137 0.488672 0.022751 +0.840806 0.015273 0.019565 0.815872 0.016182 +0.786542 0.008687 0.008839 0.489117 0.009881 +0.252177 0.010723 0.130114 0.010762 0.011141 +0.927819 0.009538 0.016399 0.953030 0.017521 +0.366418 0.015720 0.879443 0.013135 0.013450 +0.767414 0.038773 0.038106 0.450730 0.032225 +0.567275 0.020680 0.434975 0.017590 0.018923 +0.982972 0.020793 0.013764 0.792112 0.016950 +0.845715 0.715470 0.009607 0.010624 0.010422 +0.456584 0.003917 0.012532 0.567324 0.013198 +0.619315 0.382853 0.011313 0.008101 0.015353 +0.928502 0.850791 0.007311 0.003309 0.008856 +0.863746 0.315788 0.015348 0.011819 0.015674 +0.737726 0.015115 0.017766 0.801662 0.016917 +0.630503 0.014947 0.018990 0.577694 0.017395 +0.695054 0.018645 0.011728 0.934636 0.014349 +0.982089 0.019118 0.018898 0.822159 0.019796 +0.374802 0.014126 0.009195 0.590238 0.015493 +0.997344 0.845719 0.009803 0.008756 0.011257 +0.475727 0.022646 0.921508 0.017957 0.017703 +0.362733 0.916663 0.013544 0.012258 0.013372 +0.465309 0.015415 0.013759 0.467171 0.013509 +0.951323 0.015654 0.011943 0.633650 0.013730 +0.413040 0.016681 0.018593 0.956975 0.016074 +0.419459 0.010378 0.937294 0.008689 0.009984 +0.154309 0.134683 0.011568 0.010363 0.011963 +0.851974 0.014126 0.014157 0.737192 0.012872 +0.545803 0.012475 0.014324 0.691225 0.012289 +0.912250 0.009044 0.007146 0.791949 0.007092 +0.795915 0.019662 0.438847 0.020298 0.016481 +0.987208 0.009213 0.006252 0.783106 0.010634 +0.238355 0.239309 0.008449 0.008441 0.008321 +0.993009 0.011505 0.007355 0.945241 0.013187 +0.925170 0.034669 0.586014 0.034873 0.035145 +0.953185 0.027085 0.914608 0.030165 0.028784 +0.974978 0.018956 0.884348 0.019652 0.017029 +0.613534 0.030416 0.918815 0.022565 0.027455 +0.393762 0.013072 0.030104 0.315081 0.031334 +0.938666 0.014617 0.014242 0.483753 0.016958 +0.956596 0.041568 0.779173 0.030153 0.040397 +0.565922 0.025641 0.019370 0.399612 0.021778 +0.322288 0.030891 0.020495 0.652942 0.018508 +0.750370 0.013394 0.014536 0.815117 0.013711 +0.899079 0.015993 0.010753 0.447670 0.017646 +0.539806 0.030028 0.032280 0.804526 0.014775 +0.697475 0.015770 0.855859 0.023116 0.026219 +0.881388 0.018128 0.011467 0.016090 0.495562 +0.675092 0.013487 0.013826 0.356853 0.017847 +0.171848 0.021246 0.020132 0.406610 0.021597 +0.244406 0.026951 0.030450 0.026467 0.230883 +0.801753 0.020389 0.019582 0.515948 0.020107 +0.556942 0.029546 0.417276 0.015838 0.016401 +0.961571 0.028824 0.994770 0.033575 0.033986 +0.879132 0.022250 0.017559 0.005491 0.221015 +0.593177 0.015910 0.015988 0.536991 0.014506 +0.995752 0.021248 0.715841 0.025470 0.025194 +0.433523 0.026582 0.026697 0.389712 0.021939 +0.495679 0.038052 0.878999 0.029919 0.035935 +0.916923 0.005700 0.015568 0.897847 0.010664 +0.892212 0.005328 0.621805 0.013148 0.015068 +0.435783 0.019884 0.016932 0.204198 0.022445 +0.536804 0.017641 0.010062 0.339462 0.017040 +0.357291 0.016520 0.302854 0.019471 0.017648 +0.749615 0.017935 0.015629 0.673341 0.019627 +0.421949 0.023513 0.027686 0.849253 0.025798 +0.421824 0.031682 0.026384 0.976197 0.034843 +0.244457 0.031551 0.029607 0.344371 0.018598 +0.580323 0.031288 0.744496 0.037561 0.032315 +0.666025 0.018682 0.017655 0.801898 0.012455 +0.232430 0.014233 0.016024 0.017682 0.823742 +0.318798 0.014529 0.008509 0.329260 0.011980 +0.456407 0.028625 0.029256 0.405104 0.029685 +0.466901 0.025670 0.024932 0.581076 0.026036 +0.695806 0.028359 0.968157 0.020438 0.027455 +0.760418 0.914676 0.024246 0.023490 0.019599 +0.564767 0.016281 0.641402 0.018374 0.016910 +0.795901 0.021628 0.991672 0.026489 0.026536 +0.718554 0.020575 0.724835 0.018116 0.022265 +0.296646 0.023407 0.013398 0.921679 0.029250 +0.920767 0.029849 0.025219 0.766898 0.033115 +0.538628 0.024795 0.965237 0.025887 0.027051 +0.677096 0.031363 0.925530 0.024475 0.021835 +0.947851 0.031002 0.624072 0.033026 0.032881 +0.742992 0.027530 0.030826 0.032933 0.462396 +0.537264 0.033054 0.019145 0.501063 0.033893 +0.315959 0.015685 0.019252 0.196691 0.018744 +0.921857 0.015627 0.019401 0.910548 0.016757 +0.351236 0.014535 0.016770 0.480002 0.015726 +0.841519 0.022921 0.026516 0.549561 0.026332 +0.553094 0.032907 0.034357 0.366992 0.032701 +0.587959 0.028463 0.026078 0.906900 0.025585 +0.388453 0.006259 0.011614 0.344376 0.008950 +0.932203 0.016751 0.782068 0.014890 0.016333 + diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/main.c b/travail_de_groupe/jeu_appren_par_renfo/src/main.c index 01a53aa..8a59783 100644 --- a/travail_de_groupe/jeu_appren_par_renfo/src/main.c +++ b/travail_de_groupe/jeu_appren_par_renfo/src/main.c @@ -5,11 +5,11 @@ int game_state; float ***** Q; int main(){ float ***** Q = allocateAndInitiateQ(); - int i= 1; + int i= 10; srand ( time(NULL)); while (i>0){ - traningAgent(10000,30, Q); + traningAgent(100000,30, Q); i--;} writeQ(Q); diff --git a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c index 46dfd20..7850eb5 100644 --- a/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c +++ b/travail_de_groupe/jeu_appren_par_renfo/src/qlearn.c @@ -368,11 +368,10 @@ void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir int maxAction; stack= initStack(6000); - int j ,k,l,m; - - while (numberRun>0){ + int j ,k,l,m, n ; + n = 0; + while (n<numberRun){ agent=initAgent(); -<<<<<<< HEAD for( j=0; j<4;j++){ for (k=0;k<3;k++){ for (l=0; l<5;l++){ @@ -395,38 +394,47 @@ void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir line.angleHZone=zoneAngleH; line.angleFZone=zoneAngleF; line.action= action; - line.reward= (agentZone==dropZone); + line.reward= 0; actionStack( stack , line); moveAgent(agent, action); - if ((agentZone==dropZone)){break;} + reward=(agentZone==dropZone); + if ((agentZone==dropZone)){ + break;} + i--; } - if ( (agentZone!=dropZone) ){ } - action =takeAction(agent->x ,agent->y , Q, canonZone, zoneAngleH, zoneAngleF, greedy); - agentZone= convertIntoZone( agent->x,agent->y ); - line.receiverZone= agentZone; - line.shooterZone= canonZone; - line.angleHZone=zoneAngleH; - line.angleFZone=zoneAngleF; - line.action= action; - moveAgent(agent, action); - actionStack( stack , line); - agentZone= convertIntoZone( agent->x,agent->y ); + if ( (agentZone!=dropZone) ){ + action =takeAction(agent->x ,agent->y , Q, canonZone, zoneAngleH, zoneAngleF, greedy); + agentZone= convertIntoZone( agent->x,agent->y ); + line.receiverZone= agentZone; + line.shooterZone= canonZone; + line.angleHZone=zoneAngleH; + line.angleFZone=zoneAngleF; + line.action= action; + moveAgent(agent, action); + actionStack( stack , line); + agentZone= convertIntoZone( agent->x,agent->y ); + } + line = unStack(stack); reward = (agentZone==dropZone); Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] += LEARN_RATE* ( reward - - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] ); + Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] ); while ( !emptyStack(stack)){ reward=line.reward; maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]); + agentZone=line.receiverZone; + canonZone=line.shooterZone; + zoneAngleH=line.angleHZone; + zoneAngleF=line.angleFZone; line=unStack(stack); Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] += DISCOUNT*(reward + - LEARN_RATE* Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][maxAction] + LEARN_RATE* Q[agentZone][canonZone][zoneAngleH][zoneAngleF][maxAction] -Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action]); } m--; @@ -437,66 +445,8 @@ void traningAgent ( int numberRun, int numberStep, float *****Q) {// pour avoir } } writeQ(Q); if ( numberRun%10000==1){printf (" %d \n ", numberRun);} - greedy = greedy - 1/numberRun; -======= - canon=initCanon(canon); - dropPoint= initDropPoint(dropPoint); - angleF=defineAngleF( canon.x, canon.x, dropPoint.x, dropPoint.y ); - angleH=defineAngleH( canon.x, canon.y ); - zoneAngleF=converterIntoAngleF(angleF); - zoneAngleH=converterIntoAngleH(angleH); - dropZone=convertIntoZone(dropPoint.x,dropPoint.y); - canonZone= convertIntoZoneCanon(canon.x,canon.y); - reward=0; - - for ( i=0; i< numberStep - 1; i++ ){ - action =takeAction(agent->x ,agent->y , Q, canonZone, zoneAngleH, zoneAngleF, greedy); - agentZone= convertIntoZone( agent->x,agent->y ); - line.receiverZone= agentZone; - line.shooterZone= canonZone; - line.angleHZone=zoneAngleH; - line.angleFZone=zoneAngleF; - line.action= action; - line.reward= 0; - actionStack( stack , line); - moveAgent(agent, action); - } - action =takeAction(agent->x ,agent->y , Q, canonZone, zoneAngleH, zoneAngleF, greedy); - agentZone= convertIntoZone( agent->x,agent->y ); - line.receiverZone= agentZone; - line.shooterZone= canonZone; - line.angleHZone=zoneAngleH; - line.angleFZone=zoneAngleF; - line.action= action; - moveAgent(agent, action); - agentZone= convertIntoZone( agent->x,agent->y ); - - reward = (agentZone==dropZone); - - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] += - LEARN_RATE* ( reward - - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] ); - - while ( !emptyStack(stack)){ - reward=line.reward; - maxAction= argmax(Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone]); - agentZone= line.receiverZone; - canonZone= line.shooterZone; - zoneAngleH= line.angleHZone; - zoneAngleF=line.angleFZone; - - line=unStack(stack); - - - Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action] += - DISCOUNT*(reward + - LEARN_RATE* Q[agentZone][canonZone][zoneAngleH][zoneAngleF][maxAction] - -Q[line.receiverZone][line.shooterZone][line.angleHZone][line.angleFZone][line.action]); - } - //if ( numberRun%1000000==1){printf (" %d \n ", numberRun);} - //greedy = greedy - 1/numberRun; ->>>>>>> qlearn - numberRun--; + greedy = greedy - 1/(n+1); + n++; } freeStack(stack); } \ No newline at end of file -- GitLab