diff --git a/src/Environments/LunarLander.py b/src/Environments/LunarLander.py index f869eba..481535e 100644 --- a/src/Environments/LunarLander.py +++ b/src/Environments/LunarLander.py @@ -11,7 +11,7 @@ def __init__(self, algo: Algo): def __repr__(self): return "LunarLander-v3" - def success_func(self, env: gym.Env, info: dict) -> bool: + def success_func(self, env: gym.Env, info: dict) -> tuple[bool|bool]: """ Cette fonction vérifie si le lander est "awake" et met à jour l'info. """ @@ -22,9 +22,9 @@ def success_func(self, env: gym.Env, info: dict) -> bool: # check if the lander is awake if hasattr(base_env, "lander") and not base_env.lander.awake: - return True + return True, False else: - return False + return False, True def objective_metric(self, states)-> list[dict[str, float]]: pass # TODO \ No newline at end of file diff --git a/src/PolicyTrainer/TrainingInfoCallback.py b/src/PolicyTrainer/TrainingInfoCallback.py index 43e60ed..51ef87d 100644 --- a/src/PolicyTrainer/TrainingInfoCallback.py +++ b/src/PolicyTrainer/TrainingInfoCallback.py @@ -17,9 +17,8 @@ def __init__(self): def _on_training_start(self): """Init at the begin of the training""" - self.num_envs = self.training_env.num_envs - self.current_episode_rewards = np.zeros(self.num_envs) - self.current_episode_lengths = np.zeros(self.num_envs, dtype=int) + self.current_episode_rewards = 0 + self.current_episode_lengths = 0 def _on_step(self) -> bool: """call every steps""" @@ -27,24 +26,20 @@ def _on_step(self) -> bool: rewards = self.locals["rewards"] dones = self.locals["dones"] - self.current_episode_rewards += rewards + self.current_episode_rewards += rewards[0] self.current_episode_lengths += 1 - - for i in range(self.num_envs): - if dones[i]: - self.training_metrics["episode_observations"].append( - obs - ) - self.training_metrics["episode_rewards"].append( - self.current_episode_rewards[i] - ) - self.training_metrics["episode_lengths"].append( - self.current_episode_lengths[i] + self.training_metrics["episode_observations"].append( + obs[0] ) - - self.current_episode_rewards[i] = 0 - self.current_episode_lengths[i] = 0 - + if dones[0]: + self.training_metrics["episode_rewards"].append( + self.current_episode_rewards + ) + self.training_metrics["episode_lengths"].append( + self.current_episode_lengths + ) + self.current_episode_rewards = 0 + self.current_episode_lengths = 0 return True def _on_training_end(self) -> None: diff --git a/src/log/CartPole-v1_log.csv b/src/log/CartPole-v1_log.csv index 6d68a25..0b90a52 100644 --- a/src/log/CartPole-v1_log.csv +++ b/src/log/CartPole-v1_log.csv @@ -2094,3 +2094,130 @@ def reward_func(observations: np.ndarray, is_success: bool, is_failure: bool) -> reward = (1 - angle_penalty) * cart_position_reward + time_step_reward + velocity_penalty + stability_bonus return reward";0.0028752910002143956,0.0032272109789440125,0.00460032956992881,0.005861702303273871,0.0019121816802174162,0.006758292537526723,0.010263517839929834,0.002890896513744783,0.018591794137966564,0.0019789743503139893,0.0015321618352290465,0.010093722052068349,0.030582858070524868,0.0019909419173206743,0.004624676601394978,0.006826802525956128,0.004566442288371031,0.0058829991148980215,0.002408147889306335,0.004598109748662968,0.0036635106324553546,0.0041583210951978335,0.00982967683177077,0.0019245409176859073,0.012594326893227358,0.025087522445317596,0.015203495402767286,0.0028605202790410026,0.005073160157121282,0.005454725988989229,0.007229574953948559,0.0028107587093604597,0.0023967894979113704,0.00598865228046724,0.005043657998814842,0.011929728156248582,0.013875119097846318,0.004094270430318339,0.008462911933836647,0.0041023269084268284,0.0041403937934881485,0.007147031541566229,0.0015188527871411626,0.0036577745905936017,0.009846385366277762,0.008018047631566536,0.0032693254720933674,0.004660766701066789,0.0028155277554600877,0.0027903298124410197,0.007288947152209935,0.00945377356942969,0.0023938546863122973,0.0024456963862270675,0.006359773731345482,0.0024254133680305743,0.001477042010841115,0.0027767227932172073,0.0032217898853712946,0.0015696411496009,0.006275994938589198,0.004970280897177813,0.0020142806410855334,0.0032060079706405517,0.004548179603391695,0.0028640486670299522,0.0028749028395775047,0.0007019758504488465,0.008067228650207441,0.006260251995914363,0.004238739269596104,0.013640542497528215,0.0028887202403065893,0.001972752704676225,0.005880400804512832,0.00552859758595872,0.007101817483036422,0.019549867223192425,0.0015156201084009738,0.00454932557422578,0.00199802370405093,0.005390597016016076,0.006294593318822621,0.002367992736416726,0.007179203428695203,0.0045866589318785764,0.0019467347885609141,0.019524067618186824,0.005445510774683445,0.004608995873616599,0.006707049899728178,0.0036741658109295805,0.005510237977814325,0.006764178097929853,0.0033098565142320163,0.0040974496297263045,0.001529568074616895,0.006745478998720721,0.012329520612894702,0.002373371634094355,0.0032270280520691856,0.016763538221173907,0.006817006006648319,0.008841053788507188,0.003707769212648498,0.004525096212889723,0.002778637857685619,0.015156965809861948,0.0037465442243854374,0.004508989990336839,0.0045430714822065794,0.0024294682557554704,0.0046021253523475195,0.00507760710528853,0.007556093419814041,0.00411141184323151,0.017456484280067294,0.0011119346839527022,0.005761928276428123,0.00927225720224072,0.012780311969498049,0.008007294489979127,0.0024172926211280857,0.005504443490868062,0.0028100506606809493,0.004119332712104767,0.006327745175066554,0.005439733056900664,0.002003139182869016,0.015613895486259709,0.00503709254632763,0.020408145222623048,0.002427135184139039,0.00280820784660838,0.0015485400562134887,0.003338500299357775,0.003241646758825706,0.006706377703260206,0.006808251879341548,0.009774534986481334,0.013235040855680618,0.005468182683711869,0.003246512572098175,0.002862559773302325,0.011041240469806068,0.002800407559719978,0.00415785670636353,0.00585298293623311,0.0010901820110688566,0.005501458137790214,0.001504393114451344,0.014165169780880435,0.004973686051314193,0.0029095007176876955,0.0027754196082256364,0.014082820110833411,0.005886797473419689,0.00327320372462958,0.009354924342450978,0.0028395617625412476,0.0014760800068299804,0.00842667647541595,0.00281897910929183,0.00501762773331557,0.006671522749974022,0.0023177814411704132,0.003669239498675059,0.01707896785019789,0.007072381647421523,0.0037451315588552226,0.0028663546219990775,0.0032799842383886176,0.002397969787033512,0.01314272336087123,0.0037259264988855727,0.01784819623580328,0.0045143655861791775,0.012252815313282392,0.02003063742432307,0.01053725230888634,0.013638543405251469,0.013133780430982738,0.003252879082509466,0.009298338165210984,0.002426878940921544,0.005731690900797459,0.006298363494750157,0.0023802388535281434,0.004488727251128743,0.012420948224095282,0.006243882874475996,0.007735855628507923,0.008929564969191768,0.013717670406058988,0.002889926580129211,0.00666245320640156,0.010630278502342815,0.009764080829979274,0.009422840156520337,0.012360727064183735,0.015471789461114691,0.03426303181775295,0.003669546153377814,0.010482053786424205,0.012747711857730748,0.031377393957684074,0.0076945063031887305,0.006774057683093982,0.014492757547404637,0.004230401251532028,0.002866728171364841,0.018381515925541176,0.002428670407555543,0.01194048959142227,0.02181819790917754,0.002883608843284886,0.014281651224288946,0.004587782127848768,0.004183474684429458,0.004097400570173605,0.0024018348842259696,0.02367890787702363,0.0010871268514732555,0.0028683685335365756,0.0067335667829528795,0.003262967983232043,0.001921369751877191,0.009784710800868287,0.01280833831142451,0.01224753417072832,0.006810323196015432,0.003247917672005795,0.018035028011836974,0.011031003012494128,0.01577110743385685,0.004126048931071104,0.004998535273728933,0.0028317679493299113,0.004658165140843716,0.014042927388852629,0.01664484088230372,0.028242070714987643,0.006727573951900867,0.004571997769239937,0.005026573731051026,0.003696268868334751,0.004953149935743277,0.011415643814969418,0.015755123873185065,0.0041070916647404495,0.020498205172048433,0.006412133403421591,0.013801359867271512,0.010165664675263284,0.019874518384207042,0.006824427362439421,0.010739396827055108,0.009606429244313158,0.004119436186943002,0.020090886922820968,0.00855106298048625,0.005001219899808327,0.025391684236429426,0.009854112310824795,0.00805168238772136,0.015875860238339923,0.014680198551047458,0.0032721501271875167,0.007603884105810673,0.02599906219382488,0.0037315414387884085,0.008978508567676204,0.002835506146852666,0.003754705893246564,0.0023700389383434417,0.01831214210420344,0.0019475259810928542,0.013363206714220636,0.003782988192404785,0.017734128433992028,0.003769943837112895,0.011945895241765363,0.009332307706665289,0.0041213403059574185,0.006721051345276321,0.028298711515502794,0.024801607011999578,0.005036512255275024,0.010957838424256024,0.0037505519244642543,0.010612904271043617,0.01629349368100152,0.01567824690413636,0.014542748607637213,0.0033643698408746437,0.011505412578920376,0.021854206632908733,0.004213329125162548,0.01102587102449959,0.015691835984255054,0.0041678952995956595,0.008866448385716307,0.004540071595853712,0.008381873300389522,0.006711916154978599,0.010215010499682974,0.0020048331023690825,0.0015258283951680653,0.01171872957175843,0.026519943090062878,0.01749440260853925,0.004098278312388077,0.005011778363107119,0.012307006411999123,0.008525367890267005,0.012335885329395481,0.013222979329361024,0.004645843653492088,0.005481258887415024,0.003772097725667742,0.04421079265298969,0.0054519780300691465,0.0011243352593590806,0.005482542261394853,0.02464066344455156,0.0067287783158220555,0.006436534226204418,0.011995511478695724,0.014118410047455489,0.010574536892972173,0.011835773289118313,0.007261247873968142,0.030099396301661106,0.04673433555190049,0.004557206223105382,0.00666724721125614,0.009024012173762143,0.018489416990975557,0.017556267406459117,0.00501804337458161,0.006394659909074059,0.012843104869120242,0.019222595366909272,0.018060401472100494,0.008896752292028135,0.006795031174846708,0.011203974593933477,0.009443424187701017,0.008430087531257928,0.018007725499802788,0.02284725912721239,0.013833023089774849,0.027909241895909508,0.00246120458661185,0.0019555278879235926,0.00539096845948689,0.02231216466200787,0.006752112229826845,0.037402441666592026,0.020977533537104017,0.052479527371214515,0.003716474384397381,0.01838928155615838,0.010838411769767213,0.014044153617683101,0.019413406426439322,0.008793760119717216,0.008533289019127293,0.013976792693933445,0.015748556444796422,0.029235006371119688,0.01996971367545966,0.00931609031365806,0.00420520627236511,0.0701494660472656,0.009362166697171416,0.007294500033208533,0.00810241961073215,0.0154827456005757,0.006788571745070631,0.0067931621540814405,0.005474046301209604,0.019607390029709352,0.03978196303391145,0.061042229425094116,0.005895101979155503,0.012833117285343544,0.07340867737389571,0.04847804069644649,0.013639666419230739,0.00901633790258754,0.04028895599493892,0.011129542984899777,0.013758615373549273,0.006756764255763939,0.034969671679541,0.017995439734669185,0.017177786476703087,0.03961865420845259,0.054201828225497864,0.04557577211598893,0.017492721558397203,0.007228513738886495,0.04347781509103851,0.0037604778653159646,0.007149037341508368,0.004618850656017996,0.03795240832590804,0.019694473647606858,0.02883232389863989,0.039643376973175576,0.06218923802341029,0.039050333426718535,0.018057650679321777,0.054534748793999145,0.052986057763316906,0.03883673424395732,0.01622098927783057,0.04306386909643127,0.005420958457457807,0.05117529756976291,0.07131585930459226,0.05719036297263089,0.020204594928580382,0.03697867843171062,0.016625286893740116,0.049973002703361726,0.022362947928721805,0.033066468391443074,0.03863949367915843,0.02059481024297523,0.049500667261469296,0.011995167099874809,0.008038500109309663,0.036974615146404634,0.05716195866155914,0.007148261436213835,0.02667081138010873,0.03750417503369752,0.015417909592872455,0.045566974414855806,0.027155485442498115,0.04045974287936028,0.014610305379617123,0.051086056163505096,0.016788641866896926,0.04727771359388485,0.005944980802986909,0.03487810029551243,0.040776531746579664,0.029312037850460763,0.04641956670591383,0.024505849795631696,0.022034804647909027,0.05809802675648783,0.024919244201754526,0.018275319959049182,0.07174237086237559,0.10246985046256138,0.09695872774888145,0.04641070902577271,0.08900944562099813,0.020948852677827676,0.15665327540209367,0.04468159838319067,0.11216297006838934,0.08331104754141266,0.03343673414700705,0.04904764220029267,0.07121539558415138,0.05996809558147418,0.06966054772259814,0.08217680659419158,0.0872379969744164,0.02176867292769497,0.0773389048273778,0.0412652273663566,0.05890897136111018,0.05765024181191181,0.0726610370893837,0.05068037528659851,0.06926042934621028,0.11964450573346237,0.041745881067298776,0.1674618549080215,0.0639209198077328,0.15750646423339373,0.10664053849084382,0.09639629468544288,0.07512581868155926,0.09460547294927354,0.10006943297562958,0.06217064658483058,0.12788775418305104,0.06405410611983117,0.03900687399466047,0.08250779818891829,0.11958180375130482,0.12199977922836915,0.10803652827091609,0.1916353910748752,0.17117218278303303,0.1123924890370037,0.21936730946643784,0.1369834388794819,0.09962507258022245,0.17259560837049015,0.13095926930276897,0.09455223678292071,0.09107597720758352,0.15628180369668762,0.17234931516864613,0.07132812988648327,0.1555082250000943,0.2192270775058065,0.16244506779077245,0.17448883632020054,0.21937974667401686,0.18754015697662874,0.15748364769358011;0.023691796379740112;0.03690224528911291;0.93 +CartPole-v1;qwen2.5-coder;;;nan;0;0.0 +CartPole-v1;qwen2.5-coder;"def reward_func(observations:np.ndarray, is_success:bool, is_failure:bool) -> float: + """"""Reward function for CartPole-v1 + + Args: + observations (np.ndarray): observation on the current state + is_success (bool): True if the goal is achieved, False otherwise + is_failure (bool): True if the episode ends unsuccessfully, False otherwise + + Returns: + float: The reward for the current step + """""" + cart_position = observations[0] + cart_velocity = observations[1] + pole_angle = observations[2] + pole_angular_velocity = observations[3] + + # Define the reward function to encourage balance + if is_success: + return 1.0 # Maximum reward for successful episode + elif is_failure: + return -1.0 # Minimum penalty for failed episode + + # Reward based on cart position and pole angle + cart_reward = np.exp(-np.abs(cart_position)) # Penalize moving the cart far from center + pole_reward = np.exp(-2 * np.abs(pole_angle)) # Encourage keeping the pole upright + + # Combine rewards + reward = cart_reward + pole_reward + + return reward";;nan;0;0.0 +CartPole-v1;qwen2.5-coder;;0.015449866122760537,0.024278361050052273,0.013242742390937604,0.01876055172049494,0.02869260851369814,0.020967675452317872,0.024278361050052273,0.012139180525026137,0.015449866122760537,0.020967675452317872,0.023174799184140806,0.05517809329557335,0.013242742390937604,0.012139180525026137,0.012139180525026137,0.026485484781875207,0.013242742390937604,0.07393864501606828,0.016553427988672004,0.013242742390937604,0.03531397970916694,0.03862466530690134,0.04634959836828161,0.015449866122760537,0.06842083568651094,0.016553427988672004,0.03310685597734401,0.015449866122760537,0.01434630425684907,0.01434630425684907,0.02869260851369814,0.015449866122760537,0.01765698985458347,0.01765698985458347,0.06842083568651094,0.012139180525026137,0.012139180525026137,0.01103561865911467,0.024278361050052273,0.02538192291596374,0.01876055172049494,0.01434630425684907,0.013242742390937604,0.01765698985458347,0.03200329411143254,0.029796170379609608,0.016553427988672004,0.03972822717281281,0.015449866122760537,0.019864113586406405,0.02207123731822934,0.013242742390937604,0.01765698985458347,0.01765698985458347,0.01103561865911467,0.027589046647786674,0.020967675452317872,0.012139180525026137,0.030899732245521075,0.03421041784325547,0.03862466530690134,0.020967675452317872,0.03752110344098988,0.024278361050052273,0.01434630425684907,0.02538192291596374,0.027589046647786674,0.029796170379609608,0.023174799184140806,0.019864113586406405,0.03531397970916694,0.03531397970916694,0.02207123731822934,0.023174799184140806,0.013242742390937604,0.030899732245521075,0.0938027586024747,0.020967675452317872,0.02538192291596374,0.027589046647786674,0.020967675452317872,0.03310685597734401,0.015449866122760537,0.012139180525026137,0.026485484781875207,0.02538192291596374,0.01876055172049494,0.019864113586406405,0.01765698985458347,0.020967675452317872,0.012139180525026137,0.01103561865911467,0.01103561865911467,0.01434630425684907,0.023174799184140806,0.058488778893307745,0.012139180525026137,0.04634959836828161,0.04966028396601601,0.048556722100104546,0.02869260851369814,0.024278361050052273,0.01765698985458347,0.01765698985458347,0.013242742390937604,0.040831789038724274,0.023174799184140806,0.07614576874789122,0.06290302635695362,0.06069590262513068,0.05738521702739628,0.027589046647786674,0.07614576874789122,0.01103561865911467,0.03531397970916694,0.06179946449104215,0.05517809329557335,0.07614576874789122,0.06069590262513068,0.01434630425684907,0.03421041784325547,0.01434630425684907,0.019864113586406405,0.03421041784325547,0.02207123731822934,0.03752110344098988,0.03310685597734401,0.013242742390937604,0.03972822717281281,0.06511015008877655,0.041935350904635744,0.13132386204346458,0.020967675452317872,0.052970969563750414,0.04634959836828161,0.06731727382059949,0.04524603650237014,0.03310685597734401,0.020967675452317872,0.01434630425684907,0.012139180525026137,0.05517809329557335,0.03421041784325547,0.013242742390937604,0.08166357807744855,0.027589046647786674,0.07173152128424536,0.027589046647786674,0.06290302635695362,0.08276713994336002,0.016553427988672004,0.07945645434562562,0.02538192291596374,0.026485484781875207,0.047453160234193076,0.02207123731822934,0.03531397970916694,0.05517809329557335,0.052970969563750414,0.02869260851369814,0.04524603650237014,0.07062795941833389,0.058488778893307745,0.03972822717281281,0.027589046647786674,0.07504220688197975,0.027589046647786674,0.024278361050052273,0.04966028396601601,0.030899732245521075,0.08718138740700589,0.03200329411143254,0.04303891277054721,0.029796170379609608,0.05407453142966188,0.048556722100104546,0.06842083568651094,0.02538192291596374,0.17436277481401177,0.06731727382059949,0.01765698985458347,0.058488778893307745,0.03310685597734401,0.08718138740700589,0.03310685597734401,0.05628165516148481,0.0938027586024747,0.015449866122760537,0.013242742390937604,0.026485484781875207,0.052970969563750414,0.10263125352976643,0.01765698985458347,0.02869260851369814,0.03200329411143254,0.06290302635695362,0.18870907907086085,0.09821700606612056,0.0938027586024747,0.12139180525026136,0.09932056793203202,0.11477043405479256,0.06511015008877655,0.09932056793203202,0.08166357807744855,0.1434630425684907,0.1136668721888811,0.059592340759219216,0.06731727382059949,0.15229153749578245,0.08056001621153709,0.10263125352976643,0.024278361050052273,0.08718138740700589,0.029796170379609608,0.07062795941833389,0.07393864501606828,0.08166357807744855,0.1302203001775531,0.04303891277054721,0.184294831607215,0.1765698985458347,0.1202882433843499,0.16112003242307418,0.27809759020968966,0.19312332653450673,0.09932056793203202,0.19864113586406404,0.1335309857752875,0.09600988233429762,0.11918468151843843,0.2736833427460438,0.1335309857752875,0.09600988233429762;0.048207732450200914;0.04415335554082067;0.0 +CartPole-v1;qwen2.5-coder;"def reward_func(observations:np.ndarray, is_success:bool, is_failure:bool) -> float: + """"""Reward function for CartPole-v1 + + Args: + observations (np.ndarray): observation on the current state + is_success (bool): True if the goal is achieved, False otherwise + is_failure (bool): True if the episode ends unsuccessfully, False otherwise + + Returns: + float: The reward for the current step + """""" + if is_success: + return 1.0 + elif is_failure: + return -1.0 + else: + pole_angle = observations[2] + cart_position = observations[0] + + # Penalize large angles and positions to encourage stability + angle_penalty = np.clip(pole_angle, -0.45, 0.45) + position_penalty = np.clip(cart_position, -1.5, 1.5) + + return -angle_penalty - position_penalty";-0.0028231581308871692,-0.005756134222968586,-0.0065301778316937615,-0.0039954932081873494,-0.0018418150480217922,-0.0009212710319779818,-0.0037700845928245776,-0.011443027984093348,-0.0029374896643652895,-0.0014163480078717165,-0.004749760779968249,-0.003795372182656028,-0.0007172604534813563,-0.0018485883776545823,-0.0027752856927991884,-0.00214911948198067,-0.0032905471109720393,-0.004959080865945529,-0.003132451457872666,-0.00809386732580796,0.007894071301714838,-0.026114404058003797,0.0002636698181960855,-0.0037726180053920598,-0.002691405223965957,-0.011792279131111187,-0.002962966435413775,-0.004492168135257482,-0.007189268255180317,-0.00025793488385321227,-0.00031307388134030535,-0.02081970554174982,-0.002384508825312976,-0.005673059451393382,-0.0010002288991606387,-0.0022497256604361664,0.0007182330776945295,-0.005500171938325135,-0.0077894168937299915,-0.002769151635203442,-0.001467653070409139,-0.0007356424001102578,-0.007177009309180604,-0.004926500537363221,-0.025571898960979448,-0.0013186594261019517,-0.011034816274737676,-0.0016986197392242738,-0.005732870794939388,-0.005706176493042592,-0.006653505888276654,-0.0056811623263776576,0.002036036815001052,5.0282794080962454e-06,-0.0013469441876569636,-0.0011600295421478409,-0.004401653455130769,-0.008025392315870829,-0.0007523617627953945,0.0005746846515308022,-0.003971898102519782,-0.008861364852844918,-0.0019032594845183695,-0.00023559129446768141,-0.0022446880205501413,-0.008406932395442076,-0.003925527085091588,0.018325293077863042,-0.008186049789098674,0.0010257702159569729,0.0025624237397545383,-0.002727063821955239,-0.0035545805954950546,-0.007133956855564054,-0.0019166483530942803,-0.005568275539550075,-0.0014645366639142517,-0.0005994882435044804,-0.0009827063478882756,-0.002988547446437829,-0.006704603492362332,-0.005882185447978402,-0.003204088866864036,0.0015468637637870701,-0.0032195522446972703,-0.0017825080727404685,-0.0012501105232663563,-0.0019241772300878533,-0.0017966495817548708,-0.0002349111310965704,-0.0006433095169182732,-0.006437493791034632,-0.0007935730401938061,-0.00023095730284948844,-0.001288324127207046,0.002921187270989942,-0.0033140131831569213,-0.004128070122527604,0.0026696180891226285,-0.010508869878848647,-0.007605225742774995,-0.0030556132630773997,-0.003764937139010047,-0.00195514841431422,-0.002326321529391383,-0.0060029173313220306,0.020705062315332104,0.0015909213038169672,0.00032878635136911747,0.004648867810266703,-0.002379949987200325,-0.006042160211534272,-0.00477839181335402,-0.016134240395151517,-0.006374602900423453,0.029702754836998798,-0.002714667984061554,-0.0027588734321860806,-0.005996716173054015,0.001126426191527883,-0.0034309815711071933,0.008958300557420385,-0.003945920104532834,-0.0028930611389613985,0.005902284410435788,-0.0021616277302771406,0.006414290737304116,-0.0077059724067953875,-0.03528963709694577,0.002981649385692778,-0.0014395637131426697,-0.0024009111170954856,-0.001338021850651555,-0.013043349039080561,-0.013223264166925736,0.0018728546593801138,0.0004373788143333801,-0.0021436246964219663,-0.0007422837172137854,0.006948782343284854,-0.0033353350222833046,-0.0001803865603980683,-0.006791435045500659,-0.0011312048017461172,-0.009033044836037797,-0.006126232963712778,-0.00225558733596378,-0.0029615836984016974,-0.010467014236811465,-0.0026260300781916547,-0.006328055764897464,-0.006584908318795425,-0.0058526494680460105,-0.004916580552153393,-0.0007843079732872271,-0.004307801082577548,-0.013843603303058673,-0.0019525389749101732,0.000978036193780476,-0.004577175881587134,-0.005803403495064037,-0.0021615792925477177,-0.010217547659746534,0.0005296507744549455,-0.002659622795967212,0.0009968978108100168,-0.009682184353378974,0.016033295593175366,-0.005633056694795305,-0.003557498148279047,-0.006604299069374218,-0.0018878889792461333,-0.0009612752122467392,-0.0024121840049498825,-0.0012785118000367532,-0.008243711135915014,0.001611214293145834,0.009536607679415426,-0.002484297343286559,-0.005796340677364085,-0.0015730310378501678,-0.0005715781393903485,0.0006089892195415744,-0.006367908524932469,-0.012124584175505155,0.002477296411359446,-0.00045830661653694033,-0.0015088698002467464,0.002339386147562416,6.620157719650645e-05,-0.012957653891864535,-0.005229419349088709,0.01549758161539563,-0.023196847987652484,-0.003518176088144626,0.002338527028159666,0.03291127161444635,0.006611420796936271,0.005404786454328226,-0.005568891995226177,0.0018195582116084226,-0.0023262219853339714,0.0006261703732521504,-0.00018677372092683713,-0.0037222097338692733,0.003784869607258001,-0.0038320914278452377,-0.0027022141535385296,-0.0052530457261217795,-0.03783058547632838,0.0467357791983719,-0.0017773444270238318,0.0007792990169159809,0.004017247901213454,-0.0006513874619864565,-0.0019107601671879697,0.00958129860237766,-0.005207740505724293,0.0018066385820663736,0.010621544118542554,0.006663578897701629,-0.0017938189481424803,-0.0051201702807400825,-0.006195858424992861,-0.048414755448625585,0.005750701664989119,-0.0028799084664452254,0.017495524107480936,0.0006845591268065347,-0.009215154185343032,-0.012674342147129394,0.12404311304364118,-0.003666115842199898,-0.004480332922571207,0.0006160103494044794,0.000545534837566594,-0.006559762159984178,-0.023783435730229778,-0.0017295091815484056,-0.0017216573617946105,-0.0066558059568295165,-0.00519023526898422,-0.005961557465352034,-0.0036361066454225054,0.010294752044434764,-0.005347187949715468,-0.0064373347162190965,-0.00519191245339205,-0.0031979489170277436,-0.002313540496855491,-0.017073118482977816,-0.0011288784485949303,-0.0034333597596688204,-0.02013333107176273,0.01240504417302727,-0.009882725367248603,0.0005313094135138747,0.001181776340700517,-0.0026015417257909184,0.011708276554294602,0.008052470268793329,-0.003918532515780796,-0.0056298873028212066,-0.003420788438684637,-0.040974028016792605,0.2918873347549976,-0.0012104973444287518,-0.0071454733604400045,-0.016782544976676472,0.0001852715225791019,-0.003313661404793753,0.18506681627720353,-0.03117548135151402,0.0640060391985518,-0.0006559153493730916,0.02337563005159524,-0.005440797801614673,-0.003847709245724702,-0.0025409004104610397,0.012761760951879851,-0.003743634296793309,0.02546339407571287,-0.027048141346264865,-0.015202042295589151,0.0010292115729326834,0.12758551674548677,0.0006581828114772084,0.004328300359906439,0.0025746942256399397,-0.0017466225341187878,-0.007613348542877114,-0.0018007871952248854,-0.009566163634639076,-0.0013596545250657039,-0.0281421188781694,-0.0057692548409435905,0.00026050593275439696,-0.002566174626725238,0.31568749932755236,-0.0018521845164829194,-0.008759475614707891,0.3814616678291533,-0.0019072692827789566,-0.00891871901292159,0.11381427100300316,0.15032853848976396,-0.010645878400926005,-0.02310256384776758,0.0368163353526466,0.3189889583850955,0.40779868981948997,0.008652813442220322,-0.02010717498009973,0.23528414510093648,0.032442899824944846,0.2925314417658204,-0.025454646982690274,0.317352065268635,0.001955454050381749,0.14751156462124695,-0.0006682162905220253,0.009435924331110231,0.0008047134639581963,-0.026769031041856614,-0.0020551858222217973,0.006060709756187759;0.00791103547565957;0.054816969820298746;0.0 +CartPole-v1;qwen2.5-coder;;0.029347756878481027,0.041086859629873435,0.01056519247625317,0.05634769320668357,0.037565128804455714,0.025826026053063302,0.016434743851949374,0.02230429522764558,0.016434743851949374,0.018782564402227857,0.014086923301670893,0.01173910275139241,0.019956474677367098,0.014086923301670893,0.02465211577792406,0.02113038495250634,0.015260833576810134,0.019956474677367098,0.03169557742875951,0.026999936328202544,0.030521667153620268,0.04813032128070888,0.03286948770389875,0.02230429522764558,0.02465211577792406,0.02347820550278482,0.05986942403210129,0.017608654127088615,0.012913013026531651,0.014086923301670893,0.02347820550278482,0.02230429522764558,0.025826026053063302,0.0457825007304304,0.019956474677367098,0.018782564402227857,0.01173910275139241,0.014086923301670893,0.036391218529316476,0.015260833576810134,0.030521667153620268,0.019956474677367098,0.029347756878481027,0.030521667153620268,0.015260833576810134,0.025826026053063302,0.014086923301670893,0.015260833576810134,0.019956474677367098,0.02113038495250634,0.02347820550278482,0.014086923301670893,0.02347820550278482,0.028173846603341785,0.019956474677367098,0.014086923301670893,0.03169557742875951,0.02465211577792406,0.016434743851949374,0.02113038495250634,0.014086923301670893,0.02230429522764558,0.012913013026531651,0.014086923301670893,0.0457825007304304,0.009391282201113928,0.029347756878481027,0.015260833576810134,0.017608654127088615,0.041086859629873435,0.017608654127088615,0.017608654127088615,0.036391218529316476,0.02113038495250634,0.05752160348182281,0.016434743851949374,0.016434743851949374,0.02113038495250634,0.03286948770389875,0.09273891173600005,0.01056519247625317,0.030521667153620268,0.025826026053063302,0.014086923301670893,0.018782564402227857,0.012913013026531651,0.019956474677367098,0.02230429522764558,0.018782564402227857,0.04695641100556964,0.018782564402227857,0.025826026053063302,0.012913013026531651,0.018782564402227857,0.03521730825417723,0.029347756878481027,0.028173846603341785,0.02113038495250634,0.02465211577792406,0.02113038495250634,0.015260833576810134,0.017608654127088615,0.012913013026531651,0.02347820550278482,0.04813032128070888,0.06691288568293674,0.02113038495250634,0.025826026053063302,0.06456506513265826,0.03404339797903799,0.04226076990501268,0.02347820550278482,0.05752160348182281,0.017608654127088615,0.015260833576810134,0.029347756878481027,0.015260833576810134,0.0457825007304304,0.037565128804455714,0.04226076990501268,0.03521730825417723,0.018782564402227857,0.029347756878481027,0.041086859629873435,0.018782564402227857,0.01173910275139241,0.012913013026531651,0.030521667153620268,0.08686936036030384,0.06456506513265826,0.028173846603341785,0.014086923301670893,0.04813032128070888,0.02465211577792406,0.03873903907959495,0.062217244582379774,0.03404339797903799,0.015260833576810134,0.016434743851949374,0.039912949354734197,0.03404339797903799,0.04813032128070888,0.02465211577792406,0.015260833576810134,0.07630416788405067,0.0716085267834937,0.11386929668850639,0.039912949354734197,0.061043334307240536,0.06691288568293674,0.019956474677367098,0.02230429522764558,0.0716085267834937,0.03404339797903799,0.016434743851949374,0.03404339797903799,0.025826026053063302,0.030521667153620268,0.029347756878481027,0.02113038495250634,0.039912949354734197,0.04930423155584812,0.06808679595807599,0.028173846603341785,0.039912949354734197,0.05986942403210129,0.04226076990501268,0.025826026053063302,0.10330410421225321,0.017608654127088615,0.028173846603341785,0.029347756878481027,0.02230429522764558,0.04460859045529116,0.037565128804455714,0.025826026053063302,0.10682583503767093,0.1772604515460254,0.0457825007304304,0.02230429522764558,0.1056519247625317,0.05752160348182281,0.08334762953488611,0.05399987265640509,0.037565128804455714,0.10799974531281017,0.0657389754077975,0.06456506513265826,0.05869551375696205,0.025826026053063302,0.12678230971503804,0.03404339797903799,0.061043334307240536,0.11504320696364562,0.06808679595807599,0.02113038495250634,0.030521667153620268,0.04226076990501268,0.019956474677367098,0.02347820550278482,0.029347756878481027,0.07982589870946839,0.036391218529316476,0.051652052106126604,0.05634769320668357,0.05282596238126585,0.13030404054045575,0.07865198843432915,0.131477950815595,0.018782564402227857,0.13499968164101273,0.02113038495250634,0.08099980898460764,0.11973884806420258,0.09743455283655701,0.051652052106126604,0.06808679595807599,0.09626064256141777,0.11621711723878486,0.04813032128070888,0.06808679595807599,0.029347756878481027,0.019956474677367098,0.037565128804455714,0.13969532274156968,0.09860846311169624,0.19369519539797478,0.11621711723878486,0.1455648741172659,0.03286948770389875,0.10095628366197473,0.16669525906977223,0.11856493778906335,0.18782564402227855,0.0716085267834937,0.131477950815595,0.10917365558794942,0.05634769320668357,0.131477950815595,0.12443448916475955,0.13969532274156968,0.07043461650835446,0.1807821823714431,0.05986942403210129,0.15026051521782285,0.15965179741893679,0.13382577136587348,0.2547385297052153,0.1549561563183798,0.05634769320668357,0.14086923301670892;0.047512966076253195;0.04155215758124642;0.01 +CartPole-v1;qwen2.5-coder;;0.015267386089271834,0.00678550492856526,0.005089128696423945,0.018660138553554462,0.00678550492856526,0.02035651478569578,0.00791642241665947,0.010743716136894994,0.013005551113083415,0.018660138553554462,0.008481881160706575,0.01017825739284789,0.01017825739284789,0.00904733990475368,0.022052891017837094,0.011874633624989204,0.008481881160706575,0.00904733990475368,0.0073509636726123645,0.01017825739284789,0.00791642241665947,0.01017825739284789,0.01244009236903631,0.009612798648800784,0.015267386089271834,0.01583284483331894,0.027707478458308142,0.017529221065460253,0.00904733990475368,0.008481881160706575,0.005654587440471049,0.01017825739284789,0.00791642241665947,0.00904733990475368,0.014136468601177624,0.008481881160706575,0.010743716136894994,0.00904733990475368,0.0073509636726123645,0.0073509636726123645,0.01809467980950736,0.0073509636726123645,0.01357100985713052,0.008481881160706575,0.02488018473807262,0.00904733990475368,0.009612798648800784,0.018660138553554462,0.01244009236903631,0.01017825739284789,0.014701927345224729,0.03279660715473209,0.00678550492856526,0.026576560970213933,0.01017825739284789,0.008481881160706575,0.0073509636726123645,0.01583284483331894,0.009612798648800784,0.006220046184518155,0.010743716136894994,0.014136468601177624,0.005089128696423945,0.018660138553554462,0.006220046184518155,0.014701927345224729,0.008481881160706575,0.00791642241665947,0.01357100985713052,0.01017825739284789,0.0073509636726123645,0.010743716136894994,0.027707478458308142,0.01017825739284789,0.01583284483331894,0.011874633624989204,0.024314725994025514,0.005654587440471049,0.00678550492856526,0.01357100985713052,0.0073509636726123645,0.015267386089271834,0.013005551113083415,0.015267386089271834,0.01017825739284789,0.018660138553554462,0.015267386089271834,0.014136468601177624,0.014136468601177624,0.00678550492856526,0.00791642241665947,0.020921973529742885,0.01583284483331894,0.00904733990475368,0.015267386089271834,0.013005551113083415,0.011874633624989204,0.00904733990475368,0.01357100985713052,0.009612798648800784,0.00678550492856526,0.009612798648800784,0.010743716136894994,0.034492983386873406,0.01809467980950736,0.022052891017837094,0.016398303577366043,0.006220046184518155,0.00904733990475368,0.01244009236903631,0.00791642241665947,0.03279660715473209,0.010743716136894994,0.017529221065460253,0.014136468601177624,0.01357100985713052,0.01017825739284789,0.010743716136894994,0.019791056041648675,0.034492983386873406,0.021487432273789988,0.01244009236903631,0.016398303577366043,0.015267386089271834,0.00678550492856526,0.0073509636726123645,0.005654587440471049,0.011309174880942099,0.009612798648800784,0.011874633624989204,0.04240940580353287,0.016398303577366043,0.021487432273789988,0.011309174880942099,0.01809467980950736,0.020921973529742885,0.0339275246428263,0.018660138553554462,0.029969313434496565,0.009612798648800784,0.027707478458308142,0.023749267249978407,0.03618935961901472,0.010743716136894994,0.01809467980950736,0.009612798648800784,0.019791056041648675,0.01922559729760157,0.01809467980950736,0.02488018473807262,0.00791642241665947,0.022052891017837094,0.023749267249978407,0.04862945198805103,0.009612798648800784,0.018660138553554462,0.005089128696423945,0.01017825739284789,0.0339275246428263,0.023183808505931304,0.01017825739284789,0.03562390087496761,0.009612798648800784,0.005654587440471049,0.05202220445233366,0.015267386089271834,0.01696376232141315,0.0073509636726123645,0.014136468601177624,0.025445643482119723,0.03336206589877919,0.01017825739284789,0.014136468601177624,0.011309174880942099,0.022618349761884197,0.011874633624989204,0.013005551113083415,0.035058442130920506,0.016398303577366043,0.02035651478569578,0.03618935961901472,0.023183808505931304,0.022052891017837094,0.02827293720235525,0.05993862686899313,0.03279660715473209,0.035058442130920506,0.008481881160706575,0.04240940580353287,0.057111333148757604,0.02035651478569578,0.026576560970213933,0.018660138553554462,0.010743716136894994,0.014136468601177624,0.018660138553554462,0.00791642241665947,0.01357100985713052,0.014136468601177624,0.011309174880942099,0.03279660715473209,0.09895528020824337,0.0073509636726123645,0.017529221065460253,0.014701927345224729,0.019791056041648675,0.01244009236903631,0.022052891017837094,0.00904733990475368,0.008481881160706575,0.03166568966663788,0.023749267249978407,0.0565458744047105,0.08368789411897154,0.07407509547017076,0.05258766319638076,0.0576767918928047,0.024314725994025514,0.026576560970213933,0.00904733990475368,0.08199151788683022,0.02714201971426104,0.04240940580353287,0.06446229682136996,0.050891286964239446,0.08312243537492443,0.06672413179755839,0.06446229682136996,0.021487432273789988,0.021487432273789988,0.054849498172569185,0.037320277107108925,0.017529221065460253,0.04127848831543866,0.05371858068447497,0.07746784793445338,0.05937316812494602,0.04862945198805103,0.13231734610702256,0.02488018473807262,0.14079922726772914,0.10800262011299705,0.07181326049398233,0.08199151788683022,0.16115574205342492,0.03562390087496761,0.04240940580353287,0.16398303577366044,0.024314725994025514,0.13853739229154072,0.10856807885704416,0.10743716136894994,0.04127848831543866,0.091604316535631,0.13457918108321099,0.21543978148194698,0.102348032672526,0.113091748809421,0.10178257392847889,0.25558735230929147,0.1685067057260373,0.10800262011299705,0.1487156496843886,0.24823638863667907,0.28272937202355247,0.28272937202355247,0.28272937202355247,0.28272937202355247,0.2488018473807262;0.03429328606090881;0.050412723097485866;0.99 +CartPole-v1;qwen2.5-coder;"def reward_func(observations:np.ndarray, is_success:bool, is_failure:bool) -> float: + """"""Reward function for CartPole-v1 + + Args: + observations (np.ndarray): observation on the current state + is_success (bool): True if the goal is achieved, False otherwise + is_failure (bool): True if the episode ends unsuccessfully, False otherwise + + Returns: + float: The reward for the current step + """""" + + x, x_dot, theta, theta_dot = observations + + if is_success: + return 10.0 + elif is_failure: + return -10.0 + else: + # Reward based on how close to vertical the pole is and how stable it is + proximity_to_vertical = np.cos(theta) + stability_factor = np.exp(-abs(theta_dot)) + + reward = proximity_to_vertical * stability_factor + + return reward";0.0018976092968420438,0.0036188449141197386,0.014176615773515684,0.005637506026218698,0.01064402112973418,0.0019300029017399304,0.0057150901410796045,-0.0013305977969357253,-0.001806213873580475,0.00018493911421622156,-0.0012614211857301908,-0.002344192201262835,0.00766399495347233,0.014929310243559476,0.005037701419182238,0.0038340915957754654,-0.0052061032273504095,0.007629752305538132,-0.0014144070793597654,-0.0016740329585961224,0.011110560106635138,-0.0031851531208457345,-0.0026278447648846624,0.008401125350096727,0.005787916431337359,-0.0028572615657948925,-0.0031574387719160977,-0.0008585121960246434,-0.0036952265124178363,-0.002702871286163832,-5.9687855933691674e-05,0.003816850429385201,-0.002602325719686034,0.004271009527289456,-0.0034274101364667993,0.011469786893881744,-0.0043067217602038525,0.011738360856195083,0.004035745965442034,0.027941684728614264,-0.0005625816905919634,-0.001242819489315506,-0.00031369755699251395,0.004117077794910425,-0.001007621466615381,0.004449066525940587,0.0008164926860664901,-0.00398408396462808,-0.0028052836256768953,0.0032197431633699107,0.00016890050562132168,0.0053024711628245624,0.0030588999733083323,-0.002678900523432041,-8.34836173909192e-05,0.0019828312787816146,0.0027645891238964205,-0.0049462723965418365,-0.0014988114403359232,0.004851350780578323,0.008079247496293001,-0.002007737257725698,0.006312116843835111,0.0018485390095496752,0.011503926213031337,0.0008502147729848732,0.014004053873642253,0.012857682779979295,0.00147072661689325,0.0012638841505965774,-0.00408733589174861,0.005892690489176268,-0.0016763157578103427,-0.004163911375831785,-0.0012448038369833726,-0.0016386190689926213,0.0016854635043842717,-0.003657433483733703,-0.0005405304167179099,-0.0031908931049258516,0.0025956561682537564,0.002375691750347165,0.004082163886123371,-0.0020512109883829796,-0.004817366279367275,0.005502509515243404,-0.0029318520888759357,0.00258330121433698,0.015719618504367736,0.014438139546058727,6.76349104854022e-05,0.020067526902674788,0.0020305465199794716,0.0023407910514401456,0.008332104257783184,0.039327903540214725,0.006131172241688565,0.00016245008460635457,0.002675822616229824,-0.002783544729644886,0.00019859161542857455,0.01523552080854873,-0.004914056004014313,0.005227122790523675,0.007724239088123218,-0.001455344863527057,0.011528698794349566,0.008079444288973255,0.01648956118126068,0.016631682956182076,0.003902641791686544,0.029584152196961342,-0.0047983127411449335,-0.002468317986012897,-0.004695888985368008,0.004700695103602034,0.01407491666444312,0.016154542161476076,0.032894866026420194,0.02370290034030821,-0.0032141127754228655,0.007935014954687937,0.007584701058532017,0.00721864642807177,0.002165494881637837,0.005018664791461806,0.009781041448147404,0.019550323453871983,0.003768835077030623,0.016372701912884438,-0.003593743791797628,0.004276031363946807,0.0045716256900446605,0.007786522275735258,0.004084370054632807,0.02439419221612408,0.0026804929453497633,0.0241518235015096,0.005168862323203485,0.02478680339565782,0.009915635158285986,0.006089333283937788,-0.0032646100080787614,-0.003203915830390459,0.0036719394988492127,0.025941461248590827,-0.000701747338664426,0.020344079075889743,0.0029680794658791397,0.0030046618242625736,0.015137327415060131,0.020616802847539764,0.0047727839186765325,0.0036480545513739885,0.005275693566019918,0.0074884389567202265,0.03253781561716153,0.03089580390256083,0.004294067535243678,0.004903668770737314,0.010577124524237218,0.014572986949804441,0.029774336648903128,0.027426081102972934,0.01670496586606988,0.03201070839710554,0.021669051072000248,0.004893487236372604,0.03340271811713492,0.007205164495634243,0.018605221653883768,0.044261323609982486,0.038535688366478214,0.0018132246862600703,0.01695558634897682,-0.0032785429834431795,0.022352570487367482,0.020149081207551572,0.002377532598413029,0.027802790555688583,0.016572489437358767,0.02053759863340812,0.0138809908013208,0.005282265231364945,0.011044967645642664,0.02197619112187655,0.003106077614136729,0.007281041366217918,0.011834716329129273,0.0027441326634283203,0.0058652247725575505,0.007537092877665056,0.07218753071953854,0.021133851055770328,0.1175697048163023,0.04528067247101676,0.034303783527010655,0.018345121682721878,0.03257896741890922,0.01141982629263025,0.001706215689895979,0.005777093400870943,0.020790330171626615,0.06340566601166148,0.031644387342486584,0.11778051353490578,0.049423402796370856,0.05756243954976017,0.022045709540660766,0.05115419873832896,0.018402483940048526,0.05299604900269168,0.05324707418313289,0.05480488947253587,0.06946837596524773,0.023455401223717202,0.044008946403731465,0.042471744093578054,0.04547073127705317,0.08978059863459163,0.13476659985273517,0.0594854741392773,0.03534916925229443,0.08515238486968629,0.06553671714388279,0.006624525636066414,0.10820741125657177,0.2211981496375472,0.14083703004573828,0.07140352422607377,0.18173306837385206,0.08150210971840838,0.17233497712158136,0.10745251265903005,0.20002380299615324,0.28423940584724444,0.2792064191027958,0.2900412148460416,0.25979119848835214,0.2866714757368949,0.2892846706365853,0.2957215774752563,0.2917173159677832;0.027105204291396245;0.058142361817957094;0.99 +CartPole-v1;qwen2.5-coder;;0.01779524926704047,0.006749922135773971,0.026999688543095885,0.027613317828166246,0.008590809990985054,0.00613629285070361,0.009204439276055415,0.010431697846196138,0.013499844271547943,0.016567990696899747,0.01227258570140722,0.015954361411829387,0.009818068561125775,0.009818068561125775,0.009204439276055415,0.007363551420844332,0.010431697846196138,0.011045327131266498,0.02454517140281444,0.007363551420844332,0.006749922135773971,0.02393154211774408,0.013499844271547943,0.01288621498647758,0.03497686924901058,0.014727102841688664,0.009818068561125775,0.01288621498647758,0.00613629285070361,0.016567990696899747,0.02024976640732191,0.011658956416336859,0.011045327131266498,0.015340732126759024,0.01288621498647758,0.011045327131266498,0.014113473556618303,0.009818068561125775,0.017181619981970108,0.010431697846196138,0.02454517140281444,0.026386059258025524,0.0251588006878848,0.009818068561125775,0.00613629285070361,0.021477024977462636,0.007977180705914693,0.015340732126759024,0.008590809990985054,0.0251588006878848,0.009204439276055415,0.009204439276055415,0.009818068561125775,0.009818068561125775,0.008590809990985054,0.008590809990985054,0.013499844271547943,0.011658956416336859,0.014113473556618303,0.006749922135773971,0.010431697846196138,0.02024976640732191,0.022090654262532997,0.009204439276055415,0.027613317828166246,0.015340732126759024,0.014727102841688664,0.007363551420844332,0.011045327131266498,0.00613629285070361,0.005522663565633249,0.007977180705914693,0.023317912832673718,0.007977180705914693,0.008590809990985054,0.02024976640732191,0.013499844271547943,0.029454205683377328,0.009818068561125775,0.010431697846196138,0.014113473556618303,0.009818068561125775,0.007977180705914693,0.014113473556618303,0.015340732126759024,0.030067834968447688,0.021477024977462636,0.010431697846196138,0.02024976640732191,0.01227258570140722,0.022090654262532997,0.020863395692392275,0.02577242997295516,0.007977180705914693,0.009204439276055415,0.03865864495943274,0.021477024977462636,0.034363239963940216,0.03068146425351805,0.02454517140281444,0.03743138638929202,0.007977180705914693,0.014727102841688664,0.01288621498647758,0.029454205683377328,0.01227258570140722,0.016567990696899747,0.033135981393799495,0.045408567095206714,0.05522663565633249,0.01902250783718119,0.01902250783718119,0.006749922135773971,0.026999688543095885,0.021477024977462636,0.011658956416336859,0.007363551420844332,0.006749922135773971,0.033749610678869855,0.010431697846196138,0.0362041278191513,0.014727102841688664,0.033749610678869855,0.015340732126759024,0.011658956416336859,0.016567990696899747,0.010431697846196138,0.022704283547603357,0.01963613712225155,0.02577242997295516,0.026386059258025524,0.02577242997295516,0.016567990696899747,0.01779524926704047,0.02454517140281444,0.03068146425351805,0.021477024977462636,0.016567990696899747,0.01227258570140722,0.0251588006878848,0.027613317828166246,0.026999688543095885,0.02454517140281444,0.03681775710422166,0.01288621498647758,0.022704283547603357,0.009818068561125775,0.015954361411829387,0.10615786631717246,0.04049953281464382,0.032522352108729134,0.046022196380277075,0.014727102841688664,0.07363551420844332,0.009818068561125775,0.022090654262532997,0.02024976640732191,0.01288621498647758,0.007977180705914693,0.01227258570140722,0.01902250783718119,0.04356767923999563,0.030067834968447688,0.058908411366754655,0.04970397209069924,0.013499844271547943,0.03988590352957346,0.0251588006878848,0.01227258570140722,0.021477024977462636,0.021477024977462636,0.009818068561125775,0.0362041278191513,0.04909034280562888,0.0251588006878848,0.06934010921295079,0.03865864495943274,0.031908722823658774,0.08038543634421728,0.034363239963940216,0.08529447062478018,0.027613317828166246,0.06688559207266935,0.06504470421745827,0.07977180705914692,0.06811285064281007,0.08406721205463946,0.07424914349351368,0.09327165133069487,0.09449890990083559,0.044794937810136354,0.0503176013757696,0.013499844271547943,0.1116805298828057,0.045408567095206714,0.021477024977462636,0.09572616847097631,0.06197655779210646,0.12824852057970546,0.09388528061576523,0.06872647992788043,0.08897624633520235,0.09940794418139848,0.03559049853408094,0.1570890969780124,0.1619981312585753,0.06381744564731755,0.06749922135773971,0.11290778845294643,0.029454205683377328,0.08897624633520235,0.07793091920393584,0.04234042066985491,0.08529447062478018,0.09327165133069487,0.07118099706816187,0.11904408130365003,0.08222632419942838,0.08836261705013199,0.16567990696899748,0.07118099706816187,0.15770272626308277,0.09020350490534307,0.1307030377199869,0.12149859844393147,0.08590809990985054,0.12211222772900183,0.15218006269744952,0.1460437698467459,0.19083870765688227,0.2644742218653256,0.18899781980167119,0.028840576398306967,0.1509528041273088,0.10615786631717246,0.15279369198251988,0.14727102841688663,0.2687696268608181,0.273678661141381,0.27920132470701425;0.041522248289761095;0.04942235894440996;0.81 +CartPole-v1;qwen2.5-coder;"def reward_func(observations:np.ndarray, is_success:bool, is_failure:bool) -> float: + """"""Reward function for CartPole-v1 + + Args: + observations (np.ndarray): observation on the current state + is_success (bool): True if the goal is achieved, False otherwise + is_failure (bool): True if the episode ends unsuccessfully, False otherwise + + Returns: + float: The reward for the current step + """""" + cart_position = observations[0] + cart_velocity = observations[1] + pole_angle = observations[2] + pole_angular_velocity = observations[3] + + if is_success: + return 1.0 + elif is_failure: + return -1.0 + else: + return 0.0";-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,-0.06388765649999399,0.06388765649999399,-0.06388765649999399,-0.06388765649999399;-0.0633661246101981;0.008146588546506347;0.93 +CartPole-v1;qwen2.5-coder;;0.015792738612631113,0.00694880498955769,0.026531800869220273,0.017056157701641605,0.009475643167578669,0.008212224078568179,0.01263419089010489,0.011370771801094403,0.011370771801094403,0.012002481345599647,0.010107352712083913,0.006317095445052445,0.008212224078568179,0.009475643167578669,0.0075805145340629345,0.02337325314669405,0.015161029068125869,0.00694880498955769,0.024636672235704538,0.031585477225262226,0.010739062256589157,0.013265900434610137,0.03411231540328321,0.015792738612631113,0.01958299587966258,0.024636672235704538,0.025900091324715026,0.017056157701641605,0.01389760997911538,0.008843933623073424,0.03348060585877796,0.0075805145340629345,0.006317095445052445,0.0075805145340629345,0.013265900434610137,0.013265900434610137,0.018319576790652093,0.008843933623073424,0.013265900434610137,0.01263419089010489,0.0075805145340629345,0.011370771801094403,0.008843933623073424,0.0075805145340629345,0.02210983405768356,0.01263419089010489,0.008843933623073424,0.009475643167578669,0.012002481345599647,0.01263419089010489,0.00694880498955769,0.010739062256589157,0.008843933623073424,0.021478124513178314,0.008843933623073424,0.010107352712083913,0.008212224078568179,0.024636672235704538,0.02337325314669405,0.017056157701641605,0.012002481345599647,0.008212224078568179,0.016424448157136357,0.006317095445052445,0.03474402494778845,0.010739062256589157,0.016424448157136357,0.006317095445052445,0.03474402494778845,0.013265900434610137,0.015161029068125869,0.01768786724614685,0.011370771801094403,0.012002481345599647,0.015792738612631113,0.022741543602188805,0.005685385900547201,0.008843933623073424,0.008212224078568179,0.010107352712083913,0.01263419089010489,0.013265900434610137,0.0075805145340629345,0.013265900434610137,0.01263419089010489,0.016424448157136357,0.010739062256589157,0.008843933623073424,0.00694880498955769,0.024004962691199293,0.01389760997911538,0.030322058136251738,0.022741543602188805,0.02779521995823076,0.024004962691199293,0.012002481345599647,0.024004962691199293,0.010739062256589157,0.009475643167578669,0.011370771801094403,0.027163510413725517,0.022741543602188805,0.01389760997911538,0.01768786724614685,0.0353757344922937,0.01958299587966258,0.02905863904724125,0.02779521995823076,0.0353757344922937,0.010107352712083913,0.010107352712083913,0.018319576790652093,0.027163510413725517,0.009475643167578669,0.00694880498955769,0.0410611203928409,0.07454172625161885,0.03600744403679894,0.006317095445052445,0.008212224078568179,0.02779521995823076,0.02337325314669405,0.07201488807359788,0.013265900434610137,0.00694880498955769,0.010107352712083913,0.018951286335157337,0.015792738612631113,0.03916599175932516,0.014529319523620625,0.015792738612631113,0.00694880498955769,0.006317095445052445,0.020214705424167825,0.010739062256589157,0.028426929502736006,0.015161029068125869,0.02905863904724125,0.016424448157136357,0.02337325314669405,0.04421966811536712,0.01768786724614685,0.022741543602188805,0.0353757344922937,0.00694880498955769,0.010107352712083913,0.011370771801094403,0.031585477225262226,0.015792738612631113,0.024636672235704538,0.016424448157136357,0.018951286335157337,0.02337325314669405,0.029690348591746494,0.008843933623073424,0.03474402494778845,0.03853428221481992,0.03474402494778845,0.03979770130383041,0.018951286335157337,0.010739062256589157,0.11812968482248073,0.0353757344922937,0.010739062256589157,0.03916599175932516,0.03411231540328321,0.02905863904724125,0.0467465062933881,0.017056157701641605,0.13771268070214332,0.04548308720437761,0.04548308720437761,0.05369531128294579,0.024636672235704538,0.010739062256589157,0.015792738612631113,0.03474402494778845,0.02084641496867307,0.03853428221481992,0.032217186769767474,0.017056157701641605,0.0410611203928409,0.010107352712083913,0.04990505401591432,0.014529319523620625,0.06822463080656642,0.11433942755544926,0.05685385900547201,0.06759292126206116,0.09475643167578668,0.060644116272503476,0.0353757344922937,0.030322058136251738,0.04042941084833565,0.10296865575435486,0.1036003652988601,0.020214705424167825,0.048641634926903835,0.04548308720437761,0.02905863904724125,0.060644116272503476,0.03853428221481992,0.08528078850820801,0.018951286335157337,0.054958730371956276,0.04169282993734614,0.06317095445052445,0.061275825817008724,0.04800992538239859,0.09475643167578668,0.1048637843878706,0.16361277202685834,0.06253924490601921,0.06632950217305068,0.08528078850820801,0.10865404165490207,0.09475643167578668,0.061275825817008724,0.11433942755544926,0.17308841519443702,0.08970275531974473,0.1048637843878706,0.22867885511089853,0.22299346921035132,0.18951286335157336,0.10170523666534438,0.17119328656092128,0.12950045662357515,0.31585477225262226,0.31585477225262226,0.31585477225262226,0.18509089654003666,0.31585477225262226;0.038466502220774294;0.0530299321182047;0.81 +CartPole-v1;qwen2.5-coder;"def reward_func(observations:np.ndarray, is_success:bool, is_failure:bool) -> float: + """"""Reward function for CartPole-v1 + + Args: + observations (np.ndarray): observation on the current state + is_success (bool): True if the goal is achieved, False otherwise + is_failure (bool): True if the episode ends unsuccessfully, False otherwise + + Returns: + float: The reward for the current step + """""" + cart_pos, cart_vel, pole_angle, pole_vel = observations + + if is_success: + return 1.0 # High reward for successfully balancing the pole + elif is_failure: + return -1.0 # Low reward for failing to balance the pole + else: + return -0.05 * (np.abs(pole_angle) + cart_vel**2) # Penalize the agent for deviations from the goal";-0.044621715701483516,-0.058903761657906534,-0.06599842510708714,-0.04736297500402852,-0.05167993515995643,-0.05995002917035275,-0.05736519053371013,-0.04725345762036421,-0.05510966466171495,-0.05185512236047597,-0.053118498740595564,-0.04931766255669792,-0.047217523280463095,-0.048070187458939526,-0.0505967144811824,-0.0616046737703572,-0.07673193645434517,-0.044195880210967584,-0.05469798926598944,-0.0628490928085095,-0.05112706650632907,-0.044947388706567656,-0.04580584379708024,-0.04654687323095045,-0.05663474123417496,-0.07399485748037722,-0.044077578499226236,-0.05267582853052308,-0.05103594162780109,-0.04530321557030376,-0.0627756085213726,-0.051226932919506266,-0.05575640393006884,-0.05354173186244425,-0.04836900460594434,-0.0461649340715892,-0.05438795018648556,-0.044722793138469,-0.050924649949503416,-0.04514788851052237,-0.05177656221492848,-0.046864615526039544,-0.04503829833450902,-0.04833033528149357,-0.04620053358489286,-0.04922147998440038,-0.11645766181333671,-0.05242304577342717,-0.04892140479455274,-0.05175636538417913,-0.05667614239673043,-0.049357596769785315,-0.05219482076941321,-0.04387234494988215,-0.056583192890997155,-0.047900268002371016,-0.04586892601601759,-0.04323067132023788,-0.0528003184801309,-0.048543144906287285,-0.0454587209093884,-0.055044609537133214,-0.061981311660696176,-0.045901017918503206,-0.05103157648111077,-0.07530428654855414,-0.0537609033313707,-0.05718599137050568,-0.04479005580033903,-0.0504888583138714,-0.05375534960066471,-0.049042316785483674,-0.050514414046239284,-0.05107851917884484,-0.04851466922414853,-0.0479486658281996,-0.049527060202782344,-0.06141600654903691,-0.0899541637572068,-0.049606575629808136,-0.04787422578564696,-0.04773379471351181,-0.05114912631185919,-0.04542571320517612,-0.05245308295659315,-0.051662667907471455,-0.04534739731532504,-0.0483794046745859,-0.04803631762658858,-0.055940015988986715,-0.047253309946178004,-0.04801297090730714,-0.04793003869558278,-0.04752274636290923,-0.04683985771501488,-0.051142350162306886,-0.054493769653536576,-0.04948968414548136,-0.05173485931250672,-0.050775645629228414,-0.046933005329094064,-0.04606008341425871,-0.0599299637861857,-0.06615418333363589,-0.04999996683848582,-0.05688954456572289,-0.051974669140693264,-0.04900326635704706,-0.04780621210905557,-0.05334833889525031,-0.04548431312008921,-0.049232319875863656,-0.05618371359111072,-0.04735897432979778,-0.057912868170457064,-0.0459895523488272,-0.04944890153525672,-0.04549441005529413,-0.067791814209233,-0.04979546929912881,-0.09267624442300373,-0.06616051957043632,-0.05696067082662716,-0.048376573836988,-0.047170465154052384,-0.055824465544703175,-0.04543348301903259,-0.049895165527271274,-0.04740496848873298,-0.053248823143975006,-0.05034789015446256,-0.052244424672891886,-0.054002601006338684,-0.04899705592752858,-0.04484035989218398,-0.05465605333213917,-0.04684489849636524,-0.050983825112605095,-0.05255996701008334,-0.05712732749527237,-0.04895663614492998,-0.05503851763915906,-0.05285297722749329,-0.04678739023964172,-0.04403731880783586,-0.04783008756237667,-0.05070007901171815,-0.048300823411968404,-0.04506749158373773,-0.050585358355137444,-0.05260663624411323,-0.048853419338541634,-0.044369773587547595,-0.04608503595292185,-0.047374907717777186,-0.05285522305864878,-0.04777074567710611,-0.04941026156578289,-0.04873283976330299,-0.05087661288068697,-0.05290724048414386,-0.050627299084707186,-0.055987618539194844,-0.04853688229951856,-0.05168505676630553,-0.05364853372116631,-0.05291309866504151,-0.06416691241826736,-0.046679083841482095,-0.04494395888616095,-0.04637180661898861,-0.04631728008266809,-0.04759496532619201,-0.050491827269744514,-0.055827370959097414,-0.05012564574237728,-0.049788408817092185,-0.054499412563863134,-0.04784035026549167,-0.065819135703697,-0.05997030413111077,-0.04866979934023495,-0.053120592641981386,-0.05403043602665301,-0.04823796291414738,-0.04851506228898293,-0.05764825922187366,-0.06986308710235303,-0.09020571236567836,-0.06567318386691687,-0.04798857465211858,-0.05333780467285834,-0.0452967804575253,-0.04653622377379372,-0.07670326252174867,-0.06671425461555755,-0.09641952897553481,-0.05262332087524767,-0.04965632884731042,-0.049234896621100005,-0.048316446393826734,-0.04783179707704866,-0.04699508558961396,-0.048886408448307855,-0.053284639332869546,-0.04727673878611847,-0.04713823984657506,-0.0566286871221292,-0.06484938109736327,-0.04688311603506402,-0.06174571330377229,-0.05862218217858827,-0.05285526281887701,-0.052036149454916615,-0.04527454975098878,-0.046916204325571545,-0.05976426501858133,-0.05692457010137888,-0.05914952400295345,-0.052218352737029294,-0.05843480677942901,-0.0454531478337082,-0.06589893095642987,-0.04375308009290939,-0.047898505313722965,-0.04849438333882038,-0.05127294462536784,-0.05007929207101879,-0.05521212754258729,-0.06143949731199114,-0.06495581058476102,-0.06085914944002141,-0.05004873536582459,-0.04925372605797032,-0.06553548117085325,-0.04738819563667777,-0.05144124691858805,-0.05137219002146199,-0.05657843841060214,-0.08030284123796803,-0.05139777070406838,-0.04954820879829708,-0.05308459069792364,-0.047407347112300435,-0.0506074033876977,-0.058171919193814696,-0.051573972679543466,-0.04929494201771135,-0.061102054231277195,-0.056472961342045054,-0.058251564878554554,-0.05859449066403122,-0.0481114336574155,-0.05142335984892135,-0.050186938183707334,-0.06078684136595348,-0.04471478870240573,-0.05197033861100384,-0.054050599358560734,-0.04799864824545378,-0.05715696388252556,-0.05312289250297594,-0.05515393640349988,-0.04778627875435143,-0.04820604773911481,-0.061821903289872464,-0.059500951090230486,-0.05078432857716136,-0.05636591773248654,-0.049205522933550586,-0.047589331970666185,-0.05357405180863526,-0.05753000716156664,-0.04715672298353734,-0.04840199658681382,-0.047498148177898554,-0.05108323663910554,-0.04711925407779878,-0.04584143390324624,-0.048254116911328984,-0.044800553086802114,-0.05333081712195184,-0.05121682211174322,-0.05255338674560383,-0.04680426580025366,-0.048942450452244386,-0.048861283918790396,-0.04663572832581641,-0.053754475007763335,-0.048863777159997776,-0.049066358982345945,-0.04536945956562291,-0.04902801597332571,-0.05077750423531913,-0.052566713949546165,-0.057875333919918094,-0.051202781538561455,-0.052537782650742104,-0.04536413276087938,-0.05009021075239485,-0.04822578867910619,-0.04562976676438498,-0.05632674190589464,-0.053661114162640015,-0.052520710277444545,-0.050815940730892406,-0.04850631718155323,-0.050557147027667004,-0.05454147516987914,-0.06173599404354995,-0.05065332185708977,-0.046487811727899155,-0.04859481145542898,-0.0506150278078189,-0.053122748679548734,-0.049236677202516906,-0.05824282726003023,-0.047454111673398226,-0.05137279120395113,-0.0531157526124792,-0.04905639922059456,-0.056406548455686094,-0.05049580240797646,-0.05275434181371987,-0.047089970161216174,-0.05257462543280848,-0.06125036307119575,-0.05307546660930371,-0.049493085410290225,-0.047021661391887154,-0.047204140286521076,-0.04677923090333826,-0.05179770761389828,-0.04941153993283933,-0.048739565563074495,-0.04590132412705132,-0.04944892612948702,-0.049190759876681055,-0.05296342297007519,-0.04891750682846236,-0.05103371797497463,-0.04495327970419785,-0.04826050544029041,-0.04878042550378993,-0.05858609046689446,-0.04973842918933798,-0.04746228218935147,-0.05024556896971142,-0.05013694384543476,-0.04937414452223727,-0.0527925859223366,-0.05467706427013192,-0.047766779997360835,-0.047261012903075265,-0.04927638486490591,-0.04806836567619152;-0.052388376304813675;0.00802787594511661;0.0 diff --git a/src/main.py b/src/main.py index ca1ff2d..68b6fbd 100644 --- a/src/main.py +++ b/src/main.py @@ -44,7 +44,7 @@ def main(): human_feedback = True LoggerCSV(env_type, model) viral = VIRAL( - env_type=env_type, model=model, hf=human_feedback, training_time=20000, options=additional_options) + env_type=env_type, model=model, hf=human_feedback, training_time=25000, options=additional_options) viral.generate_context(Prompt.CARTPOLE) viral.generate_reward_function(n_init=1, n_refine=3) for state in viral.memory: