From 89d4e0c757f2309d506f32b6bf97eaaddf091209 Mon Sep 17 00:00:00 2001 From: Antonin RAFFIN Date: Sat, 6 Aug 2022 13:51:44 +0200 Subject: [PATCH] Add MsPacman agents (#274) * Add MsPacman agents * Allow to force custom objects * Update changelog * Fix loading issues --- CHANGELOG.md | 5 +- README.md | 10 +- benchmark.md | 4 + enjoy.py | 6 +- .../a2c-MsPacmanNoFrameskip-v4/0.monitor.csv | 187 +++++++++++++++ logs/benchmark/benchmark.md | 4 + .../dqn-MsPacmanNoFrameskip-v4/0.monitor.csv | 142 +++++++++++ .../ppo-MsPacmanNoFrameskip-v4/0.monitor.csv | 165 +++++++++++++ .../0.monitor.csv | 227 ++++++++++++++++++ rl-trained-agents | 2 +- version.txt | 2 +- 11 files changed, 745 insertions(+), 9 deletions(-) create mode 100644 logs/benchmark/a2c-MsPacmanNoFrameskip-v4/0.monitor.csv create mode 100644 logs/benchmark/dqn-MsPacmanNoFrameskip-v4/0.monitor.csv create mode 100644 logs/benchmark/ppo-MsPacmanNoFrameskip-v4/0.monitor.csv create mode 100644 logs/benchmark/qrdqn-MsPacmanNoFrameskip-v4/0.monitor.csv diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d58b1a48..90c0b441e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,12 @@ -## Release 1.5.1a8 (WIP) +## Release 1.6.0 (2022-08-05) ### Breaking Changes - Change default value for number of hyperparameter optimization trials from 10 to 500. (@ernestum) - Derive number of intermediate pruning evaluations from number of time steps (1 evaluation per 100k time steps.) (@ernestum) - Updated default --eval-freq from 10k to 25k steps - Update default horizon to 2 for the `HistoryWrapper` +- Upgrade to Stable-Baselines3 (SB3) >= 1.6.0 +- Upgrade to sb3-contrib >= 1.6.0 ### New Features - Support setting PyTorch's device with thye `--device` flag (@gregwar) @@ -14,6 +16,7 @@ - Added `RecurrentPPO` support (aka `ppo_lstm`) - Added autodownload for "official" sb3 models from the hub - Added Humanoid-v3, Ant-v3, Walker2d-v3 models for A2C (@pseudo-rnd-thoughts) +- Added MsPacman models ### Bug fixes - Fix `Reacher-v3` name in PPO hyperparameter file diff --git a/README.md b/README.md index e9d6ab820..7056d3997 100644 --- a/README.md +++ b/README.md @@ -331,7 +331,7 @@ The previous command will create a `mp4` file. To convert this file to `gif` for python -m utils.record_training --algo ppo --env CartPole-v1 -n 1000 -f logs --deterministic --gif ``` -## Current Collection: 150+ Trained Agents! +## Current Collection: 195+ Trained Agents! Final performance of the trained agents can be found in [`benchmark.md`](./benchmark.md). To compute them, simply run `python -m utils.benchmark`. @@ -354,10 +354,10 @@ Additional Atari Games (to be completed): | RL Algo | MsPacman | Asteroids | RoadRunner | |----------|-------------|-----------|------------| -| A2C | | :heavy_check_mark: | :heavy_check_mark: | -| PPO | | :heavy_check_mark: | :heavy_check_mark: | -| DQN | | :heavy_check_mark: | :heavy_check_mark: | -| QR-DQN | | :heavy_check_mark: | :heavy_check_mark: | +| A2C | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| PPO | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| DQN | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| QR-DQN | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | ### Classic Control Environments diff --git a/benchmark.md b/benchmark.md index 005bc0160..b21e761ec 100644 --- a/benchmark.md +++ b/benchmark.md @@ -39,6 +39,7 @@ and also allow users to have access to pretrained agents.* |a2c |LunarLanderContinuous-v2 | 84.225| 145.906|5M | 149305| 256| |a2c |MountainCar-v0 | -111.263| 24.087|1M | 149982| 1348| |a2c |MountainCarContinuous-v0 | 91.166| 0.255|100k | 149923| 1659| +|a2c |MsPacmanNoFrameskip-v4 | 1671.730| 612.918|10M | 602450| 185| |a2c |Pendulum-v1 | -162.965| 103.210|1M | 150000| 750| |a2c |PongNoFrameskip-v4 | 17.292| 3.214|10M | 594910| 65| |a2c |QbertNoFrameskip-v4 | 3882.345| 1223.327|10M | 610670| 194| @@ -77,6 +78,7 @@ and also allow users to have access to pretrained agents.* |dqn |EnduroNoFrameskip-v4 | 830.929| 194.544|10M | 599040| 14| |dqn |LunarLander-v2 | 154.382| 79.241|100k | 149373| 200| |dqn |MountainCar-v0 | -100.849| 9.925|120k | 149962| 1487| +|dqn |MsPacmanNoFrameskip-v4 | 2682.929| 492.567|10M | 599952| 140| |dqn |PongNoFrameskip-v4 | 20.602| 0.613|10M | 598998| 88| |dqn |QbertNoFrameskip-v4 | 9496.774| 5399.633|10M | 605844| 124| |dqn |RoadRunnerNoFrameskip-v4 | 40396.350| 7069.131|10M | 603257| 137| @@ -100,6 +102,7 @@ and also allow users to have access to pretrained agents.* |ppo |LunarLanderContinuous-v2 | 270.863| 32.072|1M | 149956| 526| |ppo |MountainCar-v0 | -110.423| 19.473|1M | 149954| 1358| |ppo |MountainCarContinuous-v0 | 88.343| 2.572|20k | 149983| 633| +|ppo |MsPacmanNoFrameskip-v4 | 1754.356| 172.783|10M | 600822| 163| |ppo |Pendulum-v1 | -172.225| 104.159|100k | 150000| 750| |ppo |PongNoFrameskip-v4 | 20.989| 0.105|10M | 599902| 90| |ppo |QbertNoFrameskip-v4 | 15627.108| 3313.538|10M | 600248| 83| @@ -122,6 +125,7 @@ and also allow users to have access to pretrained agents.* |qrdqn |EnduroNoFrameskip-v4 | 3231.200| 1311.801|10M | 585728| 5| |qrdqn |LunarLander-v2 | 70.236| 225.491|100k | 149957| 522| |qrdqn |MountainCar-v0 | -106.042| 15.536|120k | 149943| 1414| +|qrdqn |MsPacmanNoFrameskip-v4 | 997.867| 877.130|10M | 604914| 225| |qrdqn |PongNoFrameskip-v4 | 20.492| 0.687|10M | 597443| 63| |qrdqn |QbertNoFrameskip-v4 | 14799.728| 2917.629|10M | 600773| 92| |qrdqn |RoadRunnerNoFrameskip-v4 | 42325.424| 8361.161|10M | 591016| 59| diff --git a/enjoy.py b/enjoy.py index 8e6c33298..1da043942 100644 --- a/enjoy.py +++ b/enjoy.py @@ -62,6 +62,10 @@ def main(): # noqa: C901 parser.add_argument( "--env-kwargs", type=str, nargs="+", action=StoreDict, help="Optional keyword argument to pass to the env constructor" ) + parser.add_argument( + "--custom-objects", action="store_true", default=False, help="Use custom objects to solve loading issues" + ) + args = parser.parse_args() # Going through custom gym packages to let them register in the global registory @@ -170,7 +174,7 @@ def main(): # noqa: C901 newer_python_version = sys.version_info.major == 3 and sys.version_info.minor >= 8 custom_objects = {} - if newer_python_version: + if newer_python_version or args.custom_objects: custom_objects = { "learning_rate": 0.0, "lr_schedule": lambda _: 0.0, diff --git a/logs/benchmark/a2c-MsPacmanNoFrameskip-v4/0.monitor.csv b/logs/benchmark/a2c-MsPacmanNoFrameskip-v4/0.monitor.csv new file mode 100644 index 000000000..53c32d7bf --- /dev/null +++ b/logs/benchmark/a2c-MsPacmanNoFrameskip-v4/0.monitor.csv @@ -0,0 +1,187 @@ +#{"t_start": 1659728336.7725544, "env_id": "MsPacmanNoFrameskip-v4"} +r,l,t +1730.0,3234,3.953365 +1560.0,3298,5.427331 +1100.0,2714,6.639803 +2360.0,3634,8.255448 +1950.0,3874,9.985612 +2450.0,4090,11.803499 +1070.0,3074,13.174083 +1710.0,2530,14.298565 +4470.0,4194,16.174039 +1790.0,2874,17.454673 +2030.0,3162,18.860609 +1600.0,2266,19.866352 +2440.0,3610,21.472004 +1150.0,2210,22.459134 +1530.0,3554,24.041418 +1270.0,2626,25.208103 +1280.0,2482,26.314844 +1680.0,3226,27.752215 +1100.0,2514,28.87338 +2360.0,5218,31.201759 +1200.0,2970,32.53076 +960.0,3418,34.053116 +1730.0,2722,35.263485 +1520.0,4170,37.123587 +1660.0,2682,38.316556 +1580.0,2946,39.626874 +1960.0,3530,41.199417 +1170.0,2530,42.328518 +2130.0,3890,44.061271 +1910.0,3970,45.832457 +1810.0,3050,47.194604 +2430.0,4418,49.168339 +2320.0,2938,50.474335 +2120.0,4906,52.668199 +1360.0,2850,53.939555 +1020.0,2346,54.978707 +2280.0,3698,56.63467 +1560.0,3866,58.354663 +1200.0,3082,59.732336 +1180.0,3834,61.443044 +2100.0,3586,63.045426 +1010.0,2754,64.285646 +1240.0,2674,65.485508 +2990.0,3370,66.994512 +1290.0,2466,68.149448 +1440.0,3130,69.61241 +1560.0,2458,70.763192 +1700.0,3106,72.160394 +2130.0,3522,73.813967 +4950.0,3890,75.72441 +1950.0,2978,77.180707 +2030.0,4090,79.102359 +1550.0,3322,80.647296 +1530.0,3626,82.287508 +1090.0,3162,83.792252 +2030.0,3442,85.417475 +1400.0,2906,86.786747 +1560.0,3106,88.266386 +1210.0,2658,89.512441 +2770.0,4034,91.376633 +1200.0,3242,92.889384 +3110.0,4722,95.071795 +1060.0,2738,96.366151 +1310.0,3338,97.908794 +1810.0,3218,99.350722 +2580.0,3018,100.764254 +1780.0,3906,102.538961 +1630.0,2954,103.861392 +1340.0,2978,105.20175 +1160.0,2330,106.260769 +1650.0,3818,108.327378 +1100.0,2818,109.85005 +950.0,2346,111.110954 +900.0,2234,112.313941 +1670.0,3858,114.393967 +2230.0,4506,116.827441 +1340.0,2786,118.330728 +3030.0,3738,120.362621 +2090.0,3714,122.375252 +1240.0,3218,124.109298 +1220.0,3170,125.812876 +1310.0,2634,127.262277 +1570.0,3194,128.965139 +1290.0,3058,130.362771 +1820.0,2850,131.608693 +1950.0,3842,133.281789 +1430.0,2722,134.451039 +2100.0,3586,136.02444 +1750.0,3594,137.63478 +1590.0,2706,138.844255 +1500.0,3570,140.421348 +1670.0,4362,142.362482 +1490.0,3314,143.837664 +730.0,2874,145.113736 +2590.0,4802,147.192036 +1230.0,2850,148.438278 +1800.0,3650,150.012589 +3370.0,3370,151.468725 +1190.0,2610,152.591411 +1810.0,4946,154.727196 +2520.0,4290,156.59002 +1380.0,2874,157.903495 +2490.0,3810,159.663447 +1670.0,2714,160.8516 +1500.0,3954,162.555276 +1570.0,3690,164.145591 +1690.0,3738,165.75474 +1550.0,4538,167.75505 +1650.0,3562,169.36602 +1260.0,2970,170.68833 +1670.0,2874,171.928043 +1940.0,3018,173.302168 +1030.0,2682,174.551034 +1890.0,3618,176.189759 +1160.0,3034,177.564327 +1680.0,3266,179.01814 +1840.0,3506,180.649786 +1070.0,2538,181.787977 +2030.0,3938,183.56093 +2960.0,4634,185.557277 +1920.0,3410,187.089478 +1620.0,3066,188.49645 +1260.0,2466,189.63132 +1030.0,2914,190.900608 +1740.0,2834,192.218688 +2340.0,3682,193.901766 +1110.0,2762,195.125856 +1190.0,2786,196.371832 +1820.0,3306,197.829656 +1930.0,3010,199.169468 +1200.0,2434,200.343077 +1380.0,3802,202.276492 +1790.0,2810,203.649479 +1980.0,4170,205.609311 +1990.0,3226,207.144824 +1700.0,4266,209.257385 +1010.0,2658,210.532377 +1240.0,2586,211.735485 +1580.0,3186,213.318766 +1090.0,2762,214.718967 +1370.0,3194,216.254075 +1770.0,4986,218.678091 +1140.0,2306,219.77412 +1470.0,3538,221.694823 +870.0,2026,222.677573 +1710.0,3362,224.210367 +2410.0,5522,226.616171 +1280.0,2698,227.791895 +1150.0,2714,228.979634 +1400.0,3610,230.616868 +1630.0,3730,232.327563 +1750.0,2914,233.723598 +1150.0,2626,234.917321 +1730.0,3642,236.556935 +1200.0,3098,237.904774 +1430.0,3842,239.581436 +1350.0,2730,240.753289 +1470.0,4874,242.852802 +1760.0,3170,244.236137 +1550.0,2890,245.477554 +1580.0,3298,246.899028 +1310.0,2866,248.130993 +1590.0,3066,249.452029 +480.0,1762,250.20387 +2260.0,3842,251.851681 +1930.0,4330,253.715691 +1110.0,2866,254.948357 +1120.0,2498,256.028346 +1960.0,2610,257.199669 +1030.0,2482,258.326828 +2170.0,2178,259.308928 +1890.0,3242,260.714789 +1210.0,2706,261.879737 +1670.0,3738,263.499191 +1130.0,2138,264.428535 +1840.0,3546,265.977581 +1840.0,3010,267.331547 +1400.0,3298,268.879427 +2490.0,3042,270.281823 +1180.0,2386,271.379688 +4340.0,3738,273.109107 +1220.0,3106,274.53357 +1260.0,2714,275.779621 +1310.0,2794,277.063536 +1040.0,2674,278.295469 diff --git a/logs/benchmark/benchmark.md b/logs/benchmark/benchmark.md index 005bc0160..b21e761ec 100644 --- a/logs/benchmark/benchmark.md +++ b/logs/benchmark/benchmark.md @@ -39,6 +39,7 @@ and also allow users to have access to pretrained agents.* |a2c |LunarLanderContinuous-v2 | 84.225| 145.906|5M | 149305| 256| |a2c |MountainCar-v0 | -111.263| 24.087|1M | 149982| 1348| |a2c |MountainCarContinuous-v0 | 91.166| 0.255|100k | 149923| 1659| +|a2c |MsPacmanNoFrameskip-v4 | 1671.730| 612.918|10M | 602450| 185| |a2c |Pendulum-v1 | -162.965| 103.210|1M | 150000| 750| |a2c |PongNoFrameskip-v4 | 17.292| 3.214|10M | 594910| 65| |a2c |QbertNoFrameskip-v4 | 3882.345| 1223.327|10M | 610670| 194| @@ -77,6 +78,7 @@ and also allow users to have access to pretrained agents.* |dqn |EnduroNoFrameskip-v4 | 830.929| 194.544|10M | 599040| 14| |dqn |LunarLander-v2 | 154.382| 79.241|100k | 149373| 200| |dqn |MountainCar-v0 | -100.849| 9.925|120k | 149962| 1487| +|dqn |MsPacmanNoFrameskip-v4 | 2682.929| 492.567|10M | 599952| 140| |dqn |PongNoFrameskip-v4 | 20.602| 0.613|10M | 598998| 88| |dqn |QbertNoFrameskip-v4 | 9496.774| 5399.633|10M | 605844| 124| |dqn |RoadRunnerNoFrameskip-v4 | 40396.350| 7069.131|10M | 603257| 137| @@ -100,6 +102,7 @@ and also allow users to have access to pretrained agents.* |ppo |LunarLanderContinuous-v2 | 270.863| 32.072|1M | 149956| 526| |ppo |MountainCar-v0 | -110.423| 19.473|1M | 149954| 1358| |ppo |MountainCarContinuous-v0 | 88.343| 2.572|20k | 149983| 633| +|ppo |MsPacmanNoFrameskip-v4 | 1754.356| 172.783|10M | 600822| 163| |ppo |Pendulum-v1 | -172.225| 104.159|100k | 150000| 750| |ppo |PongNoFrameskip-v4 | 20.989| 0.105|10M | 599902| 90| |ppo |QbertNoFrameskip-v4 | 15627.108| 3313.538|10M | 600248| 83| @@ -122,6 +125,7 @@ and also allow users to have access to pretrained agents.* |qrdqn |EnduroNoFrameskip-v4 | 3231.200| 1311.801|10M | 585728| 5| |qrdqn |LunarLander-v2 | 70.236| 225.491|100k | 149957| 522| |qrdqn |MountainCar-v0 | -106.042| 15.536|120k | 149943| 1414| +|qrdqn |MsPacmanNoFrameskip-v4 | 997.867| 877.130|10M | 604914| 225| |qrdqn |PongNoFrameskip-v4 | 20.492| 0.687|10M | 597443| 63| |qrdqn |QbertNoFrameskip-v4 | 14799.728| 2917.629|10M | 600773| 92| |qrdqn |RoadRunnerNoFrameskip-v4 | 42325.424| 8361.161|10M | 591016| 59| diff --git a/logs/benchmark/dqn-MsPacmanNoFrameskip-v4/0.monitor.csv b/logs/benchmark/dqn-MsPacmanNoFrameskip-v4/0.monitor.csv new file mode 100644 index 000000000..c32b81399 --- /dev/null +++ b/logs/benchmark/dqn-MsPacmanNoFrameskip-v4/0.monitor.csv @@ -0,0 +1,142 @@ +#{"t_start": 1659728618.0716512, "env_id": "MsPacmanNoFrameskip-v4"} +r,l,t +3240.0,4610,4.565343 +2380.0,3650,6.179767 +3240.0,4722,8.242586 +2950.0,4218,9.901634 +2440.0,4450,11.644655 +1600.0,3226,12.901034 +3140.0,3970,14.512564 +2570.0,4658,16.381635 +2720.0,4242,18.09169 +2540.0,4274,19.819639 +2450.0,4098,21.46781 +2450.0,4330,23.219339 +2980.0,4578,25.034832 +2750.0,4714,26.924132 +3050.0,4386,28.647907 +2840.0,4490,30.418036 +2300.0,4402,32.153116 +2290.0,3586,33.554561 +1750.0,3802,35.044998 +2400.0,4074,36.637052 +3240.0,4546,38.421748 +1760.0,4346,40.121146 +2650.0,4402,41.842615 +2500.0,3530,43.217673 +2450.0,4570,45.000941 +3140.0,4618,46.799562 +2880.0,3730,48.256968 +2240.0,3842,49.760671 +3050.0,5538,51.925182 +2730.0,4466,53.678259 +3630.0,4698,55.515136 +3150.0,4578,57.304975 +3290.0,4794,59.179325 +2210.0,3146,60.410003 +2850.0,4450,62.155477 +2480.0,4154,63.773322 +3240.0,4458,65.523309 +2850.0,4450,67.262537 +2640.0,4066,68.852192 +2440.0,4226,70.500446 +3240.0,4626,72.309346 +2830.0,4498,74.066068 +2040.0,3810,75.559353 +2850.0,4938,77.493622 +2180.0,3202,78.742667 +2960.0,4474,80.49608 +2640.0,4714,82.340948 +2240.0,4426,84.070252 +2020.0,4474,85.819671 +2550.0,4234,87.476478 +2300.0,3754,88.943698 +2740.0,3970,90.497502 +1760.0,3698,91.945864 +3110.0,4266,93.617598 +2060.0,3546,94.994061 +2440.0,4258,96.665241 +2850.0,4530,98.440153 +2850.0,4450,100.183625 +2430.0,5162,102.198829 +3030.0,4178,103.835441 +2580.0,3618,105.243359 +2850.0,4450,106.991979 +2720.0,4242,108.651297 +3030.0,4562,110.433888 +2450.0,4354,112.137468 +2700.0,4162,113.759301 +3640.0,4898,115.678251 +2850.0,3986,117.23745 +3030.0,3946,118.78445 +2890.0,4994,120.743981 +2580.0,4786,122.618189 +2640.0,5162,124.632866 +2290.0,4098,126.237313 +2280.0,4842,128.13284 +2640.0,4650,129.952747 +2440.0,4258,131.618814 +2430.0,4226,133.269789 +2640.0,4642,135.086737 +2230.0,3522,136.464286 +2200.0,3146,137.690324 +2530.0,4778,139.554016 +2960.0,4066,141.136904 +2850.0,4450,142.878844 +2840.0,4386,144.598397 +3240.0,4626,146.411167 +2180.0,3202,147.667167 +2440.0,4338,149.359808 +2230.0,3330,150.663123 +2450.0,4810,152.543828 +2840.0,4218,154.185746 +2280.0,4458,155.923587 +3640.0,4418,157.645611 +2350.0,6634,160.237373 +2450.0,4450,161.976687 +2050.0,4258,163.636643 +2740.0,4322,165.323966 +2570.0,4162,166.970684 +2740.0,4378,168.688692 +2850.0,4130,170.294853 +2320.0,3474,171.649862 +3040.0,4186,173.286216 +2730.0,4274,174.960623 +2850.0,4322,176.654881 +3040.0,4394,178.378233 +4230.0,4306,180.056643 +1840.0,4114,181.660494 +3240.0,4842,183.554174 +2570.0,3658,184.979727 +2220.0,4146,186.601411 +1830.0,3378,187.914714 +1670.0,3226,189.174523 +2300.0,3850,190.687076 +2710.0,3842,192.195436 +2850.0,4450,193.952946 +3080.0,5314,196.122141 +2880.0,4922,198.095347 +2240.0,4066,199.693413 +2450.0,4578,201.546361 +2440.0,4066,203.163933 +5040.0,4266,204.833798 +1830.0,4090,206.447688 +2230.0,3650,207.992231 +3240.0,5170,210.199356 +4040.0,4970,212.319058 +3230.0,5090,214.496098 +2740.0,4338,216.337857 +2740.0,4386,218.197287 +3640.0,4378,220.113769 +2270.0,3418,221.690941 +3050.0,5098,223.746212 +2450.0,3482,225.097706 +3240.0,4578,226.889236 +2450.0,3906,228.403416 +3030.0,3970,230.008759 +2310.0,3906,231.596919 +2640.0,4850,233.561483 +2730.0,4274,235.296346 +2640.0,4290,237.03278 +3040.0,4402,238.817874 +2540.0,3778,240.350946 diff --git a/logs/benchmark/ppo-MsPacmanNoFrameskip-v4/0.monitor.csv b/logs/benchmark/ppo-MsPacmanNoFrameskip-v4/0.monitor.csv new file mode 100644 index 000000000..69d134e77 --- /dev/null +++ b/logs/benchmark/ppo-MsPacmanNoFrameskip-v4/0.monitor.csv @@ -0,0 +1,165 @@ +#{"t_start": 1659728997.1370955, "env_id": "MsPacmanNoFrameskip-v4"} +r,l,t +1660.0,4010,4.311036 +1770.0,3386,5.826842 +1780.0,4314,7.759622 +1670.0,3410,9.282951 +1680.0,3402,10.813711 +1780.0,3346,12.352658 +1670.0,4378,14.325256 +1250.0,2874,15.607972 +1670.0,3290,17.08202 +1660.0,3362,18.586194 +1680.0,3674,20.231242 +1790.0,3826,21.947734 +1610.0,3634,23.571569 +1670.0,3386,25.103332 +1520.0,2890,26.4025 +1580.0,3434,27.938063 +1780.0,3722,29.609581 +1760.0,3290,31.081013 +1780.0,3394,32.644059 +1770.0,3282,34.115666 +1670.0,3458,35.716336 +1770.0,3578,37.362477 +1990.0,4754,39.483508 +1750.0,4218,41.35789 +1680.0,3698,43.003232 +1470.0,3594,44.622692 +1620.0,3122,46.074785 +1760.0,3170,47.487425 +1840.0,4498,49.485715 +1830.0,3954,51.252816 +1270.0,3058,52.611309 +1270.0,2578,53.751842 +1660.0,3706,55.415011 +1680.0,3586,57.078042 +2060.0,5330,59.629807 +1790.0,3946,61.759284 +1670.0,3386,63.302713 +1780.0,3274,64.841095 +1680.0,3906,66.663625 +1790.0,3170,68.062446 +1670.0,3698,69.701603 +1650.0,3658,71.357393 +1660.0,3594,72.96934 +1790.0,3338,74.508537 +1750.0,3282,76.131865 +1660.0,3986,77.951711 +1800.0,4210,79.890404 +1770.0,3354,81.425101 +1430.0,3306,82.927539 +1680.0,4370,84.910806 +1900.0,5370,87.421242 +1760.0,3026,88.832213 +1710.0,4394,90.843166 +1670.0,3130,92.26574 +1770.0,3186,93.659618 +1680.0,3546,95.289372 +1760.0,3922,97.076224 +1940.0,4066,98.847867 +1680.0,3698,100.454071 +1780.0,3802,102.11652 +1780.0,3498,103.631747 +2290.0,4626,105.638126 +1680.0,3330,107.084306 +2020.0,4162,108.896756 +1680.0,4482,110.84265 +1760.0,3258,112.250316 +1670.0,3402,113.730119 +1780.0,3274,115.146115 +1780.0,4338,117.028795 +2060.0,3762,118.666529 +1660.0,3306,120.104542 +1770.0,3474,121.607384 +1690.0,4282,123.469273 +1750.0,3330,124.923571 +1650.0,3562,126.505764 +1800.0,4394,128.556068 +1870.0,4634,130.612923 +1780.0,3154,132.05293 +1780.0,3210,133.481938 +1810.0,3786,135.146797 +1670.0,4074,136.939489 +1780.0,3490,138.452728 +1680.0,4146,140.266229 +1470.0,3690,141.869161 +1690.0,3810,143.538072 +1760.0,3586,145.096657 +1780.0,3658,146.675833 +1780.0,3410,148.158734 +2500.0,3986,149.88645 +1760.0,3714,151.497061 +1600.0,3322,152.940172 +1770.0,3450,154.437815 +1680.0,3226,155.837178 +2080.0,3818,157.485126 +1770.0,3570,159.026505 +1670.0,3474,160.529741 +1690.0,3426,162.014512 +1670.0,3602,163.581415 +1640.0,3122,164.92838 +1680.0,3530,166.455764 +1780.0,3426,167.938135 +1670.0,3266,169.359395 +1670.0,4002,171.099225 +1770.0,3394,172.588926 +1790.0,3954,174.304051 +1700.0,4058,176.065544 +1420.0,3138,177.434047 +1880.0,5130,179.657403 +2090.0,4578,181.648489 +1680.0,3626,183.224978 +1760.0,3258,184.651129 +1670.0,3570,186.20624 +1680.0,3330,187.650923 +1670.0,3106,189.008781 +1780.0,4002,190.745732 +1780.0,3346,192.196018 +1670.0,3090,193.534641 +2380.0,4378,195.446541 +1780.0,3410,196.935071 +2010.0,4826,199.03219 +1950.0,4098,200.80808 +1680.0,3986,202.542683 +1780.0,3770,204.175391 +2450.0,4010,205.910917 +1770.0,3258,207.323926 +1650.0,3290,208.757178 +1760.0,3482,210.268661 +2080.0,4514,212.219892 +2150.0,5426,214.573869 +1870.0,4634,216.596694 +1650.0,3250,218.002542 +1780.0,3626,219.576827 +1700.0,3162,220.943001 +1660.0,3162,222.313525 +1800.0,3866,223.993338 +1680.0,3474,225.519474 +1680.0,4890,227.77953 +1780.0,3442,229.34835 +1760.0,2978,230.717079 +1770.0,3154,232.166474 +1860.0,4506,234.206739 +1480.0,3162,235.603306 +1670.0,3202,237.061666 +1770.0,3042,238.435267 +1870.0,4802,240.528921 +1780.0,3370,241.989311 +1460.0,3106,243.339746 +1660.0,3122,244.684675 +1750.0,3330,246.129004 +2080.0,4226,247.953844 +1680.0,3850,249.633671 +1660.0,3090,250.97381 +1780.0,3442,252.478907 +1670.0,3514,254.091421 +1670.0,3282,255.626668 +1800.0,3946,257.386214 +1780.0,4274,259.426431 +1840.0,3490,261.006943 +1770.0,3266,262.462701 +1960.0,4562,264.51024 +2060.0,4226,266.340128 +1860.0,3906,268.189434 +1770.0,3154,269.606311 diff --git a/logs/benchmark/qrdqn-MsPacmanNoFrameskip-v4/0.monitor.csv b/logs/benchmark/qrdqn-MsPacmanNoFrameskip-v4/0.monitor.csv new file mode 100644 index 000000000..d8aae1d76 --- /dev/null +++ b/logs/benchmark/qrdqn-MsPacmanNoFrameskip-v4/0.monitor.csv @@ -0,0 +1,227 @@ +#{"t_start": 1659728083.934625, "env_id": "MsPacmanNoFrameskip-v4"} +r,l,t +2620.0,3026,3.834814 +1840.0,2234,4.736814 +1840.0,3874,6.309424 +350.0,2330,7.249159 +290.0,2274,8.235159 +340.0,2266,9.172496 +650.0,2778,10.313977 +480.0,2818,11.468873 +1840.0,3874,13.093454 +1840.0,2258,14.027411 +340.0,2266,14.970769 +290.0,2786,16.154179 +300.0,2258,17.095724 +340.0,2258,18.036075 +340.0,2266,18.98799 +90.0,1946,19.825573 +290.0,2786,21.015863 +2620.0,3026,22.313512 +290.0,2786,23.498471 +1070.0,2938,24.710338 +2620.0,2946,25.925375 +290.0,2274,26.852503 +440.0,2554,27.883672 +1990.0,2842,29.036318 +290.0,2274,29.955478 +1840.0,3730,31.469566 +870.0,2618,32.525351 +2610.0,3298,33.864388 +370.0,2178,34.739065 +1050.0,3170,36.018179 +1920.0,2514,37.031759 +340.0,2266,37.949726 +1100.0,3194,39.239888 +290.0,2786,40.36607 +340.0,2266,41.280129 +440.0,2330,42.212358 +2620.0,3026,43.446622 +470.0,3450,44.834128 +340.0,2266,45.797408 +340.0,2266,46.710386 +210.0,2450,47.693173 +450.0,2658,48.767104 +2640.0,2970,49.973559 +300.0,2130,50.832939 +670.0,2330,51.774778 +2620.0,3026,53.00587 +1990.0,2842,54.155099 +1070.0,2258,55.069091 +290.0,2786,56.198107 +340.0,2538,57.22623 +1840.0,2634,58.293262 +2620.0,3026,59.522429 +1840.0,3874,61.100117 +1840.0,3258,62.421133 +300.0,2258,63.335555 +340.0,2266,64.254721 +340.0,2538,65.280048 +410.0,2346,66.231496 +1840.0,3850,67.803036 +540.0,2434,68.795772 +290.0,2274,69.712484 +470.0,2514,70.725149 +1900.0,3498,72.146349 +670.0,3010,73.366218 +290.0,2274,74.285753 +1840.0,3874,75.855255 +320.0,2042,76.687012 +410.0,2722,77.791707 +460.0,2538,78.817608 +340.0,2266,79.734663 +1840.0,4154,81.420113 +290.0,2274,82.336323 +290.0,2778,83.460264 +2620.0,3026,84.689365 +1820.0,2330,85.626027 +340.0,2266,86.541832 +290.0,2786,87.669333 +340.0,2266,88.589473 +1840.0,3874,90.162117 +870.0,2954,91.367695 +1010.0,2402,92.342073 +2620.0,3026,93.564576 +340.0,2266,94.475543 +340.0,2266,95.395139 +440.0,2618,96.460896 +2220.0,3010,97.682663 +290.0,2786,98.804189 +290.0,2786,99.935825 +440.0,2554,100.977958 +290.0,2274,101.903294 +2620.0,3026,103.158328 +670.0,2978,104.377339 +370.0,2850,105.554394 +390.0,2002,106.357706 +290.0,2274,107.271534 +470.0,3450,108.703052 +200.0,1626,109.390643 +1840.0,3874,111.045345 +290.0,2786,112.190727 +2620.0,3026,113.406507 +2620.0,3242,114.733953 +1860.0,2202,115.641451 +340.0,2266,116.568477 +160.0,1978,117.369906 +610.0,2338,118.307331 +290.0,2274,119.224921 +2640.0,3122,120.492476 +1270.0,2922,121.762939 +340.0,2266,122.797086 +260.0,2282,123.826988 +1840.0,2194,124.814758 +1840.0,3874,126.578502 +320.0,2242,127.593731 +340.0,2266,128.620268 +650.0,2898,129.953191 +290.0,2786,131.220554 +290.0,2274,132.252182 +290.0,2274,133.283746 +470.0,3450,134.853251 +290.0,2274,135.883404 +1890.0,4450,137.882681 +1860.0,3906,139.77699 +380.0,2122,140.651223 +640.0,2362,141.639291 +2620.0,3026,142.865849 +1840.0,3874,144.443593 +2620.0,3026,145.665649 +340.0,2266,146.591197 +680.0,1682,147.288076 +290.0,2786,148.458345 +2040.0,3194,149.769452 +2620.0,3026,151.029688 +290.0,2274,151.964096 +340.0,2266,152.880234 +290.0,2274,153.804255 +340.0,2266,154.716676 +1990.0,4234,156.449067 +410.0,2402,157.420759 +2010.0,3162,158.702291 +2630.0,3026,159.936962 +610.0,2458,160.934877 +340.0,2266,161.843164 +340.0,2266,162.75768 +370.0,2650,163.832191 +290.0,2274,164.745726 +350.0,2658,165.818526 +2620.0,3026,167.049139 +300.0,2258,167.957802 +290.0,2786,169.089303 +290.0,2786,170.218032 +290.0,2274,171.137163 +500.0,3442,172.53206 +2620.0,3026,173.758882 +2620.0,3026,174.992127 +1010.0,1818,175.729002 +290.0,2274,176.652566 +1870.0,2954,177.842927 +640.0,2402,178.806803 +290.0,2274,179.727772 +1140.0,3282,181.055032 +290.0,2274,181.975493 +1090.0,3538,183.410694 +290.0,2274,184.334543 +300.0,1602,184.980037 +340.0,2266,185.901125 +2620.0,3026,187.167765 +290.0,2274,188.090653 +450.0,2658,189.162903 +2620.0,3026,190.391806 +470.0,3450,191.796391 +460.0,2346,192.741061 +260.0,2050,193.582306 +2620.0,3026,194.809557 +290.0,2330,195.751117 +1070.0,2746,196.865847 +2620.0,3026,198.089877 +340.0,2266,199.001717 +290.0,2274,199.916932 +400.0,2890,201.083027 +1040.0,2762,202.198652 +340.0,2266,203.114162 +2620.0,3266,204.43369 +670.0,2330,205.368736 +2620.0,3026,206.591956 +2620.0,3026,207.815366 +290.0,2786,208.936987 +290.0,2786,210.066915 +2620.0,3242,211.384499 +670.0,2330,212.327147 +290.0,2274,213.245963 +290.0,2274,214.1653 +340.0,2266,215.082036 +290.0,2274,216.001971 +2620.0,3026,217.227557 +1840.0,3874,218.800245 +730.0,2802,219.93364 +260.0,2050,220.758055 +1840.0,2258,221.675833 +620.0,3418,223.059279 +1840.0,2610,224.110616 +660.0,2962,225.304739 +290.0,2274,226.228877 +470.0,3450,227.628947 +1810.0,2210,228.522402 +1070.0,2938,229.708611 +380.0,2122,230.55839 +1830.0,3642,232.038548 +1840.0,3874,233.610315 +290.0,2274,234.525975 +290.0,2274,235.444523 +320.0,2274,236.362874 +340.0,2266,237.277619 +290.0,2090,238.115647 +2620.0,2946,239.308366 +2630.0,3026,240.529524 +260.0,2090,241.367483 +2620.0,3026,242.587913 +290.0,2274,243.505794 +1010.0,1818,244.237888 +1840.0,2490,245.243423 +650.0,2850,246.391867 +1990.0,2842,247.542347 +640.0,2610,248.593004 +1060.0,2602,249.649027 +290.0,2786,250.782081 diff --git a/rl-trained-agents b/rl-trained-agents index f0b0efc31..1e2a45e5d 160000 --- a/rl-trained-agents +++ b/rl-trained-agents @@ -1 +1 @@ -Subproject commit f0b0efc31a9b41953085158c0c57183ba1467b28 +Subproject commit 1e2a45e5d06efd6cc15da6cf2d1939d72dcbdf87 diff --git a/version.txt b/version.txt index 511e75b2e..dc1e644a1 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -1.5.1a8 +1.6.0