Fix autodetect dtype warnings (openai#1234)

* Fix autodetect dtype warnings
* Use warnings module for gym logger
* Fix warning in tests
quagla · Nov 29, 2018 · cdd212d
1 parent e09b0f5
commit cdd212d
Show file tree

Hide file tree

Showing 12 changed files with 65 additions and 37 deletions.
diff --git a/.gitignore b/.gitignore
@@ -3,6 +3,7 @@
 *.py~
 .DS_Store
 .cache
+.pytest_cache/
 
 # Setuptools distribution and build folders.
 /dist/

diff --git a/gym/envs/box2d/bipedal_walker.py b/gym/envs/box2d/bipedal_walker.py
@@ -1,6 +1,7 @@
-import sys, math
-import numpy as np
+import sys
+import math
 
+import numpy as np
 import Box2D
 from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)
 
@@ -137,9 +138,9 @@ def __init__(self):
 
         self.reset()
 
-        high = np.array([np.inf]*24)
-        self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]))
-        self.observation_space = spaces.Box(-high, high)
+        high = np.array([np.inf] * 24)
+        self.action_space = spaces.Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]), dtype=np.float32)
+        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
 
     def seed(self, seed=None):
         self.np_random, seed = seeding.np_random(seed)

diff --git a/gym/envs/classic_control/acrobot.py b/gym/envs/classic_control/acrobot.py
@@ -1,9 +1,10 @@
 """classic Acrobot task"""
-from gym import core, spaces
-from gym.utils import seeding
 import numpy as np
 from numpy import sin, cos, pi
 
+from gym import core, spaces
+from gym.utils import seeding
+
 __copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
 __credits__ = ["Alborz Geramifard", "Robert H. Klein", "Christoph Dann",
                "William Dabney", "Jonathan P. How"]
@@ -86,7 +87,7 @@ def __init__(self):
         self.viewer = None
         high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
         low = -high
-        self.observation_space = spaces.Box(low=low, high=high)
+        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)
         self.action_space = spaces.Discrete(3)
         self.state = None
         self.seed()

diff --git a/gym/envs/classic_control/continuous_mountain_car.py b/gym/envs/classic_control/continuous_mountain_car.py
@@ -9,16 +9,18 @@
 and then modified by Arnaud de Broissia
 
 * the OpenAI/gym MountainCar environment
-itself from 
+itself from
 http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
 permalink: https://perma.cc/6Z2N-PFWC
 """
 
 import math
+
+import numpy as np
+
 import gym
 from gym import spaces
 from gym.utils import seeding
-import numpy as np
 
 class Continuous_MountainCarEnv(gym.Env):
     metadata = {
@@ -40,8 +42,10 @@ def __init__(self):
 
         self.viewer = None
 
-        self.action_space = spaces.Box(low=self.min_action, high=self.max_action, shape=(1,))
-        self.observation_space = spaces.Box(low=self.low_state, high=self.high_state)
+        self.action_space = spaces.Box(low=self.min_action, high=self.max_action,
+                                       shape=(1,), dtype=np.float32)
+        self.observation_space = spaces.Box(low=self.low_state, high=self.high_state,
+                                            dtype=np.float32)
 
         self.seed()
         self.reset()

diff --git a/gym/envs/classic_control/mountain_car.py b/gym/envs/classic_control/mountain_car.py
@@ -4,10 +4,12 @@
 """
 
 import math
+
+import numpy as np
+
 import gym
 from gym import spaces
 from gym.utils import seeding
-import numpy as np
 
 class MountainCarEnv(gym.Env):
     metadata = {
@@ -27,7 +29,7 @@ def __init__(self):
         self.viewer = None
 
         self.action_space = spaces.Discrete(3)
-        self.observation_space = spaces.Box(self.low, self.high)
+        self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32)
 
         self.seed()
         self.reset()

diff --git a/gym/envs/tests/test_envs.py b/gym/envs/tests/test_envs.py
@@ -1,5 +1,6 @@
-import numpy as np
 import pytest
+import numpy as np
+
 from gym import envs
 from gym.envs.tests.spec_list import spec_list
 
@@ -8,7 +9,14 @@
 # envs.
 @pytest.mark.parametrize("spec", spec_list)
 def test_env(spec):
-    env = spec.make()
+    # Capture warnings
+    with pytest.warns(None) as warnings:
+        env = spec.make()
+
+    # Check that dtype is explicitly declared for gym.Box spaces
+    for warning_msg in warnings:
+        assert not 'autodetected dtype' in str(warning_msg.message)
+
     ob_space = env.observation_space
     act_space = env.action_space
     ob = env.reset()
@@ -40,4 +48,3 @@ def test_random_rollout():
             (ob, _reward, done, _info) = env.step(a)
             if done: break
         env.close()
-
diff --git a/gym/envs/toy_text/guessing_game.py b/gym/envs/toy_text/guessing_game.py
@@ -1,7 +1,8 @@
+import numpy as np
+
 import gym
 from gym import spaces
 from gym.utils import seeding
-import numpy as np
 
 
 class GuessingGame(gym.Env):
@@ -40,7 +41,8 @@ def __init__(self):
         self.range = 1000  # Randomly selected number is within +/- this value
         self.bounds = 10000
 
-        self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]))
+        self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]),
+                                       dtype=np.float32)
         self.observation_space = spaces.Discrete(4)
 
         self.number = 0

diff --git a/gym/envs/toy_text/hotter_colder.py b/gym/envs/toy_text/hotter_colder.py
@@ -1,7 +1,8 @@
+import numpy as np
+
 import gym
 from gym import spaces
 from gym.utils import seeding
-import numpy as np
 
 
 class HotterColder(gym.Env):
@@ -25,7 +26,8 @@ def __init__(self):
         self.range = 1000  # +/- value the randomly select number can be between
         self.bounds = 2000  # Action space bounds
 
-        self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]))
+        self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]),
+                                       dtype=np.float32)
         self.observation_space = spaces.Discrete(4)
 
         self.number = 0

diff --git a/gym/envs/toy_text/kellycoinflip.py b/gym/envs/toy_text/kellycoinflip.py
@@ -1,23 +1,26 @@
-import gym
-from gym import spaces
-from gym.utils import seeding
-from gym.spaces import prng
 # for Generalized Kelly coinflip game distributions:
 from scipy.stats import genpareto
 import numpy as np
 import numpy.random
 
+import gym
+from gym import spaces
+from gym.utils import seeding
+from gym.spaces import prng
+
+
 def flip(edge, np_random):
     return np_random.uniform() < edge
 
+
 class KellyCoinflipEnv(gym.Env):
     """The Kelly coinflip game is a simple gambling introduced by Haghani & Dewey 2016's 'Rational Decision-Making Under Uncertainty: Observed Betting Patterns on a Biased Coin' (https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2856963), to test human decision-making in a setting like that of the stock market: positive expected value but highly stochastic; they found many subjects performed badly, often going broke, even though optimal play would reach the maximum with ~95% probability. In the coinflip game, the player starts with $25.00 to gamble over 300 rounds; each round, they can bet anywhere up to their net worth (in penny increments), and then a coin is flipped; with P=0.6, the player wins twice what they bet, otherwise, they lose it. $250 is the maximum players are allowed to have. At the end of the 300 rounds, they keep whatever they have. The human subjects earned an average of $91; a simple use of the Kelly criterion (https://en.wikipedia.org/wiki/Kelly_criterion), giving a strategy of betting 20% until the cap is hit, would earn $240; a decision tree analysis shows that optimal play earns $246 (https://www.gwern.net/Coin-flip). The game short-circuits when either wealth = $0 (since one can never recover) or wealth = cap (trivial optimal play: one simply bets nothing thereafter). In this implementation, we default to the paper settings of $25, 60% odds, wealth cap of $250, and 300 rounds. To specify the action space in advance, we multiply the wealth cap (in dollars) by 100 (to allow for all penny bets); should one attempt to bet more money than one has, it is rounded down to one's net worth. (Alternately, a mistaken bet could end the episode immediately; it's not clear to me which version would be better.) For a harder version which randomizes the 3 key parameters, see the Generalized Kelly coinflip game."""
     metadata = {'render.modes': ['human']}
     def __init__(self, initialWealth=25.0, edge=0.6, maxWealth=250.0, maxRounds=300):
 
         self.action_space = spaces.Discrete(int(maxWealth*100)) # betting in penny increments
         self.observation_space = spaces.Tuple((
-            spaces.Box(0, maxWealth, [1]), # (w,b)
+            spaces.Box(0, maxWealth, [1], dtype=np.float32), # (w,b)
             spaces.Discrete(maxRounds+1)))
         self.reward_range = (0, maxWealth)
         self.edge = edge
@@ -95,11 +98,11 @@ def __init__(self, initialWealth=25.0, edgePriorAlpha=7, edgePriorBeta=3, maxWea
         # the rest proceeds as before:
         self.action_space = spaces.Discrete(int(maxWealth*100))
         self.observation_space = spaces.Tuple((
-            spaces.Box(0, maxWealth, shape=[1]), # current wealth
+            spaces.Box(0, maxWealth, shape=[1], dtype=np.float32), # current wealth
             spaces.Discrete(maxRounds+1), # rounds elapsed
             spaces.Discrete(maxRounds+1), # wins
             spaces.Discrete(maxRounds+1), # losses
-            spaces.Box(0, maxWealth, [1]))) # maximum observed wealth
+            spaces.Box(0, maxWealth, [1], dtype=np.float32))) # maximum observed wealth
         self.reward_range = (0, maxWealth)
         self.edge = edge
         self.wealth = self.initialWealth

diff --git a/gym/logger.py b/gym/logger.py
@@ -1,3 +1,5 @@
+import warnings
+
 from gym.utils import colorize
 
 DEBUG = 10
@@ -25,7 +27,7 @@ def info(msg, *args):
 
 def warn(msg, *args):
     if MIN_LEVEL <= WARN:
-        print(colorize('%s: %s'%('WARN', msg % args), 'yellow'))
+        warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))
 
 def error(msg, *args):
     if MIN_LEVEL <= ERROR:

diff --git a/gym/spaces/box.py b/gym/spaces/box.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 import gym
 from gym import logger
 
@@ -28,7 +29,7 @@ def __init__(self, low=None, high=None, shape=None, dtype=None):
                 dtype = np.uint8
             else:
                 dtype = np.float32
-            logger.warn("gym.spaces.Box autodetected dtype as %s. Please provide explicit dtype." % dtype)
+            logger.warn("gym.spaces.Box autodetected dtype as {}. Please provide explicit dtype.".format(dtype))
         self.low = low.astype(dtype)
         self.high = high.astype(dtype)
         gym.Space.__init__(self, shape, dtype)
@@ -47,6 +48,6 @@ def from_jsonable(self, sample_n):
 
     def __repr__(self):
         return "Box" + str(self.shape)
-        
+
     def __eq__(self, other):
         return np.allclose(self.low, other.low) and np.allclose(self.high, other.high)
diff --git a/gym/spaces/tests/test_spaces.py b/gym/spaces/tests/test_spaces.py
@@ -10,10 +10,11 @@
 @pytest.mark.parametrize("space", [
               Discrete(3),
               Tuple([Discrete(5), Discrete(10)]),
-              Tuple([Discrete(5), Box(low=np.array([0, 0]),high=np.array([1, 5]))]),
+              Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
               Tuple((Discrete(5), Discrete(2), Discrete(2))),
               MultiDiscrete([2, 2, 100]),
-              Dict({"position": Discrete(5), "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]))}),
+              Dict({"position": Discrete(5),
+                    "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
               ])
 def test_roundtripping(space):
     sample_1 = space.sample()
@@ -39,11 +40,12 @@ def test_roundtripping(space):
               Discrete(3),
               Box(low=np.array([-10, 0]),high=np.array([10, 10])),
               Tuple([Discrete(5), Discrete(10)]),
-              Tuple([Discrete(5), Box(low=np.array([0, 0]),high=np.array([1, 5]))]),
+              Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
               Tuple((Discrete(5), Discrete(2), Discrete(2))),
               MultiDiscrete([2, 2, 100]),
               MultiBinary(6),
-              Dict({"position": Discrete(5), "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]))}),
+              Dict({"position": Discrete(5),
+                    "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
               ])
 def test_equality(space):
     space1 = space
@@ -55,8 +57,8 @@ def test_equality(space):
               (Discrete(3), Discrete(4)),
               (MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 8])),
               (MultiBinary(8), MultiBinary(7)),
-              (Box(low=np.array([-10, 0]),high=np.array([10, 10])),
-                Box(low=np.array([-10, 0]),high=np.array([10, 9]))),
+              (Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
+                Box(low=np.array([-10, 0]), high=np.array([10, 9]), dtype=np.float32)),
               (Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(1), Discrete(10)])),
               (Dict({"position": Discrete(5)}), Dict({"position": Discrete(4)})),
               (Dict({"position": Discrete(5)}), Dict({"speed": Discrete(5)})),
-Original file line number
+Diff line change
@@ -3,6 +3,7 @@
     *.py~
     .DS_Store
     .cache
+    .pytest_cache/
     # Setuptools distribution and build folders.
     /dist/
@@ Expand Down @@