Skip to content

Commit

Permalink
Fix autodetect dtype warnings (openai#1234)
Browse files Browse the repository at this point in the history
* Fix autodetect dtype warnings

* Use warnings module for gym logger

* Fix warning in tests
  • Loading branch information
araffin authored and pzhokhov committed Nov 29, 2018
1 parent e09b0f5 commit cdd212d
Show file tree
Hide file tree
Showing 12 changed files with 65 additions and 37 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
*.py~
.DS_Store
.cache
.pytest_cache/

# Setuptools distribution and build folders.
/dist/
Expand Down
11 changes: 6 additions & 5 deletions gym/envs/box2d/bipedal_walker.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import sys, math
import numpy as np
import sys
import math

import numpy as np
import Box2D
from Box2D.b2 import (edgeShape, circleShape, fixtureDef, polygonShape, revoluteJointDef, contactListener)

Expand Down Expand Up @@ -137,9 +138,9 @@ def __init__(self):

self.reset()

high = np.array([np.inf]*24)
self.action_space = spaces.Box(np.array([-1,-1,-1,-1]), np.array([+1,+1,+1,+1]))
self.observation_space = spaces.Box(-high, high)
high = np.array([np.inf] * 24)
self.action_space = spaces.Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]), dtype=np.float32)
self.observation_space = spaces.Box(-high, high, dtype=np.float32)

def seed(self, seed=None):
self.np_random, seed = seeding.np_random(seed)
Expand Down
7 changes: 4 additions & 3 deletions gym/envs/classic_control/acrobot.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
"""classic Acrobot task"""
from gym import core, spaces
from gym.utils import seeding
import numpy as np
from numpy import sin, cos, pi

from gym import core, spaces
from gym.utils import seeding

__copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
__credits__ = ["Alborz Geramifard", "Robert H. Klein", "Christoph Dann",
"William Dabney", "Jonathan P. How"]
Expand Down Expand Up @@ -86,7 +87,7 @@ def __init__(self):
self.viewer = None
high = np.array([1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2])
low = -high
self.observation_space = spaces.Box(low=low, high=high)
self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)
self.action_space = spaces.Discrete(3)
self.state = None
self.seed()
Expand Down
12 changes: 8 additions & 4 deletions gym/envs/classic_control/continuous_mountain_car.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,18 @@
and then modified by Arnaud de Broissia
* the OpenAI/gym MountainCar environment
itself from
itself from
http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
permalink: https://perma.cc/6Z2N-PFWC
"""

import math

import numpy as np

import gym
from gym import spaces
from gym.utils import seeding
import numpy as np

class Continuous_MountainCarEnv(gym.Env):
metadata = {
Expand All @@ -40,8 +42,10 @@ def __init__(self):

self.viewer = None

self.action_space = spaces.Box(low=self.min_action, high=self.max_action, shape=(1,))
self.observation_space = spaces.Box(low=self.low_state, high=self.high_state)
self.action_space = spaces.Box(low=self.min_action, high=self.max_action,
shape=(1,), dtype=np.float32)
self.observation_space = spaces.Box(low=self.low_state, high=self.high_state,
dtype=np.float32)

self.seed()
self.reset()
Expand Down
6 changes: 4 additions & 2 deletions gym/envs/classic_control/mountain_car.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
"""

import math

import numpy as np

import gym
from gym import spaces
from gym.utils import seeding
import numpy as np

class MountainCarEnv(gym.Env):
metadata = {
Expand All @@ -27,7 +29,7 @@ def __init__(self):
self.viewer = None

self.action_space = spaces.Discrete(3)
self.observation_space = spaces.Box(self.low, self.high)
self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32)

self.seed()
self.reset()
Expand Down
13 changes: 10 additions & 3 deletions gym/envs/tests/test_envs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
import pytest
import numpy as np

from gym import envs
from gym.envs.tests.spec_list import spec_list

Expand All @@ -8,7 +9,14 @@
# envs.
@pytest.mark.parametrize("spec", spec_list)
def test_env(spec):
env = spec.make()
# Capture warnings
with pytest.warns(None) as warnings:
env = spec.make()

# Check that dtype is explicitly declared for gym.Box spaces
for warning_msg in warnings:
assert not 'autodetected dtype' in str(warning_msg.message)

ob_space = env.observation_space
act_space = env.action_space
ob = env.reset()
Expand Down Expand Up @@ -40,4 +48,3 @@ def test_random_rollout():
(ob, _reward, done, _info) = env.step(a)
if done: break
env.close()

6 changes: 4 additions & 2 deletions gym/envs/toy_text/guessing_game.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import numpy as np

import gym
from gym import spaces
from gym.utils import seeding
import numpy as np


class GuessingGame(gym.Env):
Expand Down Expand Up @@ -40,7 +41,8 @@ def __init__(self):
self.range = 1000 # Randomly selected number is within +/- this value
self.bounds = 10000

self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]))
self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]),
dtype=np.float32)
self.observation_space = spaces.Discrete(4)

self.number = 0
Expand Down
6 changes: 4 additions & 2 deletions gym/envs/toy_text/hotter_colder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import numpy as np

import gym
from gym import spaces
from gym.utils import seeding
import numpy as np


class HotterColder(gym.Env):
Expand All @@ -25,7 +26,8 @@ def __init__(self):
self.range = 1000 # +/- value the randomly select number can be between
self.bounds = 2000 # Action space bounds

self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]))
self.action_space = spaces.Box(low=np.array([-self.bounds]), high=np.array([self.bounds]),
dtype=np.float32)
self.observation_space = spaces.Discrete(4)

self.number = 0
Expand Down
17 changes: 10 additions & 7 deletions gym/envs/toy_text/kellycoinflip.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,26 @@
import gym
from gym import spaces
from gym.utils import seeding
from gym.spaces import prng
# for Generalized Kelly coinflip game distributions:
from scipy.stats import genpareto
import numpy as np
import numpy.random

import gym
from gym import spaces
from gym.utils import seeding
from gym.spaces import prng


def flip(edge, np_random):
    """Simulate a single biased coin toss.

    Draws one sample via ``np_random.uniform()`` and reports whether it
    fell strictly below ``edge``.

    Args:
        edge: Threshold the sample is compared against; it acts as the
            win probability when the sample is uniform on [0, 1).
        np_random: Random source exposing a zero-argument ``uniform()``.

    Returns:
        The result of ``sample < edge``.
    """
    sample = np_random.uniform()
    return sample < edge


class KellyCoinflipEnv(gym.Env):
"""The Kelly coinflip game is a simple gambling game introduced by Haghani & Dewey 2016's 'Rational Decision-Making Under Uncertainty: Observed Betting Patterns on a Biased Coin' (https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2856963), to test human decision-making in a setting like that of the stock market: positive expected value but highly stochastic; they found many subjects performed badly, often going broke, even though optimal play would reach the maximum with ~95% probability. In the coinflip game, the player starts with $25.00 to gamble over 300 rounds; each round, they can bet anywhere up to their net worth (in penny increments), and then a coin is flipped; with P=0.6, the player wins twice what they bet, otherwise, they lose it. $250 is the maximum players are allowed to have. At the end of the 300 rounds, they keep whatever they have. The human subjects earned an average of $91; a simple use of the Kelly criterion (https://en.wikipedia.org/wiki/Kelly_criterion), giving a strategy of betting 20% until the cap is hit, would earn $240; a decision tree analysis shows that optimal play earns $246 (https://www.gwern.net/Coin-flip). The game short-circuits when either wealth = $0 (since one can never recover) or wealth = cap (trivial optimal play: one simply bets nothing thereafter). In this implementation, we default to the paper settings of $25, 60% odds, wealth cap of $250, and 300 rounds. To specify the action space in advance, we multiply the wealth cap (in dollars) by 100 (to allow for all penny bets); should one attempt to bet more money than one has, it is rounded down to one's net worth. (Alternately, a mistaken bet could end the episode immediately; it's not clear to me which version would be better.) For a harder version which randomizes the 3 key parameters, see the Generalized Kelly coinflip game."""
metadata = {'render.modes': ['human']}
def __init__(self, initialWealth=25.0, edge=0.6, maxWealth=250.0, maxRounds=300):

self.action_space = spaces.Discrete(int(maxWealth*100)) # betting in penny increments
self.observation_space = spaces.Tuple((
spaces.Box(0, maxWealth, [1]), # (w,b)
spaces.Box(0, maxWealth, [1], dtype=np.float32), # (w,b)
spaces.Discrete(maxRounds+1)))
self.reward_range = (0, maxWealth)
self.edge = edge
Expand Down Expand Up @@ -95,11 +98,11 @@ def __init__(self, initialWealth=25.0, edgePriorAlpha=7, edgePriorBeta=3, maxWea
# the rest proceeds as before:
self.action_space = spaces.Discrete(int(maxWealth*100))
self.observation_space = spaces.Tuple((
spaces.Box(0, maxWealth, shape=[1]), # current wealth
spaces.Box(0, maxWealth, shape=[1], dtype=np.float32), # current wealth
spaces.Discrete(maxRounds+1), # rounds elapsed
spaces.Discrete(maxRounds+1), # wins
spaces.Discrete(maxRounds+1), # losses
spaces.Box(0, maxWealth, [1]))) # maximum observed wealth
spaces.Box(0, maxWealth, [1], dtype=np.float32))) # maximum observed wealth
self.reward_range = (0, maxWealth)
self.edge = edge
self.wealth = self.initialWealth
Expand Down
4 changes: 3 additions & 1 deletion gym/logger.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import warnings

from gym.utils import colorize

DEBUG = 10
Expand Down Expand Up @@ -25,7 +27,7 @@ def info(msg, *args):

def warn(msg, *args):
if MIN_LEVEL <= WARN:
print(colorize('%s: %s'%('WARN', msg % args), 'yellow'))
warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))

def error(msg, *args):
if MIN_LEVEL <= ERROR:
Expand Down
5 changes: 3 additions & 2 deletions gym/spaces/box.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np

import gym
from gym import logger

Expand Down Expand Up @@ -28,7 +29,7 @@ def __init__(self, low=None, high=None, shape=None, dtype=None):
dtype = np.uint8
else:
dtype = np.float32
logger.warn("gym.spaces.Box autodetected dtype as %s. Please provide explicit dtype." % dtype)
logger.warn("gym.spaces.Box autodetected dtype as {}. Please provide explicit dtype.".format(dtype))
self.low = low.astype(dtype)
self.high = high.astype(dtype)
gym.Space.__init__(self, shape, dtype)
Expand All @@ -47,6 +48,6 @@ def from_jsonable(self, sample_n):

def __repr__(self):
    """Return a short label: the text ``Box`` followed by ``str(self.shape)``."""
    shape_text = str(self.shape)
    return "".join(("Box", shape_text))

def __eq__(self, other):
    """Return True when ``other`` has the same bounds as this space.

    Two spaces compare equal when their ``low`` arrays and their ``high``
    arrays have identical shapes and are element-wise close (per
    ``np.allclose``).

    Args:
        other: Object to compare against. Anything lacking ``low`` or
            ``high`` attributes is treated as unequal.

    Returns:
        bool: True if both bounds match, False otherwise.
    """
    missing = object()
    other_low = getattr(other, "low", missing)
    other_high = getattr(other, "high", missing)
    if other_low is missing or other_high is missing:
        # Not Box-like: report inequality instead of raising AttributeError.
        return False
    # np.allclose broadcasts its operands, so a shape-(1,) bound would
    # otherwise compare equal to a shape-(3,) bound holding the same value
    # (and non-broadcastable shapes would raise). Require identical shapes.
    if np.shape(self.low) != np.shape(other_low):
        return False
    if np.shape(self.high) != np.shape(other_high):
        return False
    return np.allclose(self.low, other_low) and np.allclose(self.high, other_high)
14 changes: 8 additions & 6 deletions gym/spaces/tests/test_spaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
@pytest.mark.parametrize("space", [
Discrete(3),
Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(low=np.array([0, 0]),high=np.array([1, 5]))]),
Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
Dict({"position": Discrete(5), "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]))}),
Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
])
def test_roundtripping(space):
sample_1 = space.sample()
Expand All @@ -39,11 +40,12 @@ def test_roundtripping(space):
Discrete(3),
Box(low=np.array([-10, 0]),high=np.array([10, 10])),
Tuple([Discrete(5), Discrete(10)]),
Tuple([Discrete(5), Box(low=np.array([0, 0]),high=np.array([1, 5]))]),
Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]),
Tuple((Discrete(5), Discrete(2), Discrete(2))),
MultiDiscrete([2, 2, 100]),
MultiBinary(6),
Dict({"position": Discrete(5), "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]))}),
Dict({"position": Discrete(5),
"velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}),
])
def test_equality(space):
space1 = space
Expand All @@ -55,8 +57,8 @@ def test_equality(space):
(Discrete(3), Discrete(4)),
(MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 8])),
(MultiBinary(8), MultiBinary(7)),
(Box(low=np.array([-10, 0]),high=np.array([10, 10])),
Box(low=np.array([-10, 0]),high=np.array([10, 9]))),
(Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32),
Box(low=np.array([-10, 0]), high=np.array([10, 9]), dtype=np.float32)),
(Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(1), Discrete(10)])),
(Dict({"position": Discrete(5)}), Dict({"position": Discrete(4)})),
(Dict({"position": Discrete(5)}), Dict({"speed": Discrete(5)})),
Expand Down

0 comments on commit cdd212d

Please sign in to comment.