Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make Tuple and Dicts be seedable with lists and dicts of seeds + make the seed in default initialization controllable #1774

Merged
merged 16 commits into from
Sep 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions gym/spaces/box.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class Box(Space):

"""

def __init__(self, low, high, shape=None, dtype=np.float32):
def __init__(self, low, high, shape=None, dtype=np.float32, seed=None):
assert dtype is not None, "dtype must be explicitly provided. "
self.dtype = np.dtype(dtype)

Expand Down Expand Up @@ -81,7 +81,7 @@ def _get_precision(dtype):
self.bounded_below = -np.inf < self.low
self.bounded_above = np.inf > self.high

super(Box, self).__init__(self.shape, self.dtype)
super(Box, self).__init__(self.shape, self.dtype, seed)

def is_bounded(self, manner="both"):
below = np.all(self.bounded_below)
Expand Down
58 changes: 38 additions & 20 deletions gym/spaces/dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,11 @@ class Dict(Space):
})
"""

def __init__(self, spaces=None, **spaces_kwargs):
def __init__(self, spaces=None, seed=None, **spaces_kwargs):
assert (spaces is None) or (
not spaces_kwargs
), "Use either Dict(spaces=dict(...)) or Dict(foo=x, bar=z)"

if spaces is None:
spaces = spaces_kwargs
if isinstance(spaces, dict) and not isinstance(spaces, OrderedDict):
Expand All @@ -49,28 +50,45 @@ def __init__(self, spaces=None, **spaces_kwargs):
space, Space
), "Values of the dict should be instances of gym.Space"
super(Dict, self).__init__(
None, None
None, None, seed
) # None for shape and dtype, since it'll require special handling

def seed(self, seed=None):
seed = super().seed(seed)
try:
subseeds = self.np_random.choice(
np.iinfo(int).max,
size=len(self.spaces),
replace=False, # unique subseed for each subspace
)
except ValueError:
subseeds = self.np_random.choice(
np.iinfo(int).max,
size=len(self.spaces),
replace=True, # we get more than INT_MAX subspaces
)

for subspace, subseed in zip(self.spaces.values(), subseeds):
seed.append(subspace.seed(int(subseed))[0])

return seed
seeds = []
if isinstance(seed, dict):
for key, seed_key in zip(self.spaces, seed):
assert key == seed_key, print(
"Key value",
seed_key,
"in passed seed dict did not match key value",
key,
"in spaces Dict.",
)
seeds += self.spaces[key].seed(seed[seed_key])
elif isinstance(seed, int):
seeds = super().seed(seed)
try:
subseeds = self.np_random.choice(
np.iinfo(int).max,
size=len(self.spaces),
replace=False, # unique subseed for each subspace
)
except ValueError:
subseeds = self.np_random.choice(
np.iinfo(int).max,
size=len(self.spaces),
replace=True, # we get more than INT_MAX subspaces
)

for subspace, subseed in zip(self.spaces.values(), subseeds):
seeds.append(subspace.seed(int(subseed))[0])
elif seed is None:
for space in self.spaces.values():
seeds += space.seed(seed)
else:
raise TypeError("Passed seed not of an expected type: dict or int or None")

return seeds
Comment on lines 56 to +91
Copy link
Contributor

@XuehaiPan XuehaiPan Sep 19, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Should we call super().seed(seed) to seed self.np_random as well when seed is dict or None here?

  2. We could merge the last two cases (seed is int or None) into one. After statement seeds = super().seed(seed), self.np_random become seeded and variable seeds is a list of integers.

  3. Should we only add the main seed of subspace (subspace.seed()[0]) to the return value rather than extend all items in subspace.seed() (the op seeds += in the first and the last cases)?
    If the subpace of Dict here is another compound space (Tuple or Dict), the length of the return list could be different len(d.seed(None)) != len(d.seed(0)). (However, I think we probably only use the first item, and hardly ever use the rest in the list and the length of the list. This issue probably never affects normal users.)

    def test_seed_returns_list(space):
    def assert_integer_list(seed):
    assert isinstance(seed, list)
    assert len(seed) >= 1
    assert all([isinstance(s, int) for s in seed])
    assert_integer_list(space.seed(None))
    assert_integer_list(space.seed(0))

    We could add a new test assert len(space.seed(None)) == len(space.seed(0)) here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. I'd say it causes minimal harm and could be done. Doing this would create the _np_random PRNG of the "base" class's object and make it consistent with other spaces in case all Space objects are expected to have an _np_random that is not None, although I assume that only _np_randoms of the sub-classes are ever used.
  2. It's subjective. For the case, when it's None, I assume the user would want the seed of every sub-space to be as random as possible in the sense that they would want all of them to be seeded with None. But generating the seeds for sub-spaces using an _np_random PRNG generated based on None (i.e., the suggested way) should also be fine.
  3. I don't think that there's an expected fixed length of the list of seeds returned, so I do not think it makes a difference either way. In any case, the length of the list of seeds returned can always be calculated if the sub-spaces are known, in case a user really wants dig deeper.

So, in general, I'm fine with any of the suggestions either being or not being implemented (also for the suggestions on Tuple below). Please let me know.


def sample(self):
return OrderedDict([(k, space.sample()) for k, space in self.spaces.items()])
Expand Down
4 changes: 2 additions & 2 deletions gym/spaces/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ class Discrete(Space):

"""

def __init__(self, n):
def __init__(self, n, seed=None):
assert n >= 0
self.n = n
super(Discrete, self).__init__((), np.int64)
super(Discrete, self).__init__((), np.int64, seed)

def sample(self):
return self.np_random.randint(self.n)
Expand Down
4 changes: 2 additions & 2 deletions gym/spaces/multi_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ class MultiBinary(Space):

"""

def __init__(self, n):
def __init__(self, n, seed=None):
self.n = n
if type(n) in [tuple, list, np.ndarray]:
input_n = n
else:
input_n = (n,)
super(MultiBinary, self).__init__(input_n, np.int8)
super(MultiBinary, self).__init__(input_n, np.int8, seed)

def sample(self):
return self.np_random.randint(low=0, high=2, size=self.n, dtype=self.dtype)
Expand Down
4 changes: 2 additions & 2 deletions gym/spaces/multi_discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ class MultiDiscrete(Space):

"""

def __init__(self, nvec, dtype=np.int64):
def __init__(self, nvec, dtype=np.int64, seed=None):
"""
nvec: vector of counts of each categorical variable
"""
assert (np.array(nvec) > 0).all(), "nvec (counts) have to be positive"
self.nvec = np.asarray(nvec, dtype=dtype)

super(MultiDiscrete, self).__init__(self.nvec.shape, dtype)
super(MultiDiscrete, self).__init__(self.nvec.shape, dtype, seed)

def sample(self):
return (self.np_random.random_sample(self.nvec.shape) * self.nvec).astype(
Expand Down
4 changes: 3 additions & 1 deletion gym/spaces/space.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@ class Space(object):
not handle custom spaces properly. Use custom spaces with care.
"""

def __init__(self, shape=None, dtype=None):
def __init__(self, shape=None, dtype=None, seed=None):
import numpy as np # takes about 300-400ms to import, so we load lazily

self._shape = None if shape is None else tuple(shape)
self.dtype = None if dtype is None else np.dtype(dtype)
self._np_random = None
if seed is not None:
self.seed(seed)

@property
def np_random(self):
Expand Down
47 changes: 47 additions & 0 deletions gym/spaces/tests/test_spaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,53 @@ def test_bad_space_calls(space_fn):
space_fn()


def test_seed_Dict():
test_space = Dict(
{
"a": Box(low=0, high=1, shape=(3, 3)),
"b": Dict(
{
"b_1": Box(low=-100, high=100, shape=(2,)),
"b_2": Box(low=-1, high=1, shape=(2,)),
}
),
"c": Discrete(5),
}
)

seed_dict = {
"a": 0,
"b": {
"b_1": 1,
"b_2": 2,
},
"c": 3,
}

test_space.seed(seed_dict)

# "Unpack" the dict sub-spaces into individual spaces
a = Box(low=0, high=1, shape=(3, 3))
a.seed(0)
b_1 = Box(low=-100, high=100, shape=(2,))
b_1.seed(1)
b_2 = Box(low=-1, high=1, shape=(2,))
b_2.seed(2)
c = Discrete(5)
c.seed(3)

for i in range(10):
test_s = test_space.sample()
a_s = a.sample()
assert (test_s["a"] == a_s).all()
b_1_s = b_1.sample()
assert (test_s["b"]["b_1"] == b_1_s).all()
b_2_s = b_2.sample()
assert (test_s["b"]["b_2"] == b_2_s).all()
c_s = c.sample()
assert test_s["c"] == c_s


def test_box_dtype_check():
# Related Issues:
# https://github.com/openai/gym/issues/2357
Expand Down
47 changes: 29 additions & 18 deletions gym/spaces/tuple.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,33 +10,44 @@ class Tuple(Space):
self.observation_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(3)))
"""

def __init__(self, spaces):
def __init__(self, spaces, seed=None):
self.spaces = spaces
for space in spaces:
assert isinstance(
space, Space
), "Elements of the tuple must be instances of gym.Space"
super(Tuple, self).__init__(None, None)
super(Tuple, self).__init__(None, None, seed)

def seed(self, seed=None):
seed = super().seed(seed)
try:
subseeds = self.np_random.choice(
np.iinfo(int).max,
size=len(self.spaces),
replace=False, # unique subseed for each subspace
)
except ValueError:
subseeds = self.np_random.choice(
np.iinfo(int).max,
size=len(self.spaces),
replace=True, # we get more than INT_MAX subspaces
)
seeds = []

if isinstance(seed, list):
for i, space in enumerate(self.spaces):
seeds += space.seed(seed[i])
elif isinstance(seed, int):
seeds = super().seed(seed)
try:
subseeds = self.np_random.choice(
np.iinfo(int).max,
size=len(self.spaces),
replace=False, # unique subseed for each subspace
)
except ValueError:
subseeds = self.np_random.choice(
np.iinfo(int).max,
size=len(self.spaces),
replace=True, # we get more than INT_MAX subspaces
)

for subspace, subseed in zip(self.spaces, subseeds):
seed.append(subspace.seed(int(subseed))[0])
for subspace, subseed in zip(self.spaces, subseeds):
seeds.append(subspace.seed(int(subseed))[0])
elif seed is None:
for space in self.spaces:
seeds += space.seed(seed)
else:
raise TypeError("Passed seed not of an expected type: list or int or None")

return seed
return seeds
Comment on lines 21 to +50
Copy link
Contributor

@XuehaiPan XuehaiPan Sep 19, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment here for Tuple (see Dict above).

  1. Call super().seed(seed).
  2. Merge cases seed is int and seed is None.
  3. Only add the main seed of the subspace to the return value.


def sample(self):
return tuple([space.sample() for space in self.spaces])
Expand Down