From aa29f27372ea7b879b93504cd7db5c43e0cd2800 Mon Sep 17 00:00:00 2001 From: Daniel <13357734+thedch@users.noreply.github.com> Date: Thu, 12 Dec 2024 10:49:20 -0800 Subject: [PATCH 01/21] Update pong.py --- pufferlib/ocean/pong/pong.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pufferlib/ocean/pong/pong.py b/pufferlib/ocean/pong/pong.py index 6ec04ecd..9f1fe7d1 100644 --- a/pufferlib/ocean/pong/pong.py +++ b/pufferlib/ocean/pong/pong.py @@ -11,6 +11,7 @@ import pufferlib from pufferlib.ocean.pong.cy_pong import CyPong + class Pong(pufferlib.PufferEnv): def __init__(self, num_envs=1, render_mode=None, width=500, height=640, paddle_width=20, paddle_height=70, @@ -18,8 +19,9 @@ def __init__(self, num_envs=1, render_mode=None, ball_initial_speed_x=10, ball_initial_speed_y=1, ball_speed_y_increment=3, ball_max_speed_y=13, max_score=21, frameskip=1, report_interval=1, buf=None): - self.single_observation_space = gymnasium.spaces.Box(low=0, high=1, - shape=(8,), dtype=np.float32) + self.single_observation_space = gymnasium.spaces.Box( + low=0, high=1, shape=(8,), dtype=np.float32, + ) self.single_action_space = gymnasium.spaces.Discrete(3) self.render_mode = render_mode self.num_agents = num_envs @@ -65,7 +67,7 @@ def test_performance(timeout=10, atn_cache=1024): env.reset() tick = 0 - actions = np.random.randint(0, 2, (atn_cache, env.num_envs)) + actions = np.random.randint(0, 2, (atn_cache, env.num_agents)) import time start = time.time() @@ -74,7 +76,8 @@ def test_performance(timeout=10, atn_cache=1024): env.step(atn) tick += 1 - print(f'SPS: %f', env.num_envs * tick / (time.time() - start)) + print('SPS: {env.num_agents * tick / (time.time() - start)}) + if __name__ == '__main__': test_performance() From d06946e0806610d1696e95590687a0b0753da3a4 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Sat, 21 Dec 2024 21:12:55 +0000 Subject: [PATCH 02/21] Makes PufferLib compatible with Python3.12. Probably doesn't break anything important --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 58f9501f..9d80dfaf 100644 --- a/setup.py +++ b/setup.py @@ -66,7 +66,7 @@ 'tensorboard==2.11.2', 'torch', 'tyro==0.8.6', - 'wandb==0.13.7', + 'wandb==0.19.1', ] ray = [ @@ -281,7 +281,7 @@ }, include_package_data=True, install_requires=[ - 'numpy==1.23.3', + 'numpy==2.2.0', 'opencv-python==3.4.17.63', 'cython>=3.0.0', 'rich', From 952e935c4fb5aff12af64c39a0e7e0b9835ba281 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Sat, 21 Dec 2024 21:23:57 +0000 Subject: [PATCH 03/21] Fix bug --- pufferlib/ocean/pong/pong.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/ocean/pong/pong.py b/pufferlib/ocean/pong/pong.py index 9f1fe7d1..dc2685e0 100644 --- a/pufferlib/ocean/pong/pong.py +++ b/pufferlib/ocean/pong/pong.py @@ -76,7 +76,7 @@ def test_performance(timeout=10, atn_cache=1024): env.step(atn) tick += 1 - print('SPS: {env.num_agents * tick / (time.time() - start)}) + print(f'SPS: {env.num_agents * tick / (time.time() - start)}') if __name__ == '__main__': From a5a054b94f2ad4ad8f00fa958310dba477a7cfb7 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Sat, 21 Dec 2024 21:36:54 +0000 Subject: [PATCH 04/21] Revert numpy --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9d80dfaf..983d339e 100644 --- a/setup.py +++ b/setup.py @@ -281,7 +281,7 @@ }, include_package_data=True, install_requires=[ - 'numpy==2.2.0', + 'numpy==1.23.3', 'opencv-python==3.4.17.63', 'cython>=3.0.0', 'rich', From e5168f938e360bd7b1c244710dd3ec3f5ae1067a Mon Sep 17 00:00:00 2001 From: thatguy11325 <148832074+thatguy11325@users.noreply.github.com> Date: Tue, 24 Dec 2024 09:45:05 -0500 Subject: [PATCH 05/21] add a barebones github action and status badge --- .github/workflows/install.yml | 27 +++++++++++++++++++++++++++ README.md | 2 ++ 2 files changed, 29 insertions(+) create mode 100644 .github/workflows/install.yml diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml new file mode 100644 index 00000000..da7fa6a5 --- /dev/null +++ b/.github/workflows/install.yml @@ -0,0 +1,27 @@ +name: install +on: + push: + pull_request: + +jobs: + test: + name: test ${{ matrix.py }} - ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - ubuntu-latest + py: + - "3.11" + - "3.10" + steps: + - name: Setup python for test ${{ matrix.py }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.py }} + - uses: actions/checkout@v3 + - name: Upgrade pip + run: python -m pip install -U pip + - name: Install pufferlib + run: pip3 install -e . \ No newline at end of file diff --git a/README.md b/README.md index b6614449..dc1bfaa8 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ ![figure](https://pufferai.github.io/source/resource/header.png) [![PyPI version](https://badge.fury.io/py/pufferlib.svg)](https://badge.fury.io/py/pufferlib) +![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pufferlib) +![Github Actions](https://github.com/PufferAI/PufferLib/actions/workflows/install.yml/badge.svg) [![](https://dcbadge.vercel.app/api/server/spT4huaGYV?style=plastic)](https://discord.gg/spT4huaGYV) [![Twitter](https://img.shields.io/twitter/url/https/twitter.com/cloudposse.svg?style=social&label=Follow%20%40jsuarez5341)](https://twitter.com/jsuarez5341) From 7fa25a8d2e499375570baffe29ee6d5581ad3192 Mon Sep 17 00:00:00 2001 From: thatguy11325 <148832074+thatguy11325@users.noreply.github.com> Date: Tue, 24 Dec 2024 14:18:26 -0500 Subject: [PATCH 06/21] add macos --- .github/workflows/install.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index da7fa6a5..5ebe7dc0 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -12,9 +12,12 @@ jobs: matrix: os: - ubuntu-latest + - macos-latest py: - "3.11" - "3.10" + - "3.9" + - "3.8" steps: - name: Setup python for test ${{ matrix.py }} uses: actions/setup-python@v4 From 4e793cf35d151248fb3c598fedc6555004ba82ad Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 24 Dec 2024 15:35:05 -0500 Subject: [PATCH 07/21] Update install.yml --- .github/workflows/install.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 5ebe7dc0..873d3c5c 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -17,7 +17,6 @@ jobs: - "3.11" - "3.10" - "3.9" - - "3.8" steps: - name: Setup python for test ${{ matrix.py }} uses: actions/setup-python@v4 @@ -27,4 +26,4 @@ jobs: - name: Upgrade pip run: python -m pip install -U pip - name: Install pufferlib - run: pip3 install -e . \ No newline at end of file + run: pip3 install -e . From c1dc5491553e997ecd28eacf1b5290759a599a3b Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 15 Jan 2025 02:08:26 +0000 Subject: [PATCH 08/21] Very dumb change. Prefix c functions with c_ to prevent Conda's garbage compiler from breaking --- config/default.ini | 1 + config/ocean/connect4.ini | 1 + config/ocean/go.ini | 1 + config/ocean/grid.ini | 1 + config/ocean/moba.ini | 1 + config/ocean/nmmo3.ini | 3 ++- config/ocean/snake.ini | 1 + config/ocean/trash_pickup.ini | 1 + demo.py | 22 ++++++++++--------- pufferlib/ocean/breakout/breakout.h | 8 +++---- pufferlib/ocean/breakout/cy_breakout.pyx | 12 +++++----- pufferlib/ocean/connect4/connect4.h | 8 +++---- pufferlib/ocean/connect4/cy_connect4.pyx | 12 +++++----- pufferlib/ocean/enduro/cy_enduro.pyx | 8 +++---- pufferlib/ocean/enduro/enduro.h | 4 ++-- pufferlib/ocean/go/cy_go.pyx | 16 ++++++-------- pufferlib/ocean/go/go.h | 10 ++++----- pufferlib/ocean/moba/cy_moba.pyx | 8 +++---- pufferlib/ocean/moba/moba.h | 8 +++---- pufferlib/ocean/nmmo3/cy_nmmo3.pyx | 8 +++---- pufferlib/ocean/nmmo3/nmmo3.h | 6 ++--- pufferlib/ocean/pong/cy_pong.pyx | 12 +++++----- pufferlib/ocean/pong/pong.h | 10 ++++----- pufferlib/ocean/rware/cy_rware.pyx | 12 +++++----- pufferlib/ocean/rware/rware.h | 6 ++--- pufferlib/ocean/snake/cy_snake.pyx | 12 +++++----- pufferlib/ocean/snake/snake.h | 9 +++++--- pufferlib/ocean/squared/cy_squared.pyx | 13 +++++------ pufferlib/ocean/squared/squared.h | 10 ++++----- pufferlib/ocean/torch.py | 5 +---- .../ocean/trash_pickup/cy_trash_pickup.pyx | 13 +++++------ pufferlib/ocean/trash_pickup/trash_pickup.h | 16 ++++++++------ .../ocean/tripletriad/cy_tripletriad.pyx | 12 +++++----- pufferlib/ocean/tripletriad/tripletriad.h | 8 +++---- setup.py | 22 ++++++++++++++----- 35 files changed, 160 insertions(+), 140 deletions(-) diff --git a/config/default.ini b/config/default.ini index 6c41c811..6c923e0f 100644 --- a/config/default.ini +++ b/config/default.ini @@ -1,6 +1,7 @@ [base] package = None env_name = None +vec = native policy_name = Policy rnn_name = None max_suggestion_cost = 3600 diff --git a/config/ocean/connect4.ini b/config/ocean/connect4.ini index 64964e82..2ad624b7 100644 --- a/config/ocean/connect4.ini +++ b/config/ocean/connect4.ini @@ -1,6 +1,7 @@ [base] package = ocean env_name = puffer_connect4 +vec = multiprocessing policy_name = Policy rnn_name = Recurrent diff --git a/config/ocean/go.ini b/config/ocean/go.ini index 9790355f..a6397b0b 100644 --- a/config/ocean/go.ini +++ b/config/ocean/go.ini @@ -1,6 +1,7 @@ [base] package = ocean env_name = puffer_go +vec = multiprocessing policy_name = Go rnn_name = Recurrent diff --git a/config/ocean/grid.ini b/config/ocean/grid.ini index 95ec3f1f..0137d7a3 100644 --- a/config/ocean/grid.ini +++ b/config/ocean/grid.ini @@ -1,6 +1,7 @@ [base] package = ocean env_name = puffer_grid +vec = multiprocessing policy_name = Policy rnn_name = Recurrent diff --git a/config/ocean/moba.ini b/config/ocean/moba.ini index eaccece5..463739dd 100644 --- a/config/ocean/moba.ini +++ b/config/ocean/moba.ini @@ -1,6 +1,7 @@ [base] package = ocean env_name = puffer_moba +vec = multiprocessing policy_name = MOBA rnn_name = Recurrent diff --git a/config/ocean/nmmo3.ini b/config/ocean/nmmo3.ini index 3b4d8329..19b7c751 100644 --- a/config/ocean/nmmo3.ini +++ b/config/ocean/nmmo3.ini @@ -1,6 +1,7 @@ [base] package = ocean -env_name = nmmo3 +env_name = puffer_nmmo3 +vec = multiprocessing policy_name = NMMO3 rnn_name = NMMO3LSTM diff --git a/config/ocean/snake.ini b/config/ocean/snake.ini index 4954254a..182acee6 100644 --- a/config/ocean/snake.ini +++ b/config/ocean/snake.ini @@ -1,6 +1,7 @@ [base] package = ocean env_name = puffer_snake +vec = multiprocessing rnn_name = Recurrent [env] diff --git a/config/ocean/trash_pickup.ini b/config/ocean/trash_pickup.ini index 9a07defa..c22eea6d 100644 --- a/config/ocean/trash_pickup.ini +++ b/config/ocean/trash_pickup.ini @@ -1,6 +1,7 @@ [base] package = ocean env_name = trash_pickup puffer_trash_pickup +vec = multiprocessing policy_name = TrashPickup rnn_name = Recurrent diff --git a/demo.py b/demo.py index fbe93994..6a7192c0 100644 --- a/demo.py +++ b/demo.py @@ -199,13 +199,13 @@ def carbs_param(group, name, space, wandb_params, mmin=None, mmax=None, is_wandb_logging_enabled=False, resample_frequency=5, num_random_samples=len(param_spaces), - max_suggestion_cost=args['base']['max_suggestion_cost'], + max_suggestion_cost=args['max_suggestion_cost'], is_saved_on_every_observation=False, ) carbs = CARBS(carbs_params, param_spaces) # GPUDrive doesn't let you reinit the vecenv, so we have to cache it - cache_vecenv = args['base']['env_name'] == 'gpudrive' + cache_vecenv = args['env_name'] == 'gpudrive' elos = {'model_random.pt': 1000} vecenv = {'vecenv': None} # can't reassign otherwise @@ -293,7 +293,7 @@ def train(args, make_env, policy_cls, rnn_cls, wandb, elif args['vec'] == 'native': vec = pufferlib.environment.PufferEnv else: - raise ValueError(f'Invalid --vector (serial/multiprocessing/ray/native).') + raise ValueError(f'Invalid --vec (serial/multiprocessing/ray/native).') if vecenv is None: vecenv = pufferlib.vector.make( @@ -360,8 +360,6 @@ def train(args, make_env, policy_cls, rnn_cls, wandb, default='puffer_squared', help='Name of specific environment to run') parser.add_argument('--mode', type=str, default='train', choices='train eval evaluate sweep sweep-carbs autotune profile'.split()) - parser.add_argument('--vec', '--vector', '--vectorization', type=str, - default='native', choices=['serial', 'multiprocessing', 'ray', 'native']) parser.add_argument('--vec-overwork', action='store_true', help='Allow vectorization to use >1 worker/core. Not recommended.') parser.add_argument('--eval-model-path', type=str, default=None, @@ -377,6 +375,7 @@ def train(args, make_env, policy_cls, rnn_cls, wandb, parser.add_argument('--wandb-group', type=str, default='debug') args = parser.parse_known_args()[0] + file_paths = glob.glob('config/**/*.ini', recursive=True) for path in file_paths: p = configparser.ConfigParser() @@ -394,7 +393,10 @@ def train(args, make_env, policy_cls, rnn_cls, wandb, for section in p.sections(): for key in p[section]: - argparse_key = f'--{section}.{key}'.replace('_', '-') + if section == 'base': + argparse_key = f'--{key}'.replace('_', '-') + else: + argparse_key = f'--{section}.{key}'.replace('_', '-') parser.add_argument(argparse_key, default=p[section][key]) # Late add help so you get a dynamic menu based on the env @@ -416,7 +418,7 @@ def train(args, make_env, policy_cls, rnn_cls, wandb, except: prev[subkey] = value - package = args['base']['package'] + package = args['package'] module_name = f'pufferlib.environments.{package}' if package == 'ocean': module_name = 'pufferlib.ocean' @@ -425,12 +427,12 @@ def train(args, make_env, policy_cls, rnn_cls, wandb, env_module = importlib.import_module(module_name) make_env = env_module.env_creator(env_name) - policy_cls = getattr(env_module.torch, args['base']['policy_name']) + policy_cls = getattr(env_module.torch, args['policy_name']) - rnn_name = args['base']['rnn_name'] + rnn_name = args['rnn_name'] rnn_cls = None if rnn_name is not None: - rnn_cls = getattr(env_module.torch, args['base']['rnn_name']) + rnn_cls = getattr(env_module.torch, args['rnn_name']) if args['baseline']: assert args['mode'] in ('train', 'eval', 'evaluate') diff --git a/pufferlib/ocean/breakout/breakout.h b/pufferlib/ocean/breakout/breakout.h index 76fb81dc..96e45fb8 100644 --- a/pufferlib/ocean/breakout/breakout.h +++ b/pufferlib/ocean/breakout/breakout.h @@ -437,7 +437,7 @@ void reset_round(Breakout* env) { env->ball_vx = 0.0; env->ball_vy = 0.0; } -void reset(Breakout* env) { +void c_reset(Breakout* env) { env->log = (Log){0}; env->score = 0; env->num_balls = 5; @@ -482,11 +482,11 @@ void step_frame(Breakout* env, int action) { env->dones[0] = 1; env->log.score = env->score; add_log(env->log_buffer, &env->log); - reset(env); + c_reset(env); } } -void step(Breakout* env) { +void c_step(Breakout* env) { env->dones[0] = 0; env->log.episode_length += 1; env->rewards[0] = 0.0; @@ -523,7 +523,7 @@ Client* make_client(Breakout* env) { return client; } -void render(Client* client, Breakout* env) { +void c_render(Client* client, Breakout* env) { if (IsKeyDown(KEY_ESCAPE)) { exit(0); } diff --git a/pufferlib/ocean/breakout/cy_breakout.pyx b/pufferlib/ocean/breakout/cy_breakout.pyx index 484d9266..443fd896 100644 --- a/pufferlib/ocean/breakout/cy_breakout.pyx +++ b/pufferlib/ocean/breakout/cy_breakout.pyx @@ -55,9 +55,9 @@ cdef extern from "breakout.h": Client* make_client(Breakout* env) void close_client(Client* client) - void render(Client* client, Breakout* env) - void reset(Breakout* env) - void step(Breakout* env) + void c_render(Client* client, Breakout* env) + void c_reset(Breakout* env) + void c_step(Breakout* env) cdef class CyBreakout: cdef: @@ -103,12 +103,12 @@ cdef class CyBreakout: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self): cdef Breakout* env = &self.envs[0] @@ -119,7 +119,7 @@ cdef class CyBreakout: self.client = make_client(env) os.chdir(cwd) - render(self.client, env) + c_render(self.client, env) def close(self): if self.client != NULL: diff --git a/pufferlib/ocean/connect4/connect4.h b/pufferlib/ocean/connect4/connect4.h index 5fb2bd59..cbb0261b 100644 --- a/pufferlib/ocean/connect4/connect4.h +++ b/pufferlib/ocean/connect4/connect4.h @@ -276,7 +276,7 @@ void compute_observation(CConnect4* env) { } } -void reset(CConnect4* env) { +void c_reset(CConnect4* env) { env->log = (Log){0}; env->dones[0] = NOT_DONE; env->player_pieces = 0; @@ -294,13 +294,13 @@ void finish_game(CConnect4* env, float reward) { compute_observation(env); } -void step(CConnect4* env) { +void c_step(CConnect4* env) { env->log.episode_length += 1; env->rewards[0] = 0.0; if (env->dones[0] == DONE) { add_log(env->log_buffer, &env->log); - reset(env); + c_reset(env); return; } @@ -359,7 +359,7 @@ Client* make_client(int width, int height) { return client; } -void render(Client* client, CConnect4* env) { +void c_render(Client* client, CConnect4* env) { if (IsKeyDown(KEY_ESCAPE)) { exit(0); } diff --git a/pufferlib/ocean/connect4/cy_connect4.pyx b/pufferlib/ocean/connect4/cy_connect4.pyx index abca0fb3..6eb39590 100644 --- a/pufferlib/ocean/connect4/cy_connect4.pyx +++ b/pufferlib/ocean/connect4/cy_connect4.pyx @@ -36,9 +36,9 @@ cdef extern from "connect4.h": void free_cconnect4(CConnect4* env) Client* make_client(float width, float height) void close_client(Client* client) - void render(Client* client, CConnect4* env) - void reset(CConnect4* env) - void step(CConnect4* env) + void c_render(Client* client, CConnect4* env) + void c_reset(CConnect4* env) + void c_step(CConnect4* env) cdef class CyConnect4: cdef: @@ -75,12 +75,12 @@ cdef class CyConnect4: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self): cdef CConnect4* env = &self.envs[0] @@ -91,7 +91,7 @@ cdef class CyConnect4: self.client = make_client(env.width, env.height) os.chdir(cwd) - render(self.client, env) + c_render(self.client, env) def close(self): if self.client != NULL: diff --git a/pufferlib/ocean/enduro/cy_enduro.pyx b/pufferlib/ocean/enduro/cy_enduro.pyx index 40e9ba6b..4baed1cf 100644 --- a/pufferlib/ocean/enduro/cy_enduro.pyx +++ b/pufferlib/ocean/enduro/cy_enduro.pyx @@ -52,7 +52,7 @@ cdef extern from "enduro.h": void free_logbuffer(LogBuffer* buffer) Log aggregate_and_clear(LogBuffer* logs) void init(Enduro* env, int seed, int env_index) - void reset(Enduro* env) + void c_reset(Enduro* env) void c_step(Enduro* env) void c_render(Client* client, Enduro* env) Client* make_client(Enduro* env) @@ -103,15 +103,15 @@ cdef class CyEnduro: self.envs[i].log_buffer = self.logs self.envs[i].obs_size = observations.shape[1] - if i % 100 == 0: - print(f"Initializing environment #{i} with seed {unique_seed}") + #if i % 100 == 0: + # print(f"Initializing environment #{i} with seed {unique_seed}") init(&self.envs[i], unique_seed, i) def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i diff --git a/pufferlib/ocean/enduro/enduro.h b/pufferlib/ocean/enduro/enduro.h index 7e442e18..6369413a 100644 --- a/pufferlib/ocean/enduro/enduro.h +++ b/pufferlib/ocean/enduro/enduro.h @@ -475,7 +475,7 @@ void allocate(Enduro* env); void init(Enduro* env, int seed, int env_index); void free_allocated(Enduro* env); void reset_round(Enduro* env); -void reset(Enduro* env); +void c_reset(Enduro* env); unsigned char check_collision(Enduro* env, Car* car); int get_player_lane(Enduro* env); float get_car_scale(float y); @@ -865,7 +865,7 @@ void reset_round(Enduro* env) { } // Reset all init vars; only called once after init -void reset(Enduro* env) { +void c_reset(Enduro* env) { // No random after first reset int reset_seed = (env->reset_count == 0) ? xorshift32(&env->rng_state) : 0; diff --git a/pufferlib/ocean/go/cy_go.pyx b/pufferlib/ocean/go/cy_go.pyx index 5b39e303..d8595c40 100644 --- a/pufferlib/ocean/go/cy_go.pyx +++ b/pufferlib/ocean/go/cy_go.pyx @@ -30,8 +30,6 @@ cdef extern from "go.h": int find(Group*) void union_groups(Group*, int, int) - - ctypedef struct CGo: float* observations int* actions @@ -68,12 +66,12 @@ cdef extern from "go.h": void init(CGo* env) void free_initialized(CGo* env) - void reset(CGo* env) - void step(CGo* env) + void c_reset(CGo* env) + void c_step(CGo* env) Client* make_client(float width, float height) void close_client(Client* client) - void render(Client* client, CGo* env) + void c_render(Client* client, CGo* env) cdef class CyGo: @@ -122,19 +120,19 @@ cdef class CyGo: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self): cdef CGo* env = &self.envs[0] if self.client == NULL: self.client = make_client(env.width,env.height) - render(self.client, &self.envs[0]) + c_render(self.client, &self.envs[0]) def close(self): if self.client != NULL: @@ -144,4 +142,4 @@ cdef class CyGo: def log(self): cdef Log log = aggregate_and_clear(self.logs) - return log \ No newline at end of file + return log diff --git a/pufferlib/ocean/go/go.h b/pufferlib/ocean/go/go.h index ca58ad53..afd51f04 100644 --- a/pufferlib/ocean/go/go.h +++ b/pufferlib/ocean/go/go.h @@ -648,7 +648,7 @@ void enemy_greedy_easy(CGo* env){ enemy_random_move(env); } -void reset(CGo* env) { +void c_reset(CGo* env) { env->log = (Log){0}; env->dones[0] = 0; env->score = 0; @@ -687,10 +687,10 @@ void end_game(CGo* env){ env->log.games_played++; env->log.episode_return += env->rewards[0]; add_log(env->log_buffer, &env->log); - reset(env); + c_reset(env); } -void step(CGo* env) { +void c_step(CGo* env) { env->log.episode_length += 1; env->rewards[0] = 0.0; int action = (int)env->actions[0]; @@ -767,7 +767,7 @@ Client* make_client(int width, int height) { return client; } -void render(Client* client, CGo* env) { +void c_render(Client* client, CGo* env) { if (IsKeyDown(KEY_ESCAPE)) { exit(0); } @@ -839,4 +839,4 @@ void render(Client* client, CGo* env) { void close_client(Client* client) { CloseWindow(); free(client); -} \ No newline at end of file +} diff --git a/pufferlib/ocean/moba/cy_moba.pyx b/pufferlib/ocean/moba/cy_moba.pyx index 13267bec..7f74b9d6 100644 --- a/pufferlib/ocean/moba/cy_moba.pyx +++ b/pufferlib/ocean/moba/cy_moba.pyx @@ -198,8 +198,8 @@ cdef extern from "moba.h": unsigned char* read_file(char* filename) - void reset(MOBA* env) - void step(MOBA* env) + void c_reset(MOBA* env) + void c_step(MOBA* env) void randomize_tower_hp(MOBA* env) cpdef entity_dtype(): @@ -267,12 +267,12 @@ cdef class CyMOBA: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self, int tick): if self.client == NULL: diff --git a/pufferlib/ocean/moba/moba.h b/pufferlib/ocean/moba/moba.h index 57717f9e..3243433c 100644 --- a/pufferlib/ocean/moba/moba.h +++ b/pufferlib/ocean/moba/moba.h @@ -1820,7 +1820,7 @@ MOBA* allocate_moba(MOBA* env) { return env; } -void reset(MOBA* env) { +void c_reset(MOBA* env) { //map->pids[:] = -1 //randomize_tower_hp(env); @@ -1890,7 +1890,7 @@ void reset(MOBA* env) { compute_observations(env); } -void step(MOBA* env) { +void c_step(MOBA* env) { for (int pid = 0; pid < NUM_ENTITIES; pid++) { Entity* entity = &env->entities[pid]; entity->target_pid = -1; @@ -1997,7 +1997,7 @@ void step(MOBA* env) { log.dire_carry = env->log[9]; add_log(env->log_buffer, &log); if (do_reset) { - reset(env); + c_reset(env); } } compute_observations(env); @@ -2261,7 +2261,7 @@ int render_game(GameRenderer* renderer, MOBA* env, int frame) { } } if (IsKeyDown(KEY_ESCAPE)) { - return 1; + exit(0); } if (HUMAN_CONTROL) { if (IsKeyDown(KEY_Q) || IsKeyPressed(KEY_Q)) { diff --git a/pufferlib/ocean/nmmo3/cy_nmmo3.pyx b/pufferlib/ocean/nmmo3/cy_nmmo3.pyx index 0c909194..65d03709 100644 --- a/pufferlib/ocean/nmmo3/cy_nmmo3.pyx +++ b/pufferlib/ocean/nmmo3/cy_nmmo3.pyx @@ -138,8 +138,8 @@ cdef extern from "nmmo3.h": int tick(Client* client, MMO* env, float delta) void init_mmo(MMO* env) - void reset(MMO* env, int seed) - void step(MMO* env) + void c_reset(MMO* env, int seed) + void c_step(MMO* env) cpdef entity_dtype(): '''Make a dummy entity to get the dtype''' @@ -226,13 +226,13 @@ cdef class Environment: cdef int i for i in range(self.num_envs): # TODO: Seed - reset(&self.envs[i], i+1) + c_reset(&self.envs[i], i+1) # Do I need to reset terrain here? def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def pids(self): ary = np.zeros((512, 512), dtype=np.intc) diff --git a/pufferlib/ocean/nmmo3/nmmo3.h b/pufferlib/ocean/nmmo3/nmmo3.h index c7794bc7..138ce1e1 100644 --- a/pufferlib/ocean/nmmo3/nmmo3.h +++ b/pufferlib/ocean/nmmo3/nmmo3.h @@ -1669,7 +1669,7 @@ void enemy_ai(MMO* env, int pid) { wander(env, pid); } -void reset(MMO* env, int seed) { +void c_reset(MMO* env, int seed) { srand(time(NULL)); env->tick = 0; @@ -1873,7 +1873,7 @@ void reset(MMO* env, int seed) { compute_all_obs(env); } -void step(MMO* env) { +void c_step(MMO* env) { env->tick += 1; int tick = env->tick; @@ -2576,7 +2576,7 @@ void close_client(Client* client) { UnloadRenderTexture(client->ui_buffer); for (int i = 0; i < NUM_PLAYER_TEXTURES; i++) { for (int element = 0; element < 5; element++) { - UnloadTexture(client->players[i][element]); + UnloadTexture(client->players[element][i]); } } UnloadFont(client->font); diff --git a/pufferlib/ocean/pong/cy_pong.pyx b/pufferlib/ocean/pong/cy_pong.pyx index 75c652ee..5a4eff86 100644 --- a/pufferlib/ocean/pong/cy_pong.pyx +++ b/pufferlib/ocean/pong/cy_pong.pyx @@ -53,12 +53,12 @@ cdef extern from "pong.h": ctypedef struct Client void init(Pong* env) - void reset(Pong* env) - void step(Pong* env) + void c_reset(Pong* env) + void c_step(Pong* env) Client* make_client(Pong* env) void close_client(Client* client) - void render(Client* client, Pong* env) + void c_render(Client* client, Pong* env) cdef class CyPong: cdef: @@ -113,12 +113,12 @@ cdef class CyPong: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self): cdef Pong* env = &self.envs[0] @@ -129,7 +129,7 @@ cdef class CyPong: self.client = make_client(env) os.chdir(cwd) - render(self.client, env) + c_render(self.client, env) def close(self): if self.client != NULL: diff --git a/pufferlib/ocean/pong/pong.h b/pufferlib/ocean/pong/pong.h index e08498af..5ac28692 100644 --- a/pufferlib/ocean/pong/pong.h +++ b/pufferlib/ocean/pong/pong.h @@ -147,7 +147,7 @@ void reset_round(Pong* env) { env->n_bounces = 0; } -void reset(Pong* env) { +void c_reset(Pong* env) { env->log = (Log){0}; reset_round(env); env->score_l = 0; @@ -155,7 +155,7 @@ void reset(Pong* env) { compute_observations(env); } -void step(Pong* env) { +void c_step(Pong* env) { env->tick += 1; env->log.episode_length += 1; env->rewards[0] = 0; @@ -213,7 +213,7 @@ void step(Pong* env) { if (env->score_r == env->max_score) { env->terminals[0] = 1; add_log(env->log_buffer, &env->log); - reset(env); + c_reset(env); return; } else { reset_round(env); @@ -246,7 +246,7 @@ void step(Pong* env) { if (env->score_l == env->max_score) { env->terminals[0] = 1; add_log(env->log_buffer, &env->log); - reset(env); + c_reset(env); return; } else { reset_round(env); @@ -302,7 +302,7 @@ void close_client(Client* client) { free(client); } -void render(Client* client, Pong* env) { +void c_render(Client* client, Pong* env) { if (IsKeyDown(KEY_ESCAPE)) { exit(0); } diff --git a/pufferlib/ocean/rware/cy_rware.pyx b/pufferlib/ocean/rware/cy_rware.pyx index b796929a..7364d125 100644 --- a/pufferlib/ocean/rware/cy_rware.pyx +++ b/pufferlib/ocean/rware/cy_rware.pyx @@ -51,9 +51,9 @@ cdef extern from "rware.h": Client* make_client(CRware* env) void close_client(Client* client) - void render(Client* client, CRware* env) - void reset(CRware* env) - void step(CRware* env) + void c_render(Client* client, CRware* env) + void c_reset(CRware* env) + void c_step(CRware* env) cdef class CyRware: cdef: @@ -96,12 +96,12 @@ cdef class CyRware: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self): cdef CRware* env = &self.envs[0] @@ -112,7 +112,7 @@ cdef class CyRware: self.client = make_client(env) os.chdir(cwd) - render(self.client, env) + c_render(self.client, env) def close(self): if self.client != NULL: diff --git a/pufferlib/ocean/rware/rware.h b/pufferlib/ocean/rware/rware.h index 14539946..be73651b 100644 --- a/pufferlib/ocean/rware/rware.h +++ b/pufferlib/ocean/rware/rware.h @@ -405,7 +405,7 @@ void compute_observations(CRware* env) { } } -void reset(CRware* env) { +void c_reset(CRware* env) { env->dones[0] = 0; // set agents in center @@ -720,7 +720,7 @@ void process_tree_movements(CRware* env, MovementGraph* graph) { } } -void step(CRware* env) { +void c_step(CRware* env) { memset(env->rewards, 0, env->num_agents * sizeof(float)); MovementGraph* graph = env->movement_graph; for (int i = 0; i < env->num_agents; i++) { @@ -778,7 +778,7 @@ Client* make_client(CRware* env) { return client; } -void render(Client* client, CRware* env) { +void c_render(Client* client, CRware* env) { if (IsKeyDown(KEY_ESCAPE)) { exit(0); } diff --git a/pufferlib/ocean/snake/cy_snake.pyx b/pufferlib/ocean/snake/cy_snake.pyx index 0f0cd5a8..d7ddb5a6 100644 --- a/pufferlib/ocean/snake/cy_snake.pyx +++ b/pufferlib/ocean/snake/cy_snake.pyx @@ -47,12 +47,12 @@ cdef extern from "snake.h": void compute_observations(CSnake* env) void spawn_snake(CSnake* env, int snake_id) void spawn_food(CSnake* env) - void reset(CSnake* env) + void c_reset(CSnake* env) void step_snake(CSnake* env, int i) - void step(CSnake* env) + void c_step(CSnake* env) ctypedef struct Client Client* make_client(int cell_size, int width, int height) - void render(Client* client, CSnake* env) + void c_render(Client* client, CSnake* env) void close_client(Client* client) cdef class CySnake: @@ -100,19 +100,19 @@ cdef class CySnake: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self, cell_size=8): cdef CSnake* env = &self.envs[0] if self.client == NULL: self.client = make_client(cell_size, env.width, env.height) - render(self.client, env) + c_render(self.client, env) def close(self): if self.client != NULL: diff --git a/pufferlib/ocean/snake/snake.h b/pufferlib/ocean/snake/snake.h index 2c1d7474..48fa2509 100644 --- a/pufferlib/ocean/snake/snake.h +++ b/pufferlib/ocean/snake/snake.h @@ -194,7 +194,7 @@ void spawn_food(CSnake* env) { env->grid[idx] = FOOD; } -void reset(CSnake* env) { +void c_reset(CSnake* env) { env->window = 2*env->vision+1; env->obs_size = env->window*env->window; @@ -300,7 +300,7 @@ void step_snake(CSnake* env, int i) { env->grid[next_r*env->width + next_c] = env->snake_colors[i]; } -void step(CSnake* env){ +void c_step(CSnake* env){ for (int i = 0; i < env->num_snakes; i++) step_snake(env, i); @@ -342,7 +342,10 @@ void close_client(Client* client) { free(client); } -void render(Client* client, CSnake* env) { +void c_render(Client* client, CSnake* env) { + if (IsKeyDown(KEY_ESCAPE)) { + exit(0); + } BeginDrawing(); ClearBackground(COLORS[0]); int sz = client->cell_size; diff --git a/pufferlib/ocean/squared/cy_squared.pyx b/pufferlib/ocean/squared/cy_squared.pyx index 249f0376..d90f0dd6 100644 --- a/pufferlib/ocean/squared/cy_squared.pyx +++ b/pufferlib/ocean/squared/cy_squared.pyx @@ -14,12 +14,11 @@ cdef extern from "squared.h": ctypedef struct Client - void reset(Squared* env) - void step(Squared* env) - + void c_reset(Squared* env) + void c_step(Squared* env) Client* make_client(Squared* env) void close_client(Client* client) - void render(Client* client, Squared* env) + void c_render(Client* client, Squared* env) cdef class CySquared: cdef: @@ -48,19 +47,19 @@ cdef class CySquared: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self): cdef Squared* env = &self.envs[0] if self.client == NULL: self.client = make_client(env) - render(self.client, env) + c_render(self.client, env) def close(self): if self.client != NULL: diff --git a/pufferlib/ocean/squared/squared.h b/pufferlib/ocean/squared/squared.h index 74a7de6f..77cf6f50 100644 --- a/pufferlib/ocean/squared/squared.h +++ b/pufferlib/ocean/squared/squared.h @@ -38,7 +38,7 @@ void free_allocated(Squared* env) { free(env->terminals); } -void reset(Squared* env) { +void c_reset(Squared* env) { memset(env->observations, 0, env->size*env->size*sizeof(unsigned char)); env->observations[env->size*env->size/2] = AGENT; env->r = env->size/2; @@ -51,7 +51,7 @@ void reset(Squared* env) { env->observations[target_idx] = TARGET; } -void step(Squared* env) { +void c_step(Squared* env) { int action = env->actions[0]; env->terminals[0] = 0; env->rewards[0] = 0; @@ -75,7 +75,7 @@ void step(Squared* env) { || env->c >= env->size) { env->terminals[0] = 1; env->rewards[0] = -1.0; - reset(env); + c_reset(env); return; } @@ -83,7 +83,7 @@ void step(Squared* env) { if (env->observations[pos] == TARGET) { env->terminals[0] = 1; env->rewards[0] = 1.0; - reset(env); + c_reset(env); return; } @@ -111,7 +111,7 @@ void close_client(Client* client) { free(client); } -void render(Client* client, Squared* env) { +void c_render(Client* client, Squared* env) { if (IsKeyDown(KEY_ESCAPE)) { exit(0); } diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py index e4d5c0cb..ce99d7dc 100644 --- a/pufferlib/ocean/torch.py +++ b/pufferlib/ocean/torch.py @@ -300,7 +300,6 @@ def decode_actions(self, flat_hidden, lookup, concat=None): class TrashPickup(nn.Module): def __init__(self, env, cnn_channels=32, hidden_size=128, **kwargs): super().__init__() - self.agent_sight_range = env.agent_sight_range self.network= nn.Sequential( pufferlib.pytorch.layer_init( nn.Conv2d(5, cnn_channels, 5, stride=3)), @@ -323,9 +322,7 @@ def forward(self, observations): return actions, value def encode_observations(self, observations): - crop_size = 2 * self.agent_sight_range + 1 - observations = observations.view(-1, 5, crop_size, crop_size).float() - #observations = observations.view(-1, crop_size, crop_size, 5).permute(0, 3, 1, 2).float() + observations = observations.view(-1, 5, 11, 11).float() return self.network(observations), None def decode_actions(self, flat_hidden, lookup, concat=None): diff --git a/pufferlib/ocean/trash_pickup/cy_trash_pickup.pyx b/pufferlib/ocean/trash_pickup/cy_trash_pickup.pyx index 19d25f81..caf4cacf 100644 --- a/pufferlib/ocean/trash_pickup/cy_trash_pickup.pyx +++ b/pufferlib/ocean/trash_pickup/cy_trash_pickup.pyx @@ -37,10 +37,9 @@ cdef extern from "trash_pickup.h": Client* make_client(CTrashPickupEnv* env) void close_client(Client* client) - void render(Client* client, CTrashPickupEnv* env) - - void reset(CTrashPickupEnv* env) - void step(CTrashPickupEnv* env) + void c_render(Client* client, CTrashPickupEnv* env) + void c_reset(CTrashPickupEnv* env) + void c_step(CTrashPickupEnv* env) cdef class CyTrashPickup: cdef: @@ -83,19 +82,19 @@ cdef class CyTrashPickup: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self): cdef CTrashPickupEnv* env = &self.envs[0] if self.client == NULL: self.client = make_client(env) - render(self.client, env) + c_render(self.client, env) def close(self): if self.client != NULL: diff --git a/pufferlib/ocean/trash_pickup/trash_pickup.h b/pufferlib/ocean/trash_pickup/trash_pickup.h index aa460cc7..ba7c441a 100644 --- a/pufferlib/ocean/trash_pickup/trash_pickup.h +++ b/pufferlib/ocean/trash_pickup/trash_pickup.h @@ -167,8 +167,6 @@ void compute_observations(CTrashPickupEnv* env) { // Local crop version void compute_observations(CTrashPickupEnv* env) { int sight_range = env->agent_sight_range; - int num_cell_types = 4; // EMPTY, TRASH, BIN, AGENT - char* obs = env->observations; int obs_dim = 2*env->agent_sight_range + 1; @@ -354,7 +352,7 @@ bool is_episode_over(CTrashPickupEnv* env) { return true; } -void reset(CTrashPickupEnv* env) { +void c_reset(CTrashPickupEnv* env) { env->current_step = 0; env->total_episode_reward = 0; @@ -383,7 +381,7 @@ void initialize_env(CTrashPickupEnv* env) { env->entities = (Entity*)calloc(env->num_agents + env->num_bins + env->num_trash, sizeof(Entity)); env->total_num_obs = env->num_agents * ((((env->agent_sight_range * 2 + 1) * (env->agent_sight_range * 2 + 1)) * 5)); - reset(env); + c_reset(env); } void allocate(CTrashPickupEnv* env) { @@ -397,7 +395,7 @@ void allocate(CTrashPickupEnv* env) { initialize_env(env); } -void step(CTrashPickupEnv* env) { +void c_step(CTrashPickupEnv* env) { // Reset reward for each agent memset(env->rewards, 0, sizeof(float) * env->num_agents); memset(env->dones, 0, sizeof(unsigned char) * env->num_agents); @@ -427,7 +425,7 @@ void step(CTrashPickupEnv* env) { add_log(env->log_buffer, &log); - reset(env); + c_reset(env); } compute_observations(env); @@ -479,7 +477,11 @@ Client* make_client(CTrashPickupEnv* env) { } // Render the TrashPickup environment -void render(Client* client, CTrashPickupEnv* env) { +void c_render(Client* client, CTrashPickupEnv* env) { + if (IsKeyDown(KEY_ESCAPE)) { + exit(0); + } + BeginDrawing(); ClearBackground(PUFF_BACKGROUND); diff --git a/pufferlib/ocean/tripletriad/cy_tripletriad.pyx b/pufferlib/ocean/tripletriad/cy_tripletriad.pyx index 903659c0..561f2352 100644 --- a/pufferlib/ocean/tripletriad/cy_tripletriad.pyx +++ b/pufferlib/ocean/tripletriad/cy_tripletriad.pyx @@ -43,9 +43,9 @@ cdef extern from "tripletriad.h": Client* make_client(float width, float height) void close_client(Client* client) - void render(Client* client, CTripleTriad* env) - void reset(CTripleTriad* env) - void step(CTripleTriad* env) + void c_render(Client* client, CTripleTriad* env) + void c_reset(CTripleTriad* env) + void c_step(CTripleTriad* env) cdef class CyTripleTriad: cdef: @@ -82,19 +82,19 @@ cdef class CyTripleTriad: def reset(self): cdef int i for i in range(self.num_envs): - reset(&self.envs[i]) + c_reset(&self.envs[i]) def step(self): cdef int i for i in range(self.num_envs): - step(&self.envs[i]) + c_step(&self.envs[i]) def render(self): cdef CTripleTriad* env = &self.envs[0] if self.client == NULL: self.client = make_client(env.width, env.height) - render(self.client, env) + c_render(self.client, env) def close(self): if self.client != NULL: diff --git a/pufferlib/ocean/tripletriad/tripletriad.h b/pufferlib/ocean/tripletriad/tripletriad.h index 2fdc5fb8..48e0894f 100644 --- a/pufferlib/ocean/tripletriad/tripletriad.h +++ b/pufferlib/ocean/tripletriad/tripletriad.h @@ -294,7 +294,7 @@ void compute_observations(CTripleTriad* env) { } } -void reset(CTripleTriad* env) { +void c_reset(CTripleTriad* env) { env->log = (Log){0}; env->game_over = 0; for(int i=0; i< 2; i++) { @@ -490,7 +490,7 @@ void check_card_conversions(CTripleTriad* env, int card_placement, int player) { } } -void step(CTripleTriad* env) { +void c_step(CTripleTriad* env) { env->log.episode_length += 1; env->rewards[0] = 0.0; int action = env->actions[0]; @@ -499,7 +499,7 @@ void step(CTripleTriad* env) { env->log.score = env->score[0]; add_log(env->log_buffer, &env->log); //printf("Log: %f, %f, %f\n", env->log.episode_return, env->log.episode_length, env->log.score); - reset(env); + c_reset(env); return; } // select a card if the card is in the range of 1-5 and the card is not placed @@ -575,7 +575,7 @@ Client* make_client(int width, int height) { return client; } -void render(Client* client, CTripleTriad* env) { +void c_render(Client* client, CTripleTriad* env) { if (IsKeyDown(KEY_ESCAPE)) { exit(0); } diff --git a/setup.py b/setup.py index 1094c7a8..dd9bc793 100644 --- a/setup.py +++ b/setup.py @@ -278,11 +278,23 @@ library_dirs=['raylib/lib'], libraries=["raylib"], runtime_library_dirs=["raylib/lib"], - extra_compile_args=['-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION', '-DPLATFORM_DESKTOP', '-O2', '-Wno-alloc-size-larger-than'],#, '-g'], - extra_link_args=[rpath_arg] - + extra_compile_args=['-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION', '-DPLATFORM_DESKTOP', '-O2', '-Wno-alloc-size-larger-than', '-fwrapv'],#, '-g'], + extra_link_args=[rpath_arg, '-Bsymbolic-functions', '-O2', '--enable-new-dtags', '-fwrapv'] ) for path in extension_paths] - + +# Prevent Conda from injecting garbage compile flags +from distutils.sysconfig import get_config_vars +cfg_vars = get_config_vars() +for key in ('CC', 'CXX', 'LDSHARED'): + if cfg_vars[key]: + cfg_vars[key] = cfg_vars[key].replace('-B /root/anaconda3/compiler_compat', '') + cfg_vars[key] = cfg_vars[key].replace('-pthread', '') + cfg_vars[key] = cfg_vars[key].replace('-fno-strict-overflow', '') + +for key, value in cfg_vars.items(): + if value and '-fno-strict-overflow' in str(value): + cfg_vars[key] = value.replace('-fno-strict-overflow', '') + setup( name="pufferlib", description="PufferAI Library" @@ -295,7 +307,7 @@ }, include_package_data=True, install_requires=[ - 'numpy==1.23.3', + 'numpy>=1.23.3', 'opencv-python==3.4.17.63', 'cython>=3.0.0', 'rich', From 25ccb9305f2b3f1ecd9d580a9eb7787580c2e7f7 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 15 Jan 2025 14:14:12 +0000 Subject: [PATCH 09/21] Link path fixes for package build --- MANIFEST.in | 5 +++++ pufferlib/version.py | 2 +- setup.py | 21 +++++++++++++-------- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 451161a7..a9a48075 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,4 +3,9 @@ global-include *.pxd global-include *.h global-include *.py recursive-include pufferlib/resources * +recursive-exclude experiments * +recursive-exclude wandb * +recursive-exclude tests * +recursive-exclude raylib * +recursive-exclude raylib_wasm * diff --git a/pufferlib/version.py b/pufferlib/version.py index e7c12d28..13ce17d8 100644 --- a/pufferlib/version.py +++ b/pufferlib/version.py @@ -1 +1 @@ -__version__ = '2.0.3' +__version__ = '2.0.6' diff --git a/setup.py b/setup.py index dd9bc793..a99edfd2 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -from setuptools import find_packages, setup, Extension +from setuptools import find_packages, find_namespace_packages, setup, Extension from Cython.Build import cythonize import numpy import os @@ -9,7 +9,7 @@ # python3 setup.py built_ext --inplace -VERSION = '2.0.3' +VERSION = '2.0.6' RAYLIB_BASE = 'https://github.com/raysan5/raylib/releases/download/5.0/' RAYLIB_NAME = 'raylib-5.0_macos' if platform.system() == "Darwin" else 'raylib-5.0_linux_amd64' @@ -266,8 +266,10 @@ # So @loader_path/../../raylib/lib is common. rpath_arg = '-Wl,-rpath,@loader_path/../../raylib/lib' elif system == 'Linux': - # On Linux, $ORIGIN works - rpath_arg = '-Wl,-rpath,$ORIGIN/raylib/lib' + # TODO: Check if anything moves packages around after they are installed. + # That would break this linking. Rel path doesn't work outside the pufferlib dir + raylib_dir = os.path.abspath("raylib/lib") + rpath_arg = f"-Wl,-rpath,{raylib_dir}" else: raise ValueError(f'Unsupported system: {system}') @@ -277,7 +279,7 @@ include_dirs=[numpy.get_include(), 'raylib/include'], library_dirs=['raylib/lib'], libraries=["raylib"], - runtime_library_dirs=["raylib/lib"], + runtime_library_dirs=['raylib/lib'], extra_compile_args=['-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION', '-DPLATFORM_DESKTOP', '-O2', '-Wno-alloc-size-larger-than', '-fwrapv'],#, '-g'], extra_link_args=[rpath_arg, '-Bsymbolic-functions', '-O2', '--enable-new-dtags', '-fwrapv'] ) for path in extension_paths] @@ -301,9 +303,12 @@ "PufferAI's library of RL tools and utilities", long_description_content_type="text/markdown", version=VERSION, - packages=find_packages(), + packages=find_namespace_packages() + find_packages(), package_data={ - "pufferlib": ["raylib/lib/libraylib.so.500", "raylib/lib/libraylib.so"] + "pufferlib": [ + "raylib/lib/libraylib.so.500", + "raylib/lib/libraylib.so" + ] }, include_package_data=True, install_requires=[ @@ -347,7 +352,7 @@ #annotate=True, #compiler_directives={'profile': True},# annotate=True ), - include_dirs=[numpy.get_include(), 'raylib-5.0_linux_amd64/include'], + include_dirs=[numpy.get_include(), 'raylib/include'], python_requires=">=3.9", license="MIT", author="Joseph Suarez", From dcff24311459fe55a809d0e063100b37fdbbdb9d Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 15 Jan 2025 19:06:30 +0000 Subject: [PATCH 10/21] Switch to static linking to avoid much pain and suffering --- MANIFEST.in | 6 +++--- setup.py | 60 +++++++++++++++++++++++++++-------------------------- 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index a9a48075..5d7e29df 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -6,6 +6,6 @@ recursive-include pufferlib/resources * recursive-exclude experiments * recursive-exclude wandb * recursive-exclude tests * -recursive-exclude raylib * -recursive-exclude raylib_wasm * - +include raylib-5.0_linux_amd64/lib/libraylib.a +include raylib-5.0_macos/lib/libraylib.a +recursive-exclude raylib-5.0_webassembly * diff --git a/setup.py b/setup.py index a99edfd2..b9de6e8e 100644 --- a/setup.py +++ b/setup.py @@ -12,31 +12,35 @@ VERSION = '2.0.6' RAYLIB_BASE = 'https://github.com/raysan5/raylib/releases/download/5.0/' + RAYLIB_NAME = 'raylib-5.0_macos' if platform.system() == "Darwin" else 'raylib-5.0_linux_amd64' -RAYLIB_WASM_URL = RAYLIB_BASE + 'raylib-5.0_webassembly.zip' -RAYLIB_URL = RAYLIB_BASE + RAYLIB_NAME + '.tar.gz' -if not os.path.exists('raylib'): - print("Raylib not found, downloading...") - urllib.request.urlretrieve(RAYLIB_URL, 'raylib.tar.gz') - with tarfile.open('raylib.tar.gz', 'r') as tar_ref: +RAYLIB_LINUX = 'raylib-5.0_linux_amd64' +RAYLIB_LINUX_URL = RAYLIB_BASE + RAYLIB_LINUX + '.tar.gz' +if not os.path.exists(RAYLIB_LINUX): + urllib.request.urlretrieve(RAYLIB_LINUX_URL, RAYLIB_LINUX + '.tar.gz') + with tarfile.open(RAYLIB_LINUX + '.tar.gz', 'r') as tar_ref: tar_ref.extractall() - os.rename(RAYLIB_NAME, 'raylib') - os.remove('raylib.tar.gz') + os.remove(RAYLIB_LINUX + '.tar.gz') -if not os.path.exists('raylib_wasm'): - print("Raylib WASM not found, downloading...") - urllib.request.urlretrieve(RAYLIB_WASM_URL, 'raylib.zip') - with zipfile.ZipFile('raylib.zip', 'r') as zip_ref: - zip_ref.extractall() - os.rename('raylib-5.0_webassembly', 'raylib_wasm') +RAYLIB_MACOS = 'raylib-5.0_macos' +RAYLIB_MACOS_URL = RAYLIB_BASE + RAYLIB_MACOS + '.tar.gz' +if not os.path.exists(RAYLIB_MACOS): + urllib.request.urlretrieve(RAYLIB_MACOS_URL, RAYLIB_MACOS + '.tar.gz') + with tarfile.open(RAYLIB_MACOS + '.tar.gz', 'r') as tar_ref: + tar_ref.extractall() - os.remove('raylib.zip') - -#import os -#os.environ['CFLAGS'] = '-O3 -march=native -Wall' + os.remove(RAYLIB_MACOS + '.tar.gz') + +RAYLIB_WASM = 'raylib-5.0_webassembly' +RAYLIB_WASM_URL = RAYLIB_BASE + RAYLIB_WASM + '.zip' +if not os.path.exists(RAYLIB_WASM): + urllib.request.urlretrieve(RAYLIB_WASM_URL, RAYLIB_WASM + '.zip') + with zipfile.ZipFile(RAYLIB_WASM + '.zip', 'r') as zip_ref: + zip_ref.extractall() + os.remove(RAYLIB_WASM + '.zip') # Default Gym/Gymnasium/PettingZoo versions # Gym: @@ -264,24 +268,23 @@ # The extension “.so” is typically in pufferlib/ocean/..., # and “raylib/lib” is (maybe) two directories up from ocean/. # So @loader_path/../../raylib/lib is common. - rpath_arg = '-Wl,-rpath,@loader_path/../../raylib/lib' + RAYLIB_INCLUDE = f'{RAYLIB_MACOS}/include' + RAYLIB_LIB = f'{RAYLIB_MACOS}/lib' elif system == 'Linux': # TODO: Check if anything moves packages around after they are installed. # That would break this linking. Rel path doesn't work outside the pufferlib dir - raylib_dir = os.path.abspath("raylib/lib") - rpath_arg = f"-Wl,-rpath,{raylib_dir}" + RAYLIB_INCLUDE = f'{RAYLIB_LINUX}/include' + RAYLIB_LIB = f'{RAYLIB_LINUX}/lib' else: raise ValueError(f'Unsupported system: {system}') extensions = [Extension( path.replace('/', '.'), [path + '.pyx'], - include_dirs=[numpy.get_include(), 'raylib/include'], - library_dirs=['raylib/lib'], - libraries=["raylib"], - runtime_library_dirs=['raylib/lib'], + include_dirs=[numpy.get_include(), RAYLIB_INCLUDE], extra_compile_args=['-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION', '-DPLATFORM_DESKTOP', '-O2', '-Wno-alloc-size-larger-than', '-fwrapv'],#, '-g'], - extra_link_args=[rpath_arg, '-Bsymbolic-functions', '-O2', '--enable-new-dtags', '-fwrapv'] + extra_link_args=['-Bsymbolic-functions', '-O2', '--enable-new-dtags', '-fwrapv'], + extra_objects=[f'{RAYLIB_LIB}/libraylib.a'] ) for path in extension_paths] # Prevent Conda from injecting garbage compile flags @@ -306,8 +309,7 @@ packages=find_namespace_packages() + find_packages(), package_data={ "pufferlib": [ - "raylib/lib/libraylib.so.500", - "raylib/lib/libraylib.so" + f'{RAYLIB_LIB}/libraylib.a', ] }, include_package_data=True, @@ -352,7 +354,7 @@ #annotate=True, #compiler_directives={'profile': True},# annotate=True ), - include_dirs=[numpy.get_include(), 'raylib/include'], + include_dirs=[numpy.get_include(), RAYLIB_INCLUDE], python_requires=">=3.9", license="MIT", author="Joseph Suarez", From ec53ba040571a95b2df0169935c87b801ebf7f09 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 15 Jan 2025 19:11:48 +0000 Subject: [PATCH 11/21] Remove arg for mac --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b9de6e8e..537a45c1 100644 --- a/setup.py +++ b/setup.py @@ -283,7 +283,7 @@ [path + '.pyx'], include_dirs=[numpy.get_include(), RAYLIB_INCLUDE], extra_compile_args=['-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION', '-DPLATFORM_DESKTOP', '-O2', '-Wno-alloc-size-larger-than', '-fwrapv'],#, '-g'], - extra_link_args=['-Bsymbolic-functions', '-O2', '--enable-new-dtags', '-fwrapv'], + extra_link_args=['-Bsymbolic-functions', '-O2', '-fwrapv'], extra_objects=[f'{RAYLIB_LIB}/libraylib.a'] ) for path in extension_paths] From 770e994290d78b739b11d43851021260f85e8382 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 15 Jan 2025 19:16:47 +0000 Subject: [PATCH 12/21] Add conda to ci --- .github/workflows/install.yml | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 873d3c5c..1f13317a 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -1,11 +1,12 @@ name: install + on: push: pull_request: jobs: test: - name: test ${{ matrix.py }} - ${{ matrix.os }} + name: test ${{ matrix.py }} - ${{ matrix.os }} - ${{ matrix.env }} runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -17,13 +18,31 @@ jobs: - "3.11" - "3.10" - "3.9" + env: + - pip + - conda steps: - - name: Setup python for test ${{ matrix.py }} + - name: Checkout code + uses: actions/checkout@v3 + + - name: Setup Conda or pip + if: matrix.env == 'conda' + uses: conda-incubator/setup-miniconda@v2 + with: + python-version: ${{ matrix.py }} + activate-environment: test-env + environment-file: environment.yml + auto-update-conda: true + + - name: Setup Python for pip + if: matrix.env == 'pip' uses: actions/setup-python@v4 with: python-version: ${{ matrix.py }} - - uses: actions/checkout@v3 + - name: Upgrade pip + if: matrix.env == 'pip' run: python -m pip install -U pip + - name: Install pufferlib - run: pip3 install -e . + run: pip install -e . From e881766ca0c03f5dcd07de1ebe5e9c74b8035461 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 15 Jan 2025 14:17:45 -0500 Subject: [PATCH 13/21] Add conda to ci --- .github/workflows/install.yml | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 873d3c5c..1f13317a 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -1,11 +1,12 @@ name: install + on: push: pull_request: jobs: test: - name: test ${{ matrix.py }} - ${{ matrix.os }} + name: test ${{ matrix.py }} - ${{ matrix.os }} - ${{ matrix.env }} runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -17,13 +18,31 @@ jobs: - "3.11" - "3.10" - "3.9" + env: + - pip + - conda steps: - - name: Setup python for test ${{ matrix.py }} + - name: Checkout code + uses: actions/checkout@v3 + + - name: Setup Conda or pip + if: matrix.env == 'conda' + uses: conda-incubator/setup-miniconda@v2 + with: + python-version: ${{ matrix.py }} + activate-environment: test-env + environment-file: environment.yml + auto-update-conda: true + + - name: Setup Python for pip + if: matrix.env == 'pip' uses: actions/setup-python@v4 with: python-version: ${{ matrix.py }} - - uses: actions/checkout@v3 + - name: Upgrade pip + if: matrix.env == 'pip' run: python -m pip install -U pip + - name: Install pufferlib - run: pip3 install -e . + run: pip install -e . From f63ed9b0f89cc49b29769768c0810e800776c81f Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 15 Jan 2025 14:19:56 -0500 Subject: [PATCH 14/21] Fix conda ci --- .github/workflows/install.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 1f13317a..9e8f15fd 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -25,13 +25,12 @@ jobs: - name: Checkout code uses: actions/checkout@v3 - - name: Setup Conda or pip + - name: Setup Conda if: matrix.env == 'conda' uses: conda-incubator/setup-miniconda@v2 with: python-version: ${{ matrix.py }} activate-environment: test-env - environment-file: environment.yml auto-update-conda: true - name: Setup Python for pip @@ -41,7 +40,6 @@ jobs: python-version: ${{ matrix.py }} - name: Upgrade pip - if: matrix.env == 'pip' run: python -m pip install -U pip - name: Install pufferlib From e18bc65cffe883e50d7e54e88b5a2d0981b20728 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 15 Jan 2025 14:22:35 -0500 Subject: [PATCH 15/21] Fix miniconda version --- .github/workflows/install.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/install.yml b/.github/workflows/install.yml index 9e8f15fd..8541dbee 100644 --- a/.github/workflows/install.yml +++ b/.github/workflows/install.yml @@ -30,6 +30,7 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: python-version: ${{ matrix.py }} + miniconda-version: "latest" activate-environment: test-env auto-update-conda: true From 076696a5a70527e6fb974b807d6c5d84ffe9bd59 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 4 Feb 2025 21:06:02 +0000 Subject: [PATCH 16/21] nmmo3 obs space --- pufferlib/ocean/nmmo3/nmmo3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pufferlib/ocean/nmmo3/nmmo3.py b/pufferlib/ocean/nmmo3/nmmo3.py index 3648e4df..93a3bcfd 100644 --- a/pufferlib/ocean/nmmo3/nmmo3.py +++ b/pufferlib/ocean/nmmo3/nmmo3.py @@ -138,8 +138,8 @@ def __init__(self, width=4*[512], height=4*[512], num_envs=4, self.prof_goal_mask = np.array([0, 0, 0, 1, 0, 0, 1, 1, 1, 1]) self.tick = 0 - self.single_observation_space = gymnasium.spaces.Box(low=-1, - high=2**32-1, shape=(11*15*10+47+10,), dtype=np.uint8) + self.single_observation_space = gymnasium.spaces.Box(low=0, + high=255, shape=(11*15*10+47+10,), dtype=np.uint8) self.single_action_space = gymnasium.spaces.Discrete(26) self.render_mode = 'human' From 0cb9a319ad527450ebf98392714e663528e915d3 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 4 Feb 2025 21:19:50 +0000 Subject: [PATCH 17/21] Update nmmo c to c_step, c_reset --- pufferlib/ocean/nmmo3/nmmo3.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pufferlib/ocean/nmmo3/nmmo3.c b/pufferlib/ocean/nmmo3/nmmo3.c index 10057231..a7a5ef78 100644 --- a/pufferlib/ocean/nmmo3/nmmo3.c +++ b/pufferlib/ocean/nmmo3/nmmo3.c @@ -165,7 +165,7 @@ void demo(int num_players) { }; allocate_mmo(&env); - reset(&env, 42); + c_reset(&env, 42); // Must reset before making client Client* client = make_client(&env); @@ -183,7 +183,7 @@ void demo(int num_players) { env.actions[0] = human_action; } - step(&env); + c_step(&env); //printf("Reward: %f\n\tDeath: %f\n\tProf: %f\n\tComb: %f\n\tItem: %f\n", env.rewards[0].death, env.rewards[0].death, env.rewards[0].prof_lvl, env.rewards[0].comb_lvl, env.rewards[0].item_atk_lvl); human_action = ATN_NOOP; } else { @@ -222,13 +222,13 @@ void test_mmonet_performance(int num_players, int timeout) { .y_window = 5, }; allocate_mmo(&env); - reset(&env, 42); + c_reset(&env, 42); int start = time(NULL); int num_steps = 0; while (time(NULL) - start < timeout) { forward(net, env.obs, env.actions); - step(&env); + c_step(&env); num_steps++; } @@ -434,7 +434,7 @@ void test_performance(int num_players, int timeout) { .y_window = 5, }; allocate_mmo(&env); - reset(&env, 0); + c_reset(&env, 0); int start = time(NULL); int num_steps = 0; @@ -442,7 +442,7 @@ void test_performance(int num_players, int timeout) { for (int i = 0; i < num_players; i++) { env.actions[i] = rand() % 23; } - step(&env); + c_step(&env); num_steps++; } From bda11c6f0763472393f4532054df8d64d09e479e Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Tue, 4 Feb 2025 21:27:56 +0000 Subject: [PATCH 18/21] Fix raylib for linux for now --- scripts/build_ocean.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/build_ocean.sh b/scripts/build_ocean.sh index 563ca529..b65625ca 100755 --- a/scripts/build_ocean.sh +++ b/scripts/build_ocean.sh @@ -42,10 +42,10 @@ fi FLAGS=( -Wall - -I./raylib/include + -I./raylib-5.0_linux_amd64/include -I./pufferlib "$SRC_DIR/$ENV.c" -o "$ENV" - ./raylib/lib/libraylib.a + ./raylib-5.0_linux_amd64/lib/libraylib.a -lm -lpthread -DPLATFORM_DESKTOP From 542236e5aecc0a4c64401d310aae5ec33a5af94a Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 5 Feb 2025 17:37:16 +0000 Subject: [PATCH 19/21] Fix build from previous conda support changes --- pufferlib/ocean/moba/moba.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pufferlib/ocean/moba/moba.c b/pufferlib/ocean/moba/moba.c index 81a95798..e0a0c519 100644 --- a/pufferlib/ocean/moba/moba.c +++ b/pufferlib/ocean/moba/moba.c @@ -125,11 +125,11 @@ void demo() { GameRenderer* renderer = init_game_renderer(32, 41, 23); - reset(&env); + c_reset(&env); int frame = 0; while (!WindowShouldClose()) { if (frame % 12 == 0) { - step(&env); + c_step(&env); forward(net, env.observations, env.actions); } render_game(renderer, &env, frame); @@ -157,7 +157,7 @@ void test_performance(float test_time) { }; allocate_moba(&env); - reset(&env); + c_reset(&env); int start = time(NULL); int i = 0; while (time(NULL) - start < test_time) { @@ -169,7 +169,7 @@ void test_performance(float test_time) { env.actions[6*j + 4] = rand()%2; env.actions[6*j + 5] = rand()%2; } - step(&env); + c_step(&env); i++; } int end = time(NULL); @@ -192,11 +192,11 @@ void test_bugs(float test_time) { }; allocate_moba(&env); - reset(&env); + c_reset(&env); int start = time(NULL); int i = 0; while (time(NULL) - start < test_time) { - step(&env); + c_step(&env); forward(net, env.observations, env.actions); i++; } From 33f4ee969dcd445c18a806d7cb64c2079e7689f1 Mon Sep 17 00:00:00 2001 From: Joseph Suarez Date: Wed, 5 Feb 2025 17:48:19 +0000 Subject: [PATCH 20/21] Fix c builds after conda support --- pufferlib/ocean/breakout/breakout.c | 10 +++++----- pufferlib/ocean/connect4/connect4.c | 10 +++++----- pufferlib/ocean/enduro/enduro.c | 4 ++-- pufferlib/ocean/go/go.c | 10 +++++----- pufferlib/ocean/pong/pong.c | 6 +++--- pufferlib/ocean/rware/rware.c | 10 +++++----- pufferlib/ocean/snake/snake.c | 10 +++++----- pufferlib/ocean/squared/squared.c | 6 +++--- pufferlib/ocean/trash_pickup/trash_pickup.c | 12 ++++++------ pufferlib/ocean/tripletriad/tripletriad.c | 6 +++--- 10 files changed, 42 insertions(+), 42 deletions(-) diff --git a/pufferlib/ocean/breakout/breakout.c b/pufferlib/ocean/breakout/breakout.c index 9203ddee..e2897d14 100644 --- a/pufferlib/ocean/breakout/breakout.c +++ b/pufferlib/ocean/breakout/breakout.c @@ -20,7 +20,7 @@ void demo() { .brick_cols = 18, }; allocate(&env); - reset(&env); + c_reset(&env); Client* client = make_client(&env); @@ -35,8 +35,8 @@ void demo() { forward_linearlstm(net, env.observations, env.actions); } - step(&env); - render(client, &env); + c_step(&env); + c_render(client, &env); } free_linearlstm(net); free(weights); @@ -60,13 +60,13 @@ void performance_test() { .brick_cols = 18, }; allocate(&env); - reset(&env); + c_reset(&env); long start = time(NULL); int i = 0; while (time(NULL) - start < test_time) { env.actions[0] = rand() % 4; - step(&env); + c_step(&env); i++; } long end = time(NULL); diff --git a/pufferlib/ocean/connect4/connect4.c b/pufferlib/ocean/connect4/connect4.c index a5d7ddc2..fa08b7de 100644 --- a/pufferlib/ocean/connect4/connect4.c +++ b/pufferlib/ocean/connect4/connect4.c @@ -15,7 +15,7 @@ void interactive() { .piece_height = 96, }; allocate_cconnect4(&env); - reset(&env); + c_reset(&env); Client* client = make_client(env.width, env.height); float observations[42] = {0}; @@ -43,10 +43,10 @@ void interactive() { tick = (tick + 1) % 60; if (env.actions[0] >= 0 && env.actions[0] <= 6) { - step(&env); + c_step(&env); } - render(client, &env); + c_render(client, &env); } free_linearlstm(net); free(weights); @@ -63,13 +63,13 @@ void performance_test() { .piece_height = 96, }; allocate_cconnect4(&env); - reset(&env); + c_reset(&env); long start = time(NULL); int i = 0; while (time(NULL) - start < test_time) { env.actions[0] = rand() % 7; - step(&env); + c_step(&env); i++; } long end = time(NULL); diff --git a/pufferlib/ocean/enduro/enduro.c b/pufferlib/ocean/enduro/enduro.c index fbdb3e74..1bef4a4e 100644 --- a/pufferlib/ocean/enduro/enduro.c +++ b/pufferlib/ocean/enduro/enduro.c @@ -47,7 +47,7 @@ int demo() { unsigned int seed = 0; init(&env, seed, 0); - reset(&env); + c_reset(&env); while (!WindowShouldClose()) { if (IsKeyDown(KEY_LEFT_SHIFT)) { @@ -78,7 +78,7 @@ void perftest(float test_time) { unsigned int seed = 12345; init(&env, seed, 0); - reset(&env); + c_reset(&env); int start = time(NULL); int i = 0; diff --git a/pufferlib/ocean/go/go.c b/pufferlib/ocean/go/go.c index 22e9bed3..371af40d 100644 --- a/pufferlib/ocean/go/go.c +++ b/pufferlib/ocean/go/go.c @@ -133,7 +133,7 @@ void demo(int grid_size) { Weights* weights = load_weights("resources/go_weights.bin", 254867); GoNet* net = init_gonet(weights, 1, grid_size); allocate(&env); - reset(&env); + c_reset(&env); Client* client = make_client(env.width, env.height); int tick = 0; @@ -147,7 +147,7 @@ void demo(int grid_size) { if (IsKeyDown(KEY_LEFT_SHIFT)) { env.actions[0] = human_action; } - step(&env); + c_step(&env); if (IsKeyDown(KEY_LEFT_SHIFT)) { env.actions[0] = -1; } @@ -187,7 +187,7 @@ void demo(int grid_size) { } } } - render(client, &env); + c_render(client, &env); } close_client(client); free_allocated(&env); @@ -209,13 +209,13 @@ void performance_test() { .reward_move_valid = 0.1 }; allocate(&env); - reset(&env); + c_reset(&env); long start = time(NULL); int i = 0; while (time(NULL) - start < test_time) { env.actions[0] = rand() % (env.grid_size)*(env.grid_size); - step(&env); + c_step(&env); i++; } long end = time(NULL); diff --git a/pufferlib/ocean/pong/pong.c b/pufferlib/ocean/pong/pong.c index 43f57d2e..9a1fd947 100644 --- a/pufferlib/ocean/pong/pong.c +++ b/pufferlib/ocean/pong/pong.c @@ -26,7 +26,7 @@ int main() { Client* client = make_client(&env); - reset(&env); + c_reset(&env); while (!WindowShouldClose()) { // User can take control of the paddle if (IsKeyDown(KEY_LEFT_SHIFT)) { @@ -37,8 +37,8 @@ int main() { forward_linearlstm(net, env.observations, env.actions); } - step(&env); - render(client, &env); + c_step(&env); + c_render(client, &env); } free_linearlstm(net); free(weights); diff --git a/pufferlib/ocean/rware/rware.c b/pufferlib/ocean/rware/rware.c index 77a030bc..d24d63ca 100644 --- a/pufferlib/ocean/rware/rware.c +++ b/pufferlib/ocean/rware/rware.c @@ -36,7 +36,7 @@ void demo(int map_choice) { LinearLSTM* net = make_linearlstm(weights, env.num_agents, 27, 5); allocate(&env); - reset(&env); + c_reset(&env); Client* client = make_client(&env); int tick = 0; @@ -54,7 +54,7 @@ void demo(int map_choice) { env.actions[env.human_agent_idx] = human_action; } - step(&env); + c_step(&env); if (IsKeyDown(KEY_LEFT_SHIFT)) { env.actions[env.human_agent_idx] = NOOP; @@ -82,7 +82,7 @@ void demo(int map_choice) { } } - render(client,&env); + c_render(client,&env); } close_client(client); free_allocated(&env); @@ -99,13 +99,13 @@ void performance_test() { .reward_type = 2 }; allocate(&env); - reset(&env); + c_reset(&env); long start = time(NULL); int i = 0; while (time(NULL) - start < test_time) { env.actions[0] = rand() % 5; - step(&env); + c_step(&env); i++; } long end = time(NULL); diff --git a/pufferlib/ocean/snake/snake.c b/pufferlib/ocean/snake/snake.c index 4b4eaebb..76b791dc 100644 --- a/pufferlib/ocean/snake/snake.c +++ b/pufferlib/ocean/snake/snake.c @@ -16,7 +16,7 @@ int demo() { .reward_death = -1.0f, }; allocate_csnake(&env); - reset(&env); + c_reset(&env); Weights* weights = load_weights("resources/snake_weights.bin", 148357); LinearLSTM* net = make_linearlstm(weights, env.num_snakes, env.obs_size, 4); @@ -35,8 +35,8 @@ int demo() { } forward_linearlstm(net, net->obs, env.actions); } - step(&env); - render(client, &env); + c_step(&env); + c_render(client, &env); } free_linearlstm(net); free(weights); @@ -59,7 +59,7 @@ void test_performance(float test_time) { .reward_death = -1.0f, }; allocate_csnake(&env); - reset(&env); + c_reset(&env); int start = time(NULL); int i = 0; @@ -67,7 +67,7 @@ void test_performance(float test_time) { for (int j = 0; j < env.num_snakes; j++) { env.actions[j] = rand()%4; } - step(&env); + c_step(&env); i++; } int end = time(NULL); diff --git a/pufferlib/ocean/squared/squared.c b/pufferlib/ocean/squared/squared.c index 34332090..98a46eeb 100644 --- a/pufferlib/ocean/squared/squared.c +++ b/pufferlib/ocean/squared/squared.c @@ -10,7 +10,7 @@ int main() { Client* client = make_client(&env); - reset(&env); + c_reset(&env); while (!WindowShouldClose()) { if (IsKeyDown(KEY_LEFT_SHIFT)) { env.actions[0] = 0; @@ -22,8 +22,8 @@ int main() { env.actions[0] = NOOP; //forward_linearlstm(net, env.observations, env.actions); } - step(&env); - render(client, &env); + c_step(&env); + c_render(client, &env); } //free_linearlstm(net); //free(weights); diff --git a/pufferlib/ocean/trash_pickup/trash_pickup.c b/pufferlib/ocean/trash_pickup/trash_pickup.c index b8021719..f5aa295b 100644 --- a/pufferlib/ocean/trash_pickup/trash_pickup.c +++ b/pufferlib/ocean/trash_pickup/trash_pickup.c @@ -14,7 +14,7 @@ void demo(int grid_size, int num_agents, int num_trash, int num_bins, int max_st .do_human_control = true }; - bool use_pretrained_model = true; + bool use_pretrained_model = false; Weights* weights; ConvLSTM* net; @@ -28,7 +28,7 @@ void demo(int grid_size, int num_agents, int num_trash, int num_bins, int max_st allocate(&env); Client* client = make_client(&env); - reset(&env); + c_reset(&env); int tick = 0; while (!WindowShouldClose()) { @@ -64,12 +64,12 @@ void demo(int grid_size, int num_agents, int num_trash, int num_bins, int max_st } // Step the environment and render the grid - step(&env); + c_step(&env); } tick++; - render(client, &env); + c_render(client, &env); } free_convlstm(net); @@ -91,7 +91,7 @@ void performance_test() { .agent_sight_range = 5 }; allocate(&env); - reset(&env); + c_reset(&env); long start = time(NULL); int i = 0; @@ -100,7 +100,7 @@ void performance_test() { for (int e = 0; e < env.num_agents; e++) { env.actions[e] = rand() % 4; } - step(&env); + c_step(&env); i += inc; } long end = time(NULL); diff --git a/pufferlib/ocean/tripletriad/tripletriad.c b/pufferlib/ocean/tripletriad/tripletriad.c index 3dd64a5f..7bb6b350 100644 --- a/pufferlib/ocean/tripletriad/tripletriad.c +++ b/pufferlib/ocean/tripletriad/tripletriad.c @@ -14,7 +14,7 @@ int main() { .num_cards = 10, }; allocate_ctripletriad(&env); - reset(&env); + c_reset(&env); Client* client = make_client(env.width, env.height); int tick = 0; @@ -59,10 +59,10 @@ int main() { tick = (tick + 1) % 45; if (env.actions[0] != NOOP) { - step(&env); + c_step(&env); } - render(client, &env); + c_render(client, &env); } free_linearlstm(net); free(weights); From 447acb7cd8ab54c5bf5df1d6795d5470ab9c9e98 Mon Sep 17 00:00:00 2001 From: murtazarang Date: Fri, 7 Feb 2025 16:40:28 -0600 Subject: [PATCH 21/21] PZ observation space (Box) fix Previous check was made against PZ dictionary, and not the individual agent space. --- pufferlib/emulation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/emulation.py b/pufferlib/emulation.py index 8b8089b7..b14a6b67 100644 --- a/pufferlib/emulation.py +++ b/pufferlib/emulation.py @@ -256,7 +256,7 @@ def __init__(self, env=None, env_creator=None, env_args=[], buf=None, env_kwargs self.num_agents = len(self.possible_agents) set_buffers(self, buf) - if isinstance(self.env.observation_space, pufferlib.spaces.Box): + if isinstance(self.env_single_observation_space, pufferlib.spaces.Box): self.obs_struct = self.observations else: self.obs_struct = self.observations.view(self.obs_dtype)