
Commit c3713d5

Updated tutorials, requirements, and added workflow to test tutorials (experimental)

elliottower committed Mar 10, 2023
1 parent 9b4b602 commit c3713d5
Showing 11 changed files with 126 additions and 11 deletions.
36 changes: 36 additions & 0 deletions .github/workflows/linux-tutorials-test.yml
@@ -0,0 +1,36 @@
# This workflow will install Python dependencies and run the tutorial scripts with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
---
name: Tutorial tests

on:
  push:
    branches: [master]
  pull_request:
    branches: [master]

permissions:
  contents: read

jobs:
  tutorial-test:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ['3.7', '3.8', '3.9', '3.10']  # '3.11' - broken due to numba
        tutorial: ['GreedyAgent']
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies and run tutorials
        run: |
          sudo apt-get install python3-opengl xvfb
          cd tutorials/${{ matrix.tutorial }}
          pip install -r requirements.txt
          pip uninstall -y pettingzoo
          pip install -e ../..
          for f in *.py; do xvfb-run -a -s "-screen 0 1024x768x24" python "$f"; done
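The final `run` step loops over every `*.py` file in the tutorial directory and executes each one under `xvfb-run` so rendering works on the headless CI machine. A minimal local sketch of the same loop (using a temporary directory with a stand-in script rather than a real tutorial, and omitting xvfb, which is only needed when a script opens a window on a headless machine):

```shell
# Sketch of the CI loop above: execute every Python script in a directory,
# stopping on the first failure (set -e).
set -e
tmpdir=$(mktemp -d)
printf 'print("tutorial ok")\n' > "$tmpdir/demo_tutorial.py"  # stand-in tutorial script
for f in "$tmpdir"/*.py; do
    python3 "$f"
done
```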
6 changes: 3 additions & 3 deletions gobblet/game/greedy_policy.py
@@ -16,7 +16,7 @@ def __init__(
self.board = None
self.depth = depth
self.rng = np.random.default_rng()
- self.prev_actions = []
+ self.prev_actions = {i: [] for i in range(2)}

def compute_actions_rllib(self, obs_batch):
observations = obs_batch["observation"]
@@ -208,11 +208,11 @@ def compute_action(
break # If there is nothing we can do to prevent them from winning in depth3, we have to do one of the moves

# If we have not selected an action, or have already done it in the last 3 turns, choose randomly
- if chosen_action is None or chosen_action in self.prev_actions[-3:]:
+ if chosen_action is None or chosen_action in self.prev_actions[agent_index][-3:]:
# Choose randomly between possible actions:
# chosen_action = self.rng.choice(actions_depth1)
chosen_action = np.random.choice(actions_depth1)
# print(f"Choosing randomly between possible actions: {actions_depth1} --> {chosen_action}")
- self.prev_actions.append(chosen_action)
+ self.prev_actions[agent_index].append(chosen_action)
act = np.array(chosen_action)
return act
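The fix above replaces the single shared `prev_actions` list with one history per agent, so each agent's no-repeat rule only consults its own last 3 moves. A standalone sketch of the idea (hypothetical helper function, not the actual policy class):

```python
import numpy as np

rng = np.random.default_rng(0)
prev_actions = {i: [] for i in range(2)}  # one move history per agent, as in the diff

def choose(agent_index, chosen_action, fallback_actions):
    """Keep chosen_action unless this agent played it in its own last 3 turns."""
    if chosen_action is None or chosen_action in prev_actions[agent_index][-3:]:
        chosen_action = int(rng.choice(fallback_actions))  # random legal fallback
    prev_actions[agent_index].append(chosen_action)
    return chosen_action

first = choose(0, 5, [1, 2, 3])   # agent 0 plays 5
second = choose(0, 5, [1, 2, 3])  # 5 is in agent 0's recent history -> random fallback
other = choose(1, 5, [1, 2, 3])   # agent 1's history is separate, so 5 is still allowed
```

With the old shared list, `other` would also have been forced onto a random fallback, because agent 0's moves polluted agent 1's history.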
2 changes: 1 addition & 1 deletion install_wasm.sh
Expand Up @@ -2,7 +2,7 @@
mkdir -p modules
cd modules

- python -m pip download gymnasium==0.26.3 pettingzoo==1.22.3 # tianshou==0.4.1 torch==1.13.1
+ python -m pip download numpy==1.22.0 gymnasium==0.26.3 pettingzoo==1.22.3 # tianshou==0.4.1 torch==1.13.1

unzip -o '*.whl'
rm *.whl
6 changes: 4 additions & 2 deletions main.py
@@ -29,11 +29,13 @@ async def main() -> None:
observation, reward, termination, truncation, info = env.last()

if termination or truncation:
+ env.render()
+ time.sleep(1)
print(f"Agent: ({agent}), Reward: {reward}, info: {info}")
break

- if iter < 5:
- # Randomize the first 4 actions (otherwise both agents will make the same moves and get stuck in a loop)
+ if iter < 2:
+ # Randomize the first action for variety (games can be repeated otherwise)
action_mask = observation["action_mask"]
action = np.random.choice(
np.arange(len(action_mask)), p=action_mask / np.sum(action_mask)
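The randomized action above is drawn by normalizing the environment's 0/1 `action_mask` into a probability vector, which makes `np.random.choice` sample uniformly over legal moves only. In isolation:

```python
import numpy as np

action_mask = np.array([1, 0, 1, 1, 0])  # 1 = legal action, 0 = illegal
# Dividing by the sum turns the mask into uniform probabilities over legal actions
action = np.random.choice(
    np.arange(len(action_mask)), p=action_mask / np.sum(action_mask)
)
```

An illegal action can never be drawn, because its probability is exactly zero.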
File renamed without changes.
3 changes: 2 additions & 1 deletion requirements.txt
@@ -7,4 +7,5 @@ pytest==7.1.2
ray==2.2.0
tianshou==0.4.11
torch==1.12.1
- pre-commit==3.1.1
+ pre-commit==3.1.1
+ hypothesis==2.4.0
21 changes: 21 additions & 0 deletions tutorials/GreedyAgent/greedy_agent.md
@@ -0,0 +1,21 @@
# Tutorial: Greedy Agent
This tutorial provides a basic example of running the gobblet environment using greedy agents.

The agents are greedy in the sense that they will only choose actions that:
1. Win the game
2. Block the opponent from winning

The `depth` parameter controls how many future turns the agents search through.

For example, at depth 2 an agent considers moves that set it up to win on its next move, regardless of what the opponent does.

This script randomizes each agent's first move to add variety, and the underlying policy in `greedy_policy.py` additionally prevents an agent from repeating any of its own previous 3 moves (to avoid getting stuck in a loop).
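To make the depth idea concrete, here is a toy sketch on a much simpler game (race to 10: each turn a player adds 1 or 2, and whoever reaches 10 exactly wins). This illustrates the search pattern only, not the actual gobblet policy:

```python
TARGET = 10
MOVES = (1, 2)  # each turn a player adds 1 or 2; reaching 10 exactly wins

def greedy_choice(total):
    """Toy depth-2 greedy: win now if possible, else set up a guaranteed win."""
    # Depth 1: take an immediate win
    for m in MOVES:
        if total + m == TARGET:
            return m
    # Depth 2: prefer a move after which we can win on our NEXT move,
    # whatever the opponent replies
    for m in MOVES:
        if all(any(total + m + o + m2 == TARGET for m2 in MOVES) for o in MOVES):
            return m
    return MOVES[0]  # nothing forcing found; fall back to the first legal move

greedy_choice(9)  # -> 1: immediate win
greedy_choice(5)  # -> 2: reaching 7 guarantees a win on the next turn
```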

## Usage:

1. (Optional) Create a virtual environment: `conda create -n gobblet python=3.10`
2. (Optional) Activate the virtual environment: `conda activate gobblet`
3. Install gobblet: run `pip install gobblet-rl` or run `pip install -e .` in the root directory
4. Install requirements for this tutorial: `cd tutorials/GreedyAgent && pip install -r requirements.txt`
5. Run `python tutorial_greedy.py`

5 changes: 5 additions & 0 deletions tutorials/GreedyAgent/requirements.txt
@@ -0,0 +1,5 @@
gym==0.23.1
gymnasium==0.27.1
numpy==1.22.0
PettingZoo==1.22.3
pygame==2.1.2
49 changes: 49 additions & 0 deletions tutorials/GreedyAgent/tutorial_greedy.py
@@ -0,0 +1,49 @@
import time
import numpy as np

from gobblet import gobblet_v1 # noqa: E402

PLAYER = 0
DEPTH = 2
RENDER_MODE = "human"

if __name__ == "__main__":
    env = gobblet_v1.env(render_mode=RENDER_MODE, args=None)

    greedy_policy = gobblet_v1.GreedyGobbletPolicy(depth=DEPTH)

    # Render 3 games between greedy agents
    for _ in range(3):
        env.reset()
        env.render()  # need to render the environment before pygame can take user input

        iter = 0

        for agent in env.agent_iter():
            observation, reward, termination, truncation, info = env.last()

            if termination or truncation:
                env.render()
                time.sleep(1)
                print(f"Agent: ({agent}), Reward: {reward}, info: {info}")
                break

            if iter < 2:
                # Randomize the first action for variety (games can be repeated otherwise)
                action_mask = observation["action_mask"]
                action = np.random.choice(
                    np.arange(len(action_mask)), p=action_mask / np.sum(action_mask)
                )
            else:
                action = greedy_policy.compute_action(
                    observation["observation"], observation["action_mask"]
                )

            # Wait 1 second between moves so the user can follow the sequence of moves
            time.sleep(1)

            env.step(action)

            iter += 1
File renamed without changes.
@@ -1,7 +1,8 @@
- # Tutorial
+ # Tutorial: WebAssembly
This tutorial provides a working example of using Pygame with WebAssembly to run and render a Gobblet environment locally in the browser.

- The script displays a game of chess between two random agents.
+ The script adapts the Greedy Agent tutorial code to run in the web, displaying games between two greedy agents.

Features such as loading trained models or using interactive environments may be added in the future.

It can be deployed to github-pages via a workflow (see `.github/workflows/pygbag.yml`).
@@ -20,8 +21,8 @@ Pygbag works via a virtual environment, and all dependencies must be downloaded

## Usage:

- 1. (Optional) Create a virtual environment: `conda create -n pygame-wasm python=3.10`
- 2. (Optional) Activate the virtual environment: `conda activate pygame-wasm`
+ 1. (Optional) Create a virtual environment: `conda create -n wasm python=3.10`
+ 2. (Optional) Activate the virtual environment: `conda activate wasm`
3. Install requirements: `pip install -r requirements.txt`
4. Run `bash install.sh` in order to download and unzip dependencies to be used in the WASM virtual machine.
5. Change directory to parent of root project directory: `cd ../../..`
