ray-project · gjoliver · Jan 10, 2023 · Jan 5, 2023 · Jan 6, 2023 · Jan 6, 2023
@@ -1033,6 +1033,12 @@ py_test(
     size = "small",
     srcs = ["algorithms/impala/tests/test_vtrace.py"]
 )
+py_test(
+    name = "test_impala_off_policyness",
+    tags = ["team:rllib", "algorithms_dir", "multi_gpu", "exclusive"],  # NOTE(review): "exclusive" presumably makes Bazel run this test alone; confirm
+    size = "large",
+    srcs = ["algorithms/impala/tests/test_impala_off_policyness.py"]
+)
 
 # MARWIL
 py_test(
@@ -2560,7 +2566,7 @@ py_test(
 py_test(
      name = "tests/test_supported_spaces_ppo_no_preproceesor_gpu",
      main = "tests/test_supported_spaces.py",
-     tags = ["team:rllib", "tests_dir", "multi_gpu"],
+     tags = ["team:rllib", "tests_dir", "multi_gpu", "exclusive"],
      size = "medium",
      srcs = ["tests/test_supported_spaces.py"],
      args = ["TestSupportedSpacesPPONoPreprocessorGPU"]

@@ -8,7 +8,6 @@
 from ray.rllib.utils.test_utils import (
     check,
     check_compute_single_action,
-    check_off_policyness,
     check_train_results,
     framework_iterator,
 )
@@ -31,6 +30,7 @@ def test_impala_compilation(self):
             impala.ImpalaConfig()
             .environment("CartPole-v1")
             .resources(num_gpus=0)
+            .rollouts(num_rollout_workers=2)
             .training(
                 model={
                     "lstm_use_prev_action": True,
@@ -56,8 +56,6 @@ def test_impala_compilation(self):
                     results = algo.train()
                     print(results)
                     check_train_results(results)
-                    off_policy_ness = check_off_policyness(results, upper_limit=2.0)
-                    print(f"off-policy'ness={off_policy_ness}")
 
                 check_compute_single_action(
                     algo,

@@ -0,0 +1,53 @@
+import unittest
+
+import ray
+import ray.rllib.algorithms.impala as impala
+from ray.rllib.utils.framework import try_import_tf
+from ray.rllib.utils.test_utils import (
+    check_compute_single_action,
+    check_off_policyness,
+    framework_iterator,
+)
+
+tf1, tf, tfv = try_import_tf()
+
+
+class TestIMPALAOffPolicyNess(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:
+        ray.init(num_gpus=1)
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        ray.shutdown()
+
+    def test_impala_off_policyness(self):
+        config = (
+            impala.ImpalaConfig()
+            .environment("CartPole-v1")
+            .resources(num_gpus=1)
+            .rollouts(num_rollout_workers=4)
+        )
+        num_iterations = 3
+
+        for _ in framework_iterator(config, with_eager_tracing=True):
+            for num_aggregation_workers in [0, 1]:
+                config.num_aggregation_workers = num_aggregation_workers
+                print("aggregation-workers={}".format(config.num_aggregation_workers))
+                algo = config.build()
+                for i in range(num_iterations):
+                    results = algo.train()
+                    off_policy_ness = check_off_policyness(results, upper_limit=2.0)
+                    print(f"off-policy'ness={off_policy_ness}")
+
+                check_compute_single_action(
+                    algo,
+                )
+                algo.stop()
+
+
+if __name__ == "__main__":
+    import pytest
+    import sys
+    # Run this file's tests verbosely and exit with pytest's return value.
+    sys.exit(pytest.main(["-v", __file__]))