[RLlib] CQL change hparams and data reading strategy #27451

Merged · 3 commits · Aug 5, 2022
2 changes: 1 addition & 1 deletion release/release_tests.yaml
@@ -2739,7 +2739,7 @@

cluster:
cluster_env: app_config.yaml
-    cluster_compute: 2gpus_32cpus.yaml
+    cluster_compute: 1gpu_16cpus.yaml

run:
timeout: 18000
21 changes: 21 additions & 0 deletions release/rllib_tests/1gpu_16cpus.yaml
@@ -0,0 +1,21 @@
cloud_id: {{env["ANYSCALE_CLOUD_ID"]}}
region: us-west-2

max_workers: 0

head_node_type:
name: head_node
instance_type: g3.4xlarge

worker_node_types:
- name: worker_node
instance_type: m5.xlarge
min_workers: 0
max_workers: 0
use_spot: false

aws:
BlockDeviceMappings:
- DeviceName: /dev/sda1
Ebs:
VolumeSize: 500
@@ -4,12 +4,15 @@ cql-halfcheetahbulletenv-v0:
pass_criteria:
evaluation/episode_reward_mean: 400.0
# Can not check throughput for offline methods.
-    # timesteps_total: 10000000
+    timesteps_total: 2500000
stop:
-    time_total_s: 3600
+    time_total_s: 1800
config:
# Use input produced by expert SAC algo.
-    input: ["~/halfcheetah_expert_sac.zip"]
+    input: "dataset"
+    input_config:
+      format: "json"
+      paths: "s3://air-example-data/rllib/half_cheetah/half_cheetah.json"
actions_in_input_normalized: true

soft_horizon: False
@@ -25,19 +28,18 @@ cql-halfcheetahbulletenv-v0:
no_done_at_end: false
n_step: 3
rollout_fragment_length: 1
replay_buffer_config:
type: MultiAgentReplayBuffer
learning_starts: 256
num_workers: 8
grad_clip: 40
train_batch_size: 256
target_network_update_freq: 0
min_train_timesteps_per_iteration: 1000
optimization:
actor_learning_rate: 0.0001
critic_learning_rate: 0.0003
entropy_learning_rate: 0.0001
num_workers: 0
num_gpus: 1
metrics_smoothing_episodes: 5
+    min_time_s_per_iteration: 30
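For context, the switch above from a local expert-SAC zip to the dataset-based reader corresponds to an offline-data config like the following. This is a minimal sketch as a plain dict mirroring the YAML; how RLlib consumes these keys depends on the Ray version.

```python
# Offline-data portion of the CQL release test config, mirroring the diff
# above. The S3 path and keys are taken directly from the YAML; this dict
# is illustrative, not a guaranteed RLlib API surface.
offline_data = {
    # Read experience via the dataset-based JSON reader instead of a
    # local file produced by an expert SAC policy.
    "input": "dataset",
    "input_config": {
        "format": "json",
        "paths": "s3://air-example-data/rllib/half_cheetah/half_cheetah.json",
    },
    # Actions stored in the file are already normalized.
    "actions_in_input_normalized": True,
}

# The replaced setting pointed at a local archive of expert SAC rollouts:
legacy_offline_data = {"input": ["~/halfcheetah_expert_sac.zip"]}
```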
Contributor:

Was this necessary? This is offline RL, so a larger number of iterations should have worked equally well here. If this change is necessary, something does not make sense; if it wasn't necessary, we should remove this hparam and instead increase timesteps_total to be consistent with the other learning tests.

Member Author:

Wait, this parameter controls the logging frequency. I increased it so that we don't run too many unnecessary evaluations.

Contributor:

It's not just that; it also sets the minimum training time spent per iteration. The same training_step() function keeps running until that timing requirement is met. In other words, you keep taking gradient updates until that time is reached.
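The iteration behavior described here can be sketched as follows. The function and parameter names (`training_step`, `min_time_s`, `min_train_timesteps`) mirror the config keys and the discussion above, but this is a simplification of RLlib's actual loop, not its implementation.

```python
import time

def run_one_iteration(training_step, min_time_s=30.0, min_train_timesteps=1000):
    """Simplified sketch: repeat training_step() until BOTH the minimum
    iteration time and the minimum trained-timestep count are satisfied."""
    start = time.monotonic()
    timesteps = 0
    num_updates = 0
    while (time.monotonic() - start < min_time_s) or (timesteps < min_train_timesteps):
        # Each call performs one round of gradient updates and reports
        # how many timesteps it trained on.
        timesteps += training_step()
        num_updates += 1
    return num_updates, timesteps

# Toy usage: each step "trains" on 256 timesteps instantly, so with
# min_time_s=0 only the timestep minimum binds (4 steps reach 1024 >= 1000).
updates, ts = run_one_iteration(lambda: 256, min_time_s=0.0, min_train_timesteps=1000)
```

This is why raising min_time_s_per_iteration changes more than logging cadence: it directly increases the number of gradient updates packed into each reported iteration.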

Contributor:

We chatted offline, and my concern was more about reproducibility. These nits are not merge blockers, so please merge if this is the only thing holding the PR back.

Member Author:

Will open a separate PR that addresses this issue across all release tests and environments.


# CQL Configs
min_q_weight: 5.0
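min_q_weight scales CQL's conservative penalty, which pushes Q-values down on out-of-distribution actions while pushing them up on dataset actions. A rough NumPy sketch, simplified to a finite set of candidate actions (the real RLlib implementation samples actions and differs in details):

```python
import numpy as np

def cql_penalty(q_candidate_actions, q_data_action, min_q_weight=5.0):
    """Simplified CQL regularizer for one state:
    logsumexp of Q over candidate actions (penalizes overestimated
    out-of-distribution actions) minus Q of the action actually taken
    in the dataset, scaled by min_q_weight."""
    logsumexp_q = np.log(np.sum(np.exp(q_candidate_actions)))
    return min_q_weight * (logsumexp_q - q_data_action)

# Toy usage: when some candidate actions have inflated Q-values relative
# to the dataset action, the penalty is positive and discourages them.
penalty = cql_penalty(np.array([1.0, 2.0, 0.5]), q_data_action=2.0)
```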