[RLlib] Change placement group strategy for learner (ray-project#36929)
Signed-off-by: avnishn <avnishnarayan@gmail.com>
Signed-off-by: e428265 <arvind.chandramouli@lmco.com>
avnishn authored and arvind-chandra committed Aug 31, 2023
1 parent 6caac99 commit 9d1c878
Showing 4 changed files with 54 additions and 60 deletions.
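This commit replaces the per-learner-worker resource bundles with a single consolidated bundle covering all learner workers, so the placement group keeps the learner resources together instead of requesting one small bundle per worker. The two release-test cluster configs below (one AWS, one GCE) change accordingly: two single-GPU workers become one multi-GPU worker that can host the consolidated bundle. A minimal sketch of the bundle shapes before and after, with hypothetical values (2 learner workers, 1 GPU each), not taken from the commit:

    # Before: one bundle per learner worker; the scheduler may place these
    # on different nodes.
    before = [{"GPU": 1} for _ in range(2)]  # [{"GPU": 1}, {"GPU": 1}]

    # After: one consolidated bundle; a bundle is atomic, so it must fit
    # on a single node.
    after = [{"GPU": 2 * 1}]  # [{"GPU": 2}]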
@@ -9,9 +9,9 @@ head_node_type:

 worker_node_types:
 - name: worker_node
-  instance_type: g4dn.xlarge
-  min_workers: 2
-  max_workers: 2
+  instance_type: g3.8xlarge
+  min_workers: 1
+  max_workers: 1
   use_spot: false

 aws:
@@ -11,7 +11,7 @@ head_node_type:

 worker_node_types:
 - name: worker_node
-  instance_type: n1-standard-4-nvidia-tesla-t4-1 # g4dn.xlarge
-  min_workers: 2
-  max_workers: 2
+  instance_type: n1-standard-4-nvidia-t4-16gb-2
+  min_workers: 1
+  max_workers: 1
   use_spot: false
73 changes: 45 additions & 28 deletions rllib/algorithms/algorithm.py

@@ -131,6 +131,48 @@
 from ray.util.timer import _Timer
 from ray.tune.registry import get_trainable_cls

+
+try:
+    from ray.rllib.extensions import AlgorithmBase
+except ImportError:
+
+    class AlgorithmBase:
+        @staticmethod
+        def _get_learner_bundles(cf: AlgorithmConfig) -> List[Dict[str, int]]:
+            """Selects the right resource bundles for learner workers based off of cf.
+
+            Args:
+                cf: The algorithm config.
+
+            Returns:
+                A list of resource bundles for the learner workers.
+            """
+            if cf.num_learner_workers > 0:
+                if cf.num_gpus_per_learner_worker:
+                    learner_bundles = [
+                        {"GPU": cf.num_learner_workers * cf.num_gpus_per_learner_worker}
+                    ]
+                elif cf.num_cpus_per_learner_worker:
+                    learner_bundles = [
+                        {
+                            "CPU": cf.num_cpus_per_learner_worker
+                            * cf.num_learner_workers,
+                        }
+                    ]
+            else:
+                learner_bundles = [
+                    {
+                        # sampling and training is not done concurrently when local is
+                        # used, so pick the max.
+                        "CPU": max(
+                            cf.num_cpus_per_learner_worker, cf.num_cpus_for_local_worker
+                        ),
+                        "GPU": cf.num_gpus_per_learner_worker,
+                    }
+                ]
+            return learner_bundles
+
+
 tf1, tf, tfv = try_import_tf()

 logger = logging.getLogger(__name__)
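For illustration, a usage sketch of the `_get_learner_bundles` helper added above. `Cfg` is a hypothetical stand-in for `AlgorithmConfig` holding only the fields the helper reads; it is not part of the commit, and the sketch assumes the fallback `AlgorithmBase` defined above is in scope:

    from dataclasses import dataclass

    @dataclass
    class Cfg:
        # Only the attributes _get_learner_bundles reads; values illustrative.
        num_learner_workers: int = 2
        num_gpus_per_learner_worker: int = 1
        num_cpus_per_learner_worker: int = 0
        num_cpus_for_local_worker: int = 1

    # Remote learners: all learner resources merge into one bundle.
    print(AlgorithmBase._get_learner_bundles(Cfg()))
    # [{'GPU': 2}]

    # Local learner (num_learner_workers == 0): sampling and training share
    # the local worker, so the CPU request is the max of the two requirements.
    print(AlgorithmBase._get_learner_bundles(
        Cfg(num_learner_workers=0, num_cpus_per_learner_worker=2)
    ))
    # [{'CPU': 2, 'GPU': 1}]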
@@ -146,7 +188,7 @@ def with_common_config(*args, **kwargs):


 @PublicAPI
-class Algorithm(Trainable):
+class Algorithm(Trainable, AlgorithmBase):
     """An RLlib algorithm responsible for optimizing one or more Policies.

     Algorithms contain a WorkerSet under `self.workers`. A WorkerSet is
@@ -2161,22 +2203,12 @@ def default_resource_request(
             if cf.num_learner_workers == 0:
                 # in this case local_worker only does sampling and training is done on
                 # local learner worker
-                driver = {
-                    # sampling and training is not done concurrently when local is
-                    # used, so pick the max.
-                    "CPU": max(
-                        cf.num_cpus_per_learner_worker, cf.num_cpus_for_local_worker
-                    ),
-                    "GPU": cf.num_gpus_per_learner_worker,
-                }
+                driver = cls._get_learner_bundles(cf)[0]
             else:
                 # in this case local_worker only does sampling and training is done on
                 # remote learner workers
                 driver = {"CPU": cf.num_cpus_for_local_worker, "GPU": 0}
         else:
-            # Without Learner API, the local_worker can do both sampling and training.
-            # So, we need to allocate the same resources for the driver as for the
-            # local_worker.
             driver = {
                 "CPU": cf.num_cpus_for_local_worker,
                 "GPU": 0 if cf._fake_gpus else cf.num_gpus,
@@ -2223,22 +2255,7 @@ def default_resource_request(
         # resources for remote learner workers
         learner_bundles = []
         if cf._enable_learner_api and cf.num_learner_workers > 0:
-            # can't specify cpus for learner workers at the same
-            # time as gpus
-            if cf.num_gpus_per_learner_worker:
-                learner_bundles = [
-                    {
-                        "GPU": cf.num_gpus_per_learner_worker,
-                    }
-                    for _ in range(cf.num_learner_workers)
-                ]
-            elif cf.num_cpus_per_learner_worker:
-                learner_bundles = [
-                    {
-                        "CPU": cf.num_cpus_per_learner_worker,
-                    }
-                    for _ in range(cf.num_learner_workers)
-                ]
+            learner_bundles = cls._get_learner_bundles(cf)

         bundles = [driver] + rollout_bundles + evaluation_bundles + learner_bundles
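Downstream, the assembled `bundles` list is wrapped in a Tune `PlacementGroupFactory`. A minimal sketch of that wrapping with illustrative bundle values; the strategy string shown is an assumption (RLlib passes the configured `cf.placement_strategy`):

    from ray.tune import PlacementGroupFactory

    driver = {"CPU": 1, "GPU": 0}
    rollout_bundles = [{"CPU": 1}, {"CPU": 1}]
    evaluation_bundles = []
    learner_bundles = [{"GPU": 2}]  # consolidated learner bundle from this change

    pgf = PlacementGroupFactory(
        bundles=[driver] + rollout_bundles + evaluation_bundles + learner_bundles,
        # "PACK" tries to co-locate bundles on as few nodes as possible.
        strategy="PACK",
    )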
29 changes: 3 additions & 26 deletions rllib/algorithms/impala/impala.py

@@ -839,32 +839,9 @@ def default_resource_request(
         # factories.
         if cf._enable_learner_api:
             # Resources for the Algorithm.
-            if cf.num_learner_workers == 0:
-                # if num_learner_workers is 0, then we need to allocate one gpu if
-                # num_gpus_per_learner_worker is greater than 0.
-                trainer_bundle = [
-                    {
-                        "CPU": cf.num_cpus_per_learner_worker,
-                        "GPU": cf.num_gpus_per_learner_worker,
-                    }
-                ]
-            else:
-                if cf.num_gpus_per_learner_worker:
-                    trainer_bundle = [
-                        {
-                            "GPU": cf.num_gpus_per_learner_worker,
-                        }
-                        for _ in range(cf.num_learner_workers)
-                    ]
-                elif cf.num_cpus_per_learner_worker:
-                    trainer_bundle = [
-                        {
-                            "CPU": cf.num_cpus_per_learner_worker,
-                        }
-                        for _ in range(cf.num_learner_workers)
-                    ]
-
-            bundles += trainer_bundle
+            learner_bundles = cls._get_learner_bundles(cf)
+
+            bundles += learner_bundles

         # Return PlacementGroupFactory containing all needed resources
         # (already properly defined as device bundles).
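Because `Algorithm` now mixes in `AlgorithmBase`, subclasses such as `Impala` inherit the shared helper instead of carrying their own bundle-building code, as in the deletion above. A sketch of the inherited call, assuming the Ray version of this commit; the config values are illustrative:

    from ray.rllib.algorithms.impala import Impala, ImpalaConfig

    # resources() is the standard AlgorithmConfig way to set these fields.
    cf = ImpalaConfig().resources(
        num_learner_workers=2,
        num_gpus_per_learner_worker=1,
    )

    # cls._get_learner_bundles(cf) inside default_resource_request resolves
    # to the shared AlgorithmBase helper.
    print(Impala._get_learner_bundles(cf))
    # [{'GPU': 2}]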
