Merge branch 'main' into vlm

volcengine · Feb 26, 2025 · 5b87997 · 5b87997
2 parents 11d8882 + 2440aa6
commit 5b87997
Show file tree

Hide file tree

Showing 39 changed files with 265 additions and 201 deletions.
diff --git a/README.md b/README.md
@@ -126,6 +126,7 @@ verl is inspired by the design of Nemo-Aligner, Deepspeed-chat and OpenRLHF. The
 - [Logic R1](https://github.com/Unakar/Logic-RL): a reproduced DeepSeek R1 Zero on 2K Tiny Logic Puzzle Dataset.
 - [deepscaler](https://github.com/agentica-project/deepscaler): iterative context scaling with GRPO
 - [critic-rl](https://github.com/HKUNLP/critic-rl): Teaching Language Models to Critique via Reinforcement Learning
+- [Easy-R1](https://github.com/hiyouga/EasyR1): Multi-Modality RL
 
 ## Contribution Guide
 Contributions from the community are welcome!

diff --git a/docs/README_vllm0.7.md b/docs/README_vllm0.7.md
@@ -1,8 +1,8 @@
-# Readme for verl(vllm>=0.7) version
+# Upgrading to vllm >= 0.7
 
 ## Installation
 
-Note: This version of veRL supports **FSDP** for training and **vLLM** for rollout. (Megatron-LM is not supported yet.)
+Note: veRL with vllm >= 0.7 supports **FSDP** for training and **vLLM** for rollout.
 
 ```
 # Create the conda environment
@@ -62,4 +62,4 @@ For a typical job like examples/ppo_trainer/run_qwen2-7b_seq_balance.sh, the rol
 
 1. **num_scheduler_step>1:** not supported yet (weight loading has not been aligned with `MultiStepModelRunner`)
 2. **Prefix caching:** not supported yet (vLLM sleep mode does not support prefix caching)
-3. **Chunked prefill:** supported
+3. **Chunked prefill:** supported
diff --git a/docs/conf.py b/docs/conf.py
@@ -45,6 +45,8 @@
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = ['recommonmark',
+  'sphinx.ext.autodoc',
+  'sphinx.ext.autosummary',
   'sphinx.ext.autosectionlabel',
 ]
 

diff --git a/docs/data.rst b/docs/data.rst
@@ -0,0 +1,59 @@
+Data interface
+=========================
+
+DataProto is the interface for data exchange.
+
+The :class:`verl.DataProto` class contains two key members:
+
+- batch: a :class:`tensordict.TensorDict` object for the actual data
+- meta_info: a :class:`Dict` with additional meta information
+
+TensorDict
+~~~~~~~~~~~~
+
+:attr:`DataProto.batch` is built on top of :mod:`tensordict`, a project in the PyTorch ecosystem.
+A TensorDict is a dict-like container for tensors. To instantiate a TensorDict, you must specify key-value pairs as well as the batch size.
+
+.. code-block:: python
+
+    >>> import torch
+    >>> from tensordict import TensorDict
+    >>> tensordict = TensorDict({"zeros": torch.zeros(2, 3, 4), "ones": torch.ones(2, 3, 5)}, batch_size=[2,])
+    >>> tensordict["twos"] = 2 * torch.ones(2, 5, 6)
+    >>> zeros = tensordict["zeros"]
+    >>> tensordict
+    TensorDict(
+    fields={
+        ones: Tensor(shape=torch.Size([2, 3, 5]), device=cpu, dtype=torch.float32, is_shared=False),
+        twos: Tensor(shape=torch.Size([2, 5, 6]), device=cpu, dtype=torch.float32, is_shared=False),
+        zeros: Tensor(shape=torch.Size([2, 3, 4]), device=cpu, dtype=torch.float32, is_shared=False)},
+    batch_size=torch.Size([2]),
+    device=None,
+    is_shared=False)
+
+One can also index a tensordict along its batch_size. The contents of the TensorDict can be manipulated collectively as well.
+
+.. code-block:: python
+
+    >>> tensordict[..., :1]
+    TensorDict(
+    fields={
+        ones: Tensor(shape=torch.Size([1, 3, 5]), device=cpu, dtype=torch.float32, is_shared=False),
+        twos: Tensor(shape=torch.Size([1, 5, 6]), device=cpu, dtype=torch.float32, is_shared=False),
+        zeros: Tensor(shape=torch.Size([1, 3, 4]), device=cpu, dtype=torch.float32, is_shared=False)},
+    batch_size=torch.Size([1]),
+    device=None,
+    is_shared=False)
+    >>> tensordict = tensordict.to("cuda:0")
+    >>> tensordict = tensordict.reshape(6)
+
+For more about :class:`tensordict.TensorDict` usage, see the official tensordict_ documentation.
+
+.. _tensordict: https://pytorch.org/tensordict/overview.html
+
+
+Core APIs
+~~~~~~~~~~~~~~~~~
+
+.. autoclass::  verl.DataProto
+   :members: to, select, union, make_iterator, concat
diff --git a/docs/index.rst b/docs/index.rst
@@ -72,6 +72,7 @@ verl is fast with:
    :caption: Performance Tuning Guide
 
    perf/perf_tuning
+   README_vllm0.7.md
 
 .. toctree::
    :maxdepth: 1
@@ -88,6 +89,13 @@ verl is fast with:
    advance/fsdp_extension
    advance/megatron_extension
 
+.. toctree::
+   :maxdepth: 1
+   :caption: API References
+
+   data.rst
+
+
 .. toctree::
    :maxdepth: 1
    :caption: FAQ

diff --git a/examples/grpo_trainer/run_qwen2-7b.sh b/examples/grpo_trainer/run_qwen2-7b.sh
@@ -4,12 +4,12 @@ export VLLM_ATTENTION_BACKEND=XFORMERS
 
 python3 -m verl.trainer.main_ppo \
     algorithm.adv_estimator=grpo \
-    data.train_files=data/gsm8k/train.parquet \
-    data.val_files=data/gsm8k/test.parquet \
+    data.train_files=$HOME/data/gsm8k/train.parquet \
+    data.val_files=$HOME/data/gsm8k/test.parquet \
     data.train_batch_size=1024 \
     data.max_prompt_length=512 \
     data.max_response_length=1024 \
-    actor_rollout_ref.model.path=/mnt/hdfs/veomni/models/qwen2_5-7b-instruct \
+    actor_rollout_ref.model.path=Qwen/Qwen2-7B-Instruct \
     actor_rollout_ref.actor.optim.lr=1e-6 \
     actor_rollout_ref.model.use_remove_padding=True \
     actor_rollout_ref.actor.ppo_mini_batch_size=256 \

diff --git a/examples/grpo_trainer/run_qwen2_5_vl-7b.sh b/examples/grpo_trainer/run_qwen2_5_vl-7b.sh
@@ -4,12 +4,13 @@ export VLLM_ATTENTION_BACKEND=XFORMERS
 
 python3 -m verl.trainer.main_ppo \
     algorithm.adv_estimator=grpo \
-    data.train_files=data/geo3k/train.parquet \
-    data.val_files=data/geo3k/test.parquet \
+    data.train_files=$HOME/data/geo3k/train.parquet \
+    data.val_files=$HOME/data/geo3k/test.parquet \
     data.train_batch_size=512 \
     data.max_prompt_length=1536 \
     data.max_response_length=1536 \
-    actor_rollout_ref.model.path=/mnt/hdfs/veomni/models/qwen2_5vl-7b-instruct \
+    data.image_key=images \
+    actor_rollout_ref.model.path=Qwen/Qwen2.5-VL-7B-Instruct \
     actor_rollout_ref.actor.optim.lr=1e-6 \
     actor_rollout_ref.model.use_remove_padding=True \
     actor_rollout_ref.actor.ppo_mini_batch_size=128 \

diff --git a/examples/split_placement/main_ppo_split.py b/examples/split_placement/main_ppo_split.py
@@ -100,7 +100,7 @@ def main(config):
 
 @ray.remote
 def main_task(config):
-    from verl.utils.fs import copy_local_path_from_hdfs
+    from verl.utils.fs import copy_to_local
     from transformers import AutoTokenizer
 
     # print initial config
@@ -110,7 +110,7 @@ def main_task(config):
     OmegaConf.resolve(config)
 
     # download the checkpoint from hdfs
-    local_path = copy_local_path_from_hdfs(config.actor_rollout_ref.model.path)
+    local_path = copy_to_local(config.actor_rollout_ref.model.path)
 
     # instantiate tokenizer
     from verl.utils import hf_tokenizer

diff --git a/tests/e2e/arithmetic_sequence/rl/main_trainer.py b/tests/e2e/arithmetic_sequence/rl/main_trainer.py
@@ -22,7 +22,7 @@
 
 from verl import DataProto
 from verl.trainer.ppo.ray_trainer import RayPPOTrainer
-from verl.utils.fs import copy_local_path_from_hdfs
+from verl.utils.fs import copy_to_local
 from tests.e2e.envs.digit_completion import CharTokenizer
 
 
@@ -105,7 +105,7 @@ def main(config):
     pprint(OmegaConf.to_container(config, resolve=True))  # resolve=True will eval symbol values
 
     # download the checkpoint from hdfs
-    local_path = copy_local_path_from_hdfs(config.actor_rollout_ref.model.path)
+    local_path = copy_to_local(config.actor_rollout_ref.model.path)
     local_path = os.path.expanduser(local_path)
     # instantiate tokenizer
     tokenizer = AutoTokenizer.from_pretrained(local_path)

diff --git a/tests/rollout/run_fsdp_vllm.py b/tests/rollout/run_fsdp_vllm.py
@@ -35,8 +35,8 @@ def main():
     local_cache_path = os.path.expanduser(local_cache_path)
     hdfs_path = 'Qwen/Qwen2-7B-Instruct'
 
-    from verl.utils.fs import copy_local_path_from_hdfs
-    local_model_path = copy_local_path_from_hdfs(src=hdfs_path, cache_dir=local_cache_path)
+    from verl.utils.fs import copy_to_local
+    local_model_path = copy_to_local(src=hdfs_path, cache_dir=local_cache_path)
     tokenizer = AutoTokenizer.from_pretrained(local_model_path, trust_remote_code=True)
     actor_model_config = AutoConfig.from_pretrained(local_model_path, trust_remote_code=True)
     with torch.device("cuda"):

diff --git a/tests/rollout/test_vllm_hf_loader.py b/tests/rollout/test_vllm_hf_loader.py
@@ -80,8 +80,8 @@ def test_vllm_with_hf():
     local_cache_path = '~/.cache/verl/rlhf'
     local_cache_path = os.path.expanduser(local_cache_path)
     hdfs_path = 'deepseek-ai/deepseek-llm-7b-chat'
-    from verl.utils.fs import copy_local_path_from_hdfs
-    local_model_path = copy_local_path_from_hdfs(src=hdfs_path, cache_dir=local_cache_path)
+    from verl.utils.fs import copy_to_local
+    local_model_path = copy_to_local(src=hdfs_path, cache_dir=local_cache_path)
     tokenizer = AutoTokenizer.from_pretrained(local_model_path)
 
     preencode_prompts = [

diff --git a/tests/rollout/test_vllm_spmd.py b/tests/rollout/test_vllm_spmd.py
@@ -78,8 +78,8 @@ def test_vllm_spmd():
     local_cache_path = '~/.cache/verl/rlhf'
     local_cache_path = os.path.expanduser(local_cache_path)
     hdfs_path = 'Qwen/Qwen2-7B-Instruct'
-    from verl.utils.fs import copy_local_path_from_hdfs
-    local_model_path = copy_local_path_from_hdfs(src=hdfs_path, cache_dir=local_cache_path)
+    from verl.utils.fs import copy_to_local
+    local_model_path = copy_to_local(src=hdfs_path, cache_dir=local_cache_path)
     tokenizer = AutoTokenizer.from_pretrained(local_model_path, padding_side='left')
 
     preencode_prompts = [

diff --git a/verl/__init__.py b/verl/__init__.py
@@ -25,3 +25,7 @@
 import logging
 
 set_basic_config(level=logging.WARNING)
+
+from . import single_controller
+
+__all__ = ['DataProto', "__version__"]
diff --git a/verl/models/registry.py b/verl/models/registry.py
@@ -19,23 +19,14 @@
 
 # Supported models using HF Rmpad
 # TODO(sgm): HF may support more models than listed here; we should add more after testing
-from transformers import LlamaConfig, MistralConfig, GemmaConfig, Qwen2Config
-
-_REOVEPAD_MODELS = {'llama': LlamaConfig, 'mistral': MistralConfig, 'gemma': GemmaConfig, 'qwen2': Qwen2Config}
-
-try:
-    from transformers import Qwen2VLConfig, Qwen2_5_VLConfig
-
-    _REOVEPAD_MODELS.update({'qwen2_vl': Qwen2VLConfig, 'qwen2_5_vl': Qwen2_5_VLConfig})
-except ImportError:
-    pass
+_MODELS_SUPPORT_RMPAD = {'llama', 'mistral', 'gemma', 'qwen2', 'qwen2_vl', 'qwen2_5_vl'}
 
 
 def check_model_support_rmpad(model_type: str):
     assert isinstance(model_type, str)
-    if not model_type in _REOVEPAD_MODELS.keys():
+    if not model_type in _MODELS_SUPPORT_RMPAD:
         raise ValueError(f"Model architecture {model_type} is not supported for now. "
-                         f"RMPad supported architectures: {_REOVEPAD_MODELS.keys()}."
+                         f"RMPad supported architectures: {_MODELS_SUPPORT_RMPAD}."
                          f"Please set `use_remove_padding=False` in the model config.")
 
     if model_type in ("qwen2_vl", "qwen2_5_vl"):  # patch remove padding for qwen2vl mrope