[megatron] fix: critic and reward model load tokenizer from config
Currently, the worker will fail if the critic or reward model path
doesn't contain a tokenizer. This PR fixes that by loading the
tokenizer from a path specified in the config:

- For the critic model, we fall back to loading from
  `critic.model.tokenizer_path`.
- For the reward model, we first fall back to
  `reward_model.model.rm_tokenizer`, and then to
  `reward_model.model.input_tokenizer` if the former is not set
  (see the config sketch below).
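
For illustration, here is a minimal sketch of a config that would exercise
these fallbacks. The key names follow the bullets above; the path values and
the OmegaConf layout are assumptions for the example, not taken from an actual
verl config.

# Hypothetical config sketch (paths are made up). verl configs are
# OmegaConf-based, so a plain nested dict is enough to illustrate the keys.
from omegaconf import OmegaConf

config = OmegaConf.create({
    "critic": {
        "model": {
            "path": "hdfs://models/critic-weights-only",       # no tokenizer files here
            "tokenizer_path": "hdfs://models/base-tokenizer",   # critic fallback
        }
    },
    "reward_model": {
        "model": {
            "path": "hdfs://models/rm-weights-only",            # no tokenizer files here
            "rm_tokenizer": "hdfs://models/rm-tokenizer",        # first fallback
            "input_tokenizer": "hdfs://models/sft-tokenizer",    # used if rm_tokenizer is unset
        }
    },
})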

Signed-off-by: Hollow Man <hollowman@opensuse.org>
HollowMan6 committed Feb 19, 2025
1 parent 55a4d3c commit ab24a03
Showing 1 changed file with 26 additions and 3 deletions:
verl/workers/megatron_workers.py
@@ -483,7 +483,12 @@ def _build_critic_model_optimizer(self,
 
         # Step 1: initialize the tokenizer
         local_path = copy_local_path_from_hdfs(model_path)
-        self.tokenizer = hf_tokenizer(local_path)
+        try:
+            self.tokenizer = hf_tokenizer(local_path)
+        except OSError:
+            # If the model path doesn't contain a tokenizer, we use the tokenizer path specified in the config
+            tokenizer_local_path = copy_local_path_from_hdfs(self.config.model.tokenizer_path)
+            self.tokenizer = hf_tokenizer(tokenizer_local_path)
 
         # Step 2: get the actor_model_config
         critic_model_config = AutoConfig.from_pretrained(local_path)
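
The fallback above relies on the tokenizer load raising OSError when the model
directory has no tokenizer files (the exception the hunk catches). A standalone
sketch of the same pattern, assuming hf_tokenizer wraps transformers'
AutoTokenizer.from_pretrained:

# Minimal, self-contained sketch of the try/except fallback used above.
from transformers import AutoTokenizer

def load_tokenizer_with_fallback(model_path: str, fallback_path: str):
    # Try the model directory first; fall back to the configured tokenizer
    # path if no tokenizer files are found there.
    try:
        return AutoTokenizer.from_pretrained(model_path)
    except OSError:
        return AutoTokenizer.from_pretrained(fallback_path)
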
@@ -639,15 +644,31 @@ def __init__(self, config):
         self.config.micro_batch_size //= mpu.get_data_parallel_world_size()
         self.config.micro_batch_size_per_gpu = self.config.micro_batch_size
 
-    def _build_rm_model(self, model_path, megatron_config: ModelParallelConfig, override_model_config):
+    def _build_rm_model(self,
+                        model_path,
+                        megatron_config: ModelParallelConfig,
+                        override_model_config,
+                        sft_tokenizer=None,
+                        rm_tokenizer=None):
         from megatron.core.models.gpt.gpt_model import ModelType
         from verl.utils.model import print_model_size, update_model_config
         from verl.utils.megatron_utils import get_model
         from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 
         # Step 1: initialize the tokenizer
         local_path = copy_local_path_from_hdfs(model_path)
-        self.tokenizer = hf_tokenizer(local_path)
+        try:
+            self.tokenizer = hf_tokenizer(local_path)
+        except OSError:
+            # If the model path doesn't contain a tokenizer, we use rm_tokenizer or sft_tokenizer
+            if rm_tokenizer:
+                self.tokenizer = rm_tokenizer
+            else:
+                self.tokenizer = sft_tokenizer
+                if self.rank == 0:
+                    # If we use sft_tokenizer, we should print a warning message to inform the user
+                    # as this may cause some unexpected behavior
+                    logger.warning('Using input_tokenizer as the reward model tokenizer')
 
         # Step 2: get the actor_model_config
         rm_model_config = AutoConfig.from_pretrained(local_path)
@@ -735,6 +756,8 @@ def init_model(self):
             model_path=self.config.model.path,
             megatron_config=megatron_config,
             override_model_config=override_model_config,
+            sft_tokenizer=sft_tokenizer,
+            rm_tokenizer=rm_tokenizer,
         )
         # FIXME(sgm): reward model param offload is implemented in MegatronRewardModel
         # should be implemented in workers
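
For context, the sft_tokenizer and rm_tokenizer arguments passed above have to
be built by the caller. Below is a rough sketch of how init_model could derive
them from the config keys named in the commit message; the local variable names
and the use of .get() are assumptions, not the actual verl code:

# Hypothetical caller-side sketch (inside the reward model worker's init_model):
# build the optional fallback tokenizers before calling _build_rm_model.
sft_tokenizer, rm_tokenizer = None, None

input_tokenizer_path = self.config.model.get("input_tokenizer", None)
if input_tokenizer_path is not None:
    sft_tokenizer = hf_tokenizer(copy_local_path_from_hdfs(input_tokenizer_path))

rm_tokenizer_path = self.config.model.get("rm_tokenizer", None)
if rm_tokenizer_path is not None:
    rm_tokenizer = hf_tokenizer(copy_local_path_from_hdfs(rm_tokenizer_path))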
