[megatron] fix: critic and reward model load tokenizer from config
Currently, the worker will fail if the critic or reward model path
doesn't contain a tokenizer. This PR fixes that by loading the
tokenizer from a path specified in the config:

- For the critic model, we fall back to loading from
  `critic.model.tokenizer_path`.
- For the reward model, we first fall back to
  `reward_model.model.rm_tokenizer`, and then to
  `reward_model.model.input_tokenizer` if the former is not set
  (see the config sketch below).
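
For illustration, here is a minimal sketch of a config that would exercise
these fallbacks. The key names follow the bullets above; the path values and
the OmegaConf layout are assumptions for the example, not taken from an actual
verl config.

# Hypothetical config sketch (paths are made up). verl configs are
# OmegaConf-based, so a plain nested dict is enough to illustrate the keys.
from omegaconf import OmegaConf

config = OmegaConf.create({
    "critic": {
        "model": {
            "path": "hdfs://models/critic-weights-only",       # no tokenizer files here
            "tokenizer_path": "hdfs://models/base-tokenizer",   # critic fallback
        }
    },
    "reward_model": {
        "model": {
            "path": "hdfs://models/rm-weights-only",            # no tokenizer files here
            "rm_tokenizer": "hdfs://models/rm-tokenizer",        # first fallback
            "input_tokenizer": "hdfs://models/sft-tokenizer",    # used if rm_tokenizer is unset
        }
    },
})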

Signed-off-by: Hollow Man <hollowman@opensuse.org>
HollowMan6 committed Feb 19, 2025
1 parent 55a4d3c commit ab24a03
Showing 1 changed file with 26 additions and 3 deletions:
verl/workers/megatron_workers.py
@@ -483,7 +483,12 @@ def _build_critic_model_optimizer(self,
 
         # Step 1: initialize the tokenizer
         local_path = copy_local_path_from_hdfs(model_path)
-        self.tokenizer = hf_tokenizer(local_path)
+        try:
+            self.tokenizer = hf_tokenizer(local_path)
+        except OSError:
+            # If the model path doesn't contain a tokenizer, we use the tokenizer path specified in the config
+            tokenizer_local_path = copy_local_path_from_hdfs(self.config.model.tokenizer_path)
+            self.tokenizer = hf_tokenizer(tokenizer_local_path)
 
         # Step 2: get the actor_model_config
         critic_model_config = AutoConfig.from_pretrained(local_path)
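
The fallback above relies on the tokenizer load raising OSError when the model
directory has no tokenizer files (the exception the hunk catches). A standalone
sketch of the same pattern, assuming hf_tokenizer wraps transformers'
AutoTokenizer.from_pretrained:

# Minimal, self-contained sketch of the try/except fallback used above.
from transformers import AutoTokenizer

def load_tokenizer_with_fallback(model_path: str, fallback_path: str):
    # Try the model directory first; fall back to the configured tokenizer
    # path if no tokenizer files are found there.
    try:
        return AutoTokenizer.from_pretrained(model_path)
    except OSError:
        return AutoTokenizer.from_pretrained(fallback_path)
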
@@ -639,15 +644,31 @@ def __init__(self, config):
         self.config.micro_batch_size //= mpu.get_data_parallel_world_size()
         self.config.micro_batch_size_per_gpu = self.config.micro_batch_size
 
-    def _build_rm_model(self, model_path, megatron_config: ModelParallelConfig, override_model_config):
+    def _build_rm_model(self,
+                        model_path,
+                        megatron_config: ModelParallelConfig,
+                        override_model_config,
+                        sft_tokenizer=None,
+                        rm_tokenizer=None):
         from megatron.core.models.gpt.gpt_model import ModelType
         from verl.utils.model import print_model_size, update_model_config
         from verl.utils.megatron_utils import get_model
         from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 
         # Step 1: initialize the tokenizer
         local_path = copy_local_path_from_hdfs(model_path)
-        self.tokenizer = hf_tokenizer(local_path)
+        try:
+            self.tokenizer = hf_tokenizer(local_path)
+        except OSError:
+            # If the model path doesn't contain a tokenizer, we use rm_tokenizer or sft_tokenizer
+            if rm_tokenizer:
+                self.tokenizer = rm_tokenizer
+            else:
+                self.tokenizer = sft_tokenizer
+                if self.rank == 0:
+                    # If we use sft_tokenizer, we should print a warning message to inform the user
+                    # as this may cause some unexpected behavior
+                    logger.warning('Using input_tokenizer as the reward model tokenizer')
 
         # Step 2: get the actor_model_config
         rm_model_config = AutoConfig.from_pretrained(local_path)
@@ -735,6 +756,8 @@ def init_model(self):
             model_path=self.config.model.path,
             megatron_config=megatron_config,
             override_model_config=override_model_config,
+            sft_tokenizer=sft_tokenizer,
+            rm_tokenizer=rm_tokenizer,
         )
         # FIXME(sgm): reward model param offload is implemented in MegatronRewardModel
         # should be implemented in workers
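
For context, the sft_tokenizer and rm_tokenizer arguments passed above have to
be built by the caller. Below is a rough sketch of how init_model could derive
them from the config keys named in the commit message; the local variable names
and the use of .get() are assumptions, not the actual verl code:

# Hypothetical caller-side sketch (inside the reward model worker's init_model):
# build the optional fallback tokenizers before calling _build_rm_model.
sft_tokenizer, rm_tokenizer = None, None

input_tokenizer_path = self.config.model.get("input_tokenizer", None)
if input_tokenizer_path is not None:
    sft_tokenizer = hf_tokenizer(copy_local_path_from_hdfs(input_tokenizer_path))

rm_tokenizer_path = self.config.model.get("rm_tokenizer", None)
if rm_tokenizer_path is not None:
    rm_tokenizer = hf_tokenizer(copy_local_path_from_hdfs(rm_tokenizer_path))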
