-
Notifications
You must be signed in to change notification settings - Fork 327
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
微调chatglm3-6b报错Could not locate the tokenization_chatglm.py inside THUDM/chatglm3-6b. #488
Comments
config 中,是否给 |
我的config文件是从模板中copy出来并将模型和数据集改成自己的,其他没有动,包括process_hf_dataset. # Copyright (c) OpenMMLab. All rights reserved.
import torch
from datasets import load_dataset
from mmengine.dataset import DefaultSampler
from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
LoggerHook, ParamSchedulerHook)
from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
from peft import LoraConfig
from torch.optim import AdamW
from transformers import (AutoModelForCausalLM, AutoTokenizer,
BitsAndBytesConfig)
from xtuner.dataset import process_hf_dataset
from xtuner.dataset.collate_fns import default_collate_fn
from xtuner.dataset.map_fns import alpaca_map_fn, template_map_fn_factory
from xtuner.engine.hooks import (DatasetInfoHook, EvaluateChatHook,
VarlenAttnArgsToMessageHubHook)
from xtuner.engine.runner import TrainLoop
from xtuner.model import SupervisedFinetune
from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
#######################################################################
# PART 1 Settings #
#######################################################################
# Model
pretrained_model_name_or_path = '/mnt/hysc/models/chatglm3-6b'
use_varlen_attn = False
# Data
data_path = 'xxxx'
prompt_template = PROMPT_TEMPLATE.chatglm3
max_length = 2048
pack_to_max_length = True
# Scheduler & Optimizer
batch_size = 1 # per_device
accumulative_counts = 16
dataloader_num_workers = 0
max_epochs = 3
optim_type = AdamW
lr = 2e-4
betas = (0.9, 0.999)
weight_decay = 0
max_norm = 1 # grad clip
warmup_ratio = 0.03
# Save
save_steps = 500
save_total_limit = 2 # Maximum checkpoints to keep (-1 means unlimited)
# Evaluate the generation performance during the training
evaluation_freq = 500
SYSTEM = ""
evaluation_inputs = [
]
#######################################################################
# PART 2 Model & Tokenizer #
#######################################################################
tokenizer = dict(
type=AutoTokenizer.from_pretrained,
pretrained_model_name_or_path=pretrained_model_name_or_path,
trust_remote_code=True,
encode_special_tokens=True,
padding_side='left')
model = dict(
type=SupervisedFinetune,
use_varlen_attn=use_varlen_attn,
llm=dict(
type=AutoModelForCausalLM.from_pretrained,
pretrained_model_name_or_path=pretrained_model_name_or_path,
trust_remote_code=True,
torch_dtype=torch.float16,
quantization_config=dict(
type=BitsAndBytesConfig,
load_in_4bit=True,
load_in_8bit=False,
llm_int8_threshold=6.0,
llm_int8_has_fp16_weight=False,
bnb_4bit_compute_dtype=torch.float16,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type='nf4')),
lora=dict(
type=LoraConfig,
r=64,
lora_alpha=16,
lora_dropout=0.1,
bias='none',
task_type='CAUSAL_LM'))
#######################################################################
# PART 3 Dataset & Dataloader #
#######################################################################
train_dataset = dict(
type=process_hf_dataset,
ataset=dict(
type=load_dataset, path='json', data_files=dict(train=data_path)),
tokenizer=tokenizer,
max_length=max_length,
dataset_map_fn=None,
template_map_fn=dict(
type=template_map_fn_factory, template=prompt_template),
remove_unused_columns=True,
shuffle_before_pack=True,
pack_to_max_length=pack_to_max_length,
use_varlen_attn=use_varlen_attn)
train_dataloader = dict(
batch_size=batch_size,
num_workers=dataloader_num_workers,
dataset=train_dataset,
sampler=dict(type=DefaultSampler, shuffle=True),
collate_fn=dict(type=default_collate_fn, use_varlen_attn=use_varlen_attn))
#######################################################################
# PART 4 Scheduler & Optimizer #
#######################################################################
# optimizer
optim_wrapper = dict(
type=AmpOptimWrapper,
optimizer=dict(
type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
accumulative_counts=accumulative_counts,
loss_scale='dynamic',
dtype='float16')
# learning policy
# More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501
param_scheduler = [
dict(
type=LinearLR,
start_factor=1e-5,
by_epoch=True,
begin=0,
end=warmup_ratio * max_epochs,
convert_to_iter_based=True),
dict(
type=CosineAnnealingLR,
eta_min=0.0,
by_epoch=True,
begin=warmup_ratio * max_epochs,
end=max_epochs,
convert_to_iter_based=True)
]
# train, val, test setting
train_cfg = dict(type=TrainLoop, max_epochs=max_epochs)
#######################################################################
# PART 5 Runtime #
#######################################################################
# Log the dialogue periodically during the training process, optional
custom_hooks = [
dict(type=DatasetInfoHook, tokenizer=tokenizer),
dict(
type=EvaluateChatHook,
tokenizer=tokenizer,
every_n_iters=evaluation_freq,
evaluation_inputs=evaluation_inputs,
system=SYSTEM,
prompt_template=prompt_template)
]
if use_varlen_attn:
custom_hooks += [dict(type=VarlenAttnArgsToMessageHubHook)]
# configure default hooks
default_hooks = dict(
# record the time of every iteration.
timer=dict(type=IterTimerHook),
# print log every 10 iterations.
logger=dict(type=LoggerHook, log_metric_by_epoch=False, interval=10),
# enable the parameter scheduler.
param_scheduler=dict(type=ParamSchedulerHook),
# save checkpoint per `save_steps`.
checkpoint=dict(
type=CheckpointHook,
by_epoch=False,
interval=save_steps,
max_keep_ckpts=save_total_limit),
# set sampler seed in distributed evrionment.
sampler_seed=dict(type=DistSamplerSeedHook),
)
# configure environment
env_cfg = dict(
# whether to enable cudnn benchmark
cudnn_benchmark=False,
# set multi process parameters
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
# set distributed parameters
dist_cfg=dict(backend='nccl'),
)
# set visualizer
visualizer = None
# set log level
log_level = 'INFO'
# load from which checkpoint
load_from = None
# whether to resume training from the loaded checkpoint
resume = False
# Defaults to use random seed and disable `deterministic`
randomness = dict(seed=None, deterministic=False)
# set log processor
log_processor = dict(by_epoch=False) |
train_dataset = dict(
type=process_hf_dataset,
- ataset=dict(
+ dataset=dict(
type=load_dataset, path='json', data_files=dict(train=data_path)),
tokenizer=tokenizer,
max_length=max_length,
dataset_map_fn=None,
template_map_fn=dict(
type=template_map_fn_factory, template=prompt_template),
remove_unused_columns=True,
shuffle_before_pack=True,
pack_to_max_length=pack_to_max_length,
use_varlen_attn=use_varlen_attn) |
@LZHgrla Traceback (most recent call last):
File "/root/anaconda3/envs/xt-new/lib/python3.10/site-packages/xtuner/tools/train.py", line 307, in <module>
main()
File "/root/anaconda3/envs/xt-new/lib/python3.10/site-packages/xtuner/tools/train.py", line 296, in main
runner = Runner.from_cfg(cfg)
File "/root/anaconda3/envs/xt-new/lib/python3.10/site-packages/mmengine/runner/runner.py", line 462, in from_cfg
runner = cls(
File "/root/anaconda3/envs/xt-new/lib/python3.10/site-packages/mmengine/runner/runner.py", line 429, in __init__
self.model = self.build_model(model)
File "/root/anaconda3/envs/xt-new/lib/python3.10/site-packages/mmengine/runner/runner.py", line 836, in build_model
model = MODELS.build(model)
File "/root/anaconda3/envs/xt-new/lib/python3.10/site-packages/mmengine/registry/registry.py", line 570, in build
return self.build_func(cfg, *args, **kwargs, registry=self)
File "/root/anaconda3/envs/xt-new/lib/python3.10/site-packages/mmengine/registry/build_functions.py", line 232, in build_model_from_cfg
return build_from_cfg(cfg, registry, default_args)
File "/root/anaconda3/envs/xt-new/lib/python3.10/site-packages/mmengine/registry/build_functions.py", line 121, in build_from_cfg
obj = obj_cls(**args) # type: ignore
File "/root/anaconda3/envs/xt-new/lib/python3.10/site-packages/xtuner/model/sft.py", line 27, in __init__
self.llm = self._build_from_cfg_or_module(llm)
File "/root/anaconda3/envs/xt-new/lib/python3.10/site-packages/xtuner/model/sft.py", line 91, in _build_from_cfg_or_module
return BUILDER.build(cfg_or_mod)
File "/root/anaconda3/envs/xt-new/lib/python3.10/site-packages/mmengine/registry/registry.py", line 570, in build
return self.build_func(cfg, *args, **kwargs, registry=self)
File "/root/anaconda3/envs/xt-new/lib/python3.10/site-packages/mmengine/registry/build_functions.py", line 121, in build_from_cfg
obj = obj_cls(**args) # type: ignore
File "/root/anaconda3/envs/xt-new/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 556, in from_pretrained
return model_class.from_pretrained(
File "/root/anaconda3/envs/xt-new/lib/python3.10/site-packages/transformers/modeling_utils.py", line 3502, in from_pretrained
) = cls._load_pretrained_model(
File "/root/anaconda3/envs/xt-new/lib/python3.10/site-packages/transformers/modeling_utils.py", line 3926, in _load_pretrained_model
new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
File "/root/anaconda3/envs/xt-new/lib/python3.10/site-packages/transformers/modeling_utils.py", line 802, in _load_state_dict_into_meta_model
or (not hf_quantizer.check_quantized_param(model, param, param_name, state_dict))
File "/root/anaconda3/envs/xt-new/lib/python3.10/site-packages/transformers/quantizers/quantizer_bnb_4bit.py", line 124, in check_quantized_param
if isinstance(module._parameters[tensor_name], bnb.nn.Params4bit):
KeyError: 'inv_freq' |
@fan2goa1 |
感谢,降低到4.37.2后问题解决。 |
根据xtuner提供的chatglm3-alpaca-qlora的config文件进行了相关更改,采用加载本地模型和自创数据集(格式没有问题)。按照同样的方法微调internlm、mistral、qwen都没有问题,但是微调chatglm3-6b时报错:
按照网上的修改,将tokenizer_config.json中的
改为
之后仍会报其他错:
The text was updated successfully, but these errors were encountered: