Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unable to finetune transformer based ner model after initial tuning #13710

Open
jlustgarten opened this issue Dec 6, 2024 Discussed in #13394 · 0 comments
Open

Unable to finetune transformer based ner model after initial tuning #13710

jlustgarten opened this issue Dec 6, 2024 Discussed in #13394 · 0 comments

Comments

@jlustgarten
Copy link

Discussed in #13394

Originally posted by jlustgarten March 23, 2024

How to reproduce the behaviour

Create a transformer ner model
Train it on data using the cfg and cli which auto-saves it
Create a new cfg file that points to your existing model
Try triggering the training using the CLI
You will get a missing config.json error

Your Environment

  • spaCy version: 3.7.2
  • Platform: Linux-5.15.146.1-microsoft-standard-WSL2-x86_64-with-glibc2.35
  • Python version: 3.10.13

This is still occurring, with the same error text:
Config:
[paths]
train = null
dev = null
vectors = null
init_tok2vec = null

[system]
gpu_allocator = "pytorch"
seed = 0

[nlp]
lang = "en"
pipeline = ["transformer","ner"]
batch_size = 128
disabled = []
before_creation = null
after_creation = null
after_pipeline_creation = null
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
vectors = {"@vectors":"spacy.Vectors.v1"}

[components]

[components.ner]
factory = "ner"
incorrect_spans_key = null
moves = null
scorer = {"@scorers":"spacy.ner_scorer.v1"}
update_with_oracle_cut_size = 100

[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "ner"
extra_state_tokens = false
hidden_width = 64
maxout_pieces = 2
use_upper = false
nO = null

[components.ner.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0
pooling = {"@layers":"reduce_mean.v1"}
upstream = "*"

[components.transformer]
factory = "transformer"
max_batch_items = 4096
set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}

[components.transformer.model]
@architectures = "spacy-transformers.TransformerModel.v3"
name = "/home/user/Coding/PatientHistory/original_pt_hist_ner"
mixed_precision = false

[components.transformer.model.get_spans]
@span_getters = "spacy-transformers.strided_spans.v1"
window = 128
stride = 96

[components.transformer.model.grad_scaler_config]

[components.transformer.model.tokenizer_config]
use_fast = true

[components.transformer.model.transformer_config]

[corpora]

[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths.dev}
max_length = 0
gold_preproc = false
limit = 0
augmenter = null

[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths.train}
max_length = 0
gold_preproc = false
limit = 0
augmenter = null

[training]
accumulate_gradient = 4
dev_corpus = "corpora.dev"
train_corpus = "corpora.train"
seed = ${system.seed}
gpu_allocator = ${system.gpu_allocator}
dropout = 0.1
patience = 2000
max_epochs = 0
max_steps = 80000
eval_frequency = 200
frozen_components = []
annotating_components = []
before_to_disk = null
before_update = null

[training.batcher]
@batchers = "spacy.batch_by_padded.v1"
discard_oversize = false
size = 2000
buffer = 256
get_length = null

[training.logger]
@loggers = "spacy.ConsoleLogger.v1"
progress_bar = false

[training.optimizer]
@optimizers = "Adam.v1"
beta1 = 0.9
beta2 = 0.999
L2_is_weight_decay = true
L2 = 0.01
grad_clip = 1.0
use_averages = false
eps = 0.00000001

[training.optimizer.learn_rate]
@schedules = "warmup_linear.v1"
warmup_steps = 250
total_steps = 200000
initial_rate = 0.00005

[training.score_weights]
ents_f = 1.0
ents_p = 0.0
ents_r = 0.0
ents_per_type = null

[pretraining]

[initialize]
vectors = ${paths.vectors}
init_tok2vec = ${paths.init_tok2vec}
vocab_data = null
lookups = null
before_init = null
after_init = null

[initialize.components]

[initialize.tokenizer]

Here's the CLI:
python -m spacy train '/home/user/Coding/PatientHistory/refine_pt_hist_ner.cfg' --output '/home/user/Coding/PatientHistory/improved_pt_hist_3_22_2024' --paths.train '/home/user/Coding/PatientHistory/train.spacy' --paths.dev '/home/user/Coding/PatientHistory/test.spacy' --gpu-id 0
Here's the output:
ℹ Saving to output directory:
/home/user/Coding/PatientHistory/improved_pt_hist_3_22_2024
ℹ Using GPU: 0

=========================== Initializing pipeline ===========================
/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/utils/generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.
_torch_pytree._register_pytree_node(
/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/utils/generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.
_torch_pytree._register_pytree_node(
Traceback (most recent call last):
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/runpy.py", line 86, in _run_code
exec(code, run_globals)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/__main__.py", line 4, in <module>
setup_cli()
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/cli/_util.py", line 87, in setup_cli
command(prog_name=COMMAND)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
return self.main(*args, **kwargs)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/typer/core.py", line 778, in main
return _main(
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/typer/core.py", line 216, in _main
rv = self.invoke(ctx)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/click/core.py", line 1688, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/click/core.py", line 1434, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/click/core.py", line 783, in invoke
return __callback(*args, **kwargs)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/typer/main.py", line 683, in wrapper
return callback(**use_params) # type: ignore
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/cli/train.py", line 54, in train_cli
train(config_path, output_path, use_gpu=use_gpu, overrides=overrides)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/cli/train.py", line 81, in train
nlp = init_nlp(config, use_gpu=use_gpu)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/training/initialize.py", line 95, in init_nlp
nlp.initialize(lambda: train_corpus(nlp), sgd=optimizer)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy/language.py", line 1349, in initialize
proc.initialize(get_examples, nlp=self, **p_settings)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy_transformers/pipeline_component.py", line 351, in initialize
self.model.initialize(X=docs)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/thinc/model.py", line 318, in initialize
self.init(self, X=X, Y=Y)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy_transformers/layers/transformer_model.py", line 131, in init
hf_model = huggingface_from_pretrained(name, tok_cfg, trf_cfg)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/spacy_transformers/layers/transformer_model.py", line 267, in huggingface_from_pretrained
tokenizer = tokenizer_cls.from_pretrained(str_path, **tok_config)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py", line 752, in from_pretrained
config = AutoConfig.from_pretrained(
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/models/auto/configuration_auto.py", line 1082, in from_pretrained
config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/configuration_utils.py", line 644, in get_config_dict
config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/configuration_utils.py", line 699, in _get_config_dict
resolved_config_file = cached_file(
File "/home/user/miniconda3/envs/patienthistoryclassifier/lib/python3.10/site-packages/transformers/utils/hub.py", line 360, in cached_file
raise EnvironmentError(
OSError: /home/user/Coding/PatientHistory/original_pt_hist_ner does not appear to have a file named config.json. Checkout 'https://huggingface.co//home/user/Coding/PatientHistory/original_pt_hist_ner/None' for available files.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant