Skip to content

Commit

Permalink
Remove static pretrained maps from the library's internals
Browse files Browse the repository at this point in the history
  • Loading branch information
LysandreJik committed Mar 5, 2024
1 parent a69cbf4 commit c27ba61
Show file tree
Hide file tree
Showing 1,009 changed files with 1,396 additions and 10,404 deletions.
1,157 changes: 189 additions & 968 deletions src/transformers/__init__.py

Large diffs are not rendered by default.

15 changes: 1 addition & 14 deletions src/transformers/commands/add_new_model_like.py
Original file line number Diff line number Diff line change
Expand Up @@ -785,7 +785,6 @@ def retrieve_info_for_model(model_type, frameworks: Optional[List[str]] = None):

model_name = auto_module.MODEL_NAMES_MAPPING[model_type]
config_class = auto_module.configuration_auto.CONFIG_MAPPING_NAMES[model_type]
archive_map = auto_module.configuration_auto.CONFIG_ARCHIVE_MAP_MAPPING_NAMES.get(model_type, None)
if model_type in auto_module.tokenization_auto.TOKENIZER_MAPPING_NAMES:
tokenizer_classes = auto_module.tokenization_auto.TOKENIZER_MAPPING_NAMES[model_type]
tokenizer_class = tokenizer_classes[0] if tokenizer_classes[0] is not None else tokenizer_classes[1]
Expand Down Expand Up @@ -814,19 +813,7 @@ def retrieve_info_for_model(model_type, frameworks: Optional[List[str]] = None):

model_classes = retrieve_model_classes(model_type, frameworks=frameworks)

# Retrieve model upper-cased name from the constant name of the pretrained archive map.
if archive_map is None:
model_upper_cased = model_camel_cased.upper()
else:
parts = archive_map.split("_")
idx = 0
while idx < len(parts) and parts[idx] != "PRETRAINED":
idx += 1
if idx < len(parts):
model_upper_cased = "_".join(parts[:idx])
else:
model_upper_cased = model_camel_cased.upper()

model_upper_cased = model_camel_cased.upper()
model_patterns = ModelPatterns(
model_name,
checkpoint=find_base_model_checkpoint(model_type, model_files=model_files),
Expand Down
50 changes: 0 additions & 50 deletions src/transformers/convert_pytorch_checkpoint_to_tf2.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,6 @@
import os

from . import (
ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
BART_PRETRAINED_MODEL_ARCHIVE_LIST,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP,
DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST,
DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST,
DPR_READER_PRETRAINED_MODEL_ARCHIVE_LIST,
ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP,
FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,
LAYOUTLM_PRETRAINED_MODEL_ARCHIVE_LIST,
LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP,
ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
T5_PRETRAINED_CONFIG_ARCHIVE_MAP,
TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP,
WAV_2_VEC_2_PRETRAINED_CONFIG_ARCHIVE_MAP,
XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
AlbertConfig,
BartConfig,
BertConfig,
Expand Down Expand Up @@ -140,31 +118,26 @@
TFBartForConditionalGeneration,
TFBartForSequenceClassification,
BartForConditionalGeneration,
BART_PRETRAINED_MODEL_ARCHIVE_LIST,
),
"bert": (
BertConfig,
TFBertForPreTraining,
BertForPreTraining,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"google-bert/bert-large-uncased-whole-word-masking-finetuned-squad": (
BertConfig,
TFBertForQuestionAnswering,
BertForQuestionAnswering,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"google-bert/bert-large-cased-whole-word-masking-finetuned-squad": (
BertConfig,
TFBertForQuestionAnswering,
BertForQuestionAnswering,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"google-bert/bert-base-cased-finetuned-mrpc": (
BertConfig,
TFBertForSequenceClassification,
BertForSequenceClassification,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"dpr": (
DPRConfig,
Expand All @@ -174,130 +147,107 @@
DPRQuestionEncoder,
DPRContextEncoder,
DPRReader,
DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST,
DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST,
DPR_READER_PRETRAINED_MODEL_ARCHIVE_LIST,
),
"openai-community/gpt2": (
GPT2Config,
TFGPT2LMHeadModel,
GPT2LMHeadModel,
GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"xlnet": (
XLNetConfig,
TFXLNetLMHeadModel,
XLNetLMHeadModel,
XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"xlm": (
XLMConfig,
TFXLMWithLMHeadModel,
XLMWithLMHeadModel,
XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"xlm-roberta": (
XLMRobertaConfig,
TFXLMRobertaForMaskedLM,
XLMRobertaForMaskedLM,
XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"transfo-xl": (
TransfoXLConfig,
TFTransfoXLLMHeadModel,
TransfoXLLMHeadModel,
TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"openai-community/openai-gpt": (
OpenAIGPTConfig,
TFOpenAIGPTLMHeadModel,
OpenAIGPTLMHeadModel,
OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"roberta": (
RobertaConfig,
TFRobertaForCausalLM,
TFRobertaForMaskedLM,
RobertaForMaskedLM,
ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"layoutlm": (
LayoutLMConfig,
TFLayoutLMForMaskedLM,
LayoutLMForMaskedLM,
LAYOUTLM_PRETRAINED_MODEL_ARCHIVE_LIST,
),
"FacebookAI/roberta-large-mnli": (
RobertaConfig,
TFRobertaForSequenceClassification,
RobertaForSequenceClassification,
ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"camembert": (
CamembertConfig,
TFCamembertForMaskedLM,
CamembertForMaskedLM,
CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"flaubert": (
FlaubertConfig,
TFFlaubertWithLMHeadModel,
FlaubertWithLMHeadModel,
FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"distilbert": (
DistilBertConfig,
TFDistilBertForMaskedLM,
DistilBertForMaskedLM,
DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"distilbert-base-distilled-squad": (
DistilBertConfig,
TFDistilBertForQuestionAnswering,
DistilBertForQuestionAnswering,
DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"lxmert": (
LxmertConfig,
TFLxmertForPreTraining,
LxmertForPreTraining,
LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"lxmert-visual-feature-encoder": (
LxmertConfig,
TFLxmertVisualFeatureEncoder,
LxmertVisualFeatureEncoder,
LXMERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"Salesforce/ctrl": (
CTRLConfig,
TFCTRLLMHeadModel,
CTRLLMHeadModel,
CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"albert": (
AlbertConfig,
TFAlbertForPreTraining,
AlbertForPreTraining,
ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"t5": (
T5Config,
TFT5ForConditionalGeneration,
T5ForConditionalGeneration,
T5_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"electra": (
ElectraConfig,
TFElectraForPreTraining,
ElectraForPreTraining,
ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"wav2vec2": (
Wav2Vec2Config,
TFWav2Vec2Model,
Wav2Vec2Model,
WAV_2_VEC_2_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
}

Expand Down
8 changes: 2 additions & 6 deletions src/transformers/models/albert/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@


_import_structure = {
"configuration_albert": ["ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "AlbertConfig", "AlbertOnnxConfig"],
"configuration_albert": ["AlbertConfig", "AlbertOnnxConfig"],
}

try:
Expand All @@ -52,7 +52,6 @@
pass
else:
_import_structure["modeling_albert"] = [
"ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST",
"AlbertForMaskedLM",
"AlbertForMultipleChoice",
"AlbertForPreTraining",
Expand All @@ -71,7 +70,6 @@
pass
else:
_import_structure["modeling_tf_albert"] = [
"TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST",
"TFAlbertForMaskedLM",
"TFAlbertForMultipleChoice",
"TFAlbertForPreTraining",
Expand Down Expand Up @@ -101,7 +99,7 @@
]

if TYPE_CHECKING:
from .configuration_albert import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, AlbertConfig, AlbertOnnxConfig
from .configuration_albert import AlbertConfig, AlbertOnnxConfig

try:
if not is_sentencepiece_available():
Expand All @@ -126,7 +124,6 @@
pass
else:
from .modeling_albert import (
ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
AlbertForMaskedLM,
AlbertForMultipleChoice,
AlbertForPreTraining,
Expand All @@ -145,7 +142,6 @@
pass
else:
from .modeling_tf_albert import (
TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
TFAlbertForMaskedLM,
TFAlbertForMultipleChoice,
TFAlbertForPreTraining,
Expand Down
12 changes: 0 additions & 12 deletions src/transformers/models/albert/configuration_albert.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,6 @@
from ...onnx import OnnxConfig


ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
"albert/albert-base-v1": "https://huggingface.co/albert/albert-base-v1/resolve/main/config.json",
"albert/albert-large-v1": "https://huggingface.co/albert/albert-large-v1/resolve/main/config.json",
"albert/albert-xlarge-v1": "https://huggingface.co/albert/albert-xlarge-v1/resolve/main/config.json",
"albert/albert-xxlarge-v1": "https://huggingface.co/albert/albert-xxlarge-v1/resolve/main/config.json",
"albert/albert-base-v2": "https://huggingface.co/albert/albert-base-v2/resolve/main/config.json",
"albert/albert-large-v2": "https://huggingface.co/albert/albert-large-v2/resolve/main/config.json",
"albert/albert-xlarge-v2": "https://huggingface.co/albert/albert-xlarge-v2/resolve/main/config.json",
"albert/albert-xxlarge-v2": "https://huggingface.co/albert/albert-xxlarge-v2/resolve/main/config.json",
}


class AlbertConfig(PretrainedConfig):
r"""
This is the configuration class to store the configuration of a [`AlbertModel`] or a [`TFAlbertModel`]. It is used
Expand Down
13 changes: 0 additions & 13 deletions src/transformers/models/albert/modeling_albert.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,19 +52,6 @@
_CONFIG_FOR_DOC = "AlbertConfig"


ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
"albert/albert-base-v1",
"albert/albert-large-v1",
"albert/albert-xlarge-v1",
"albert/albert-xxlarge-v1",
"albert/albert-base-v2",
"albert/albert-large-v2",
"albert/albert-xlarge-v2",
"albert/albert-xxlarge-v2",
# See all ALBERT models at https://huggingface.co/models?filter=albert
]


def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
"""Load tf checkpoints in a pytorch model."""
try:
Expand Down
12 changes: 0 additions & 12 deletions src/transformers/models/albert/modeling_tf_albert.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,18 +65,6 @@
_CHECKPOINT_FOR_DOC = "albert/albert-base-v2"
_CONFIG_FOR_DOC = "AlbertConfig"

TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
"albert/albert-base-v1",
"albert/albert-large-v1",
"albert/albert-xlarge-v1",
"albert/albert-xxlarge-v1",
"albert/albert-base-v2",
"albert/albert-large-v2",
"albert/albert-xlarge-v2",
"albert/albert-xxlarge-v2",
# See all ALBERT models at https://huggingface.co/models?filter=albert
]


class TFAlbertPreTrainingLoss:
"""
Expand Down
25 changes: 0 additions & 25 deletions src/transformers/models/albert/tokenization_albert.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,29 +29,6 @@
logger = logging.get_logger(__name__)
VOCAB_FILES_NAMES = {"vocab_file": "spiece.model"}

PRETRAINED_VOCAB_FILES_MAP = {
"vocab_file": {
"albert/albert-base-v1": "https://huggingface.co/albert/albert-base-v1/resolve/main/spiece.model",
"albert/albert-large-v1": "https://huggingface.co/albert/albert-large-v1/resolve/main/spiece.model",
"albert/albert-xlarge-v1": "https://huggingface.co/albert/albert-xlarge-v1/resolve/main/spiece.model",
"albert/albert-xxlarge-v1": "https://huggingface.co/albert/albert-xxlarge-v1/resolve/main/spiece.model",
"albert/albert-base-v2": "https://huggingface.co/albert/albert-base-v2/resolve/main/spiece.model",
"albert/albert-large-v2": "https://huggingface.co/albert/albert-large-v2/resolve/main/spiece.model",
"albert/albert-xlarge-v2": "https://huggingface.co/albert/albert-xlarge-v2/resolve/main/spiece.model",
"albert/albert-xxlarge-v2": "https://huggingface.co/albert/albert-xxlarge-v2/resolve/main/spiece.model",
}
}

PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
"albert/albert-base-v1": 512,
"albert/albert-large-v1": 512,
"albert/albert-xlarge-v1": 512,
"albert/albert-xxlarge-v1": 512,
"albert/albert-base-v2": 512,
"albert/albert-large-v2": 512,
"albert/albert-xlarge-v2": 512,
"albert/albert-xxlarge-v2": 512,
}

SPIECE_UNDERLINE = "▁"

Expand Down Expand Up @@ -130,8 +107,6 @@ class AlbertTokenizer(PreTrainedTokenizer):
"""

vocab_files_names = VOCAB_FILES_NAMES
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES

def __init__(
self,
Expand Down
Loading

0 comments on commit c27ba61

Please sign in to comment.