diff --git a/ISSUES.md b/ISSUES.md
index 5985dad3d7c2..593a7d961b13 100644
--- a/ISSUES.md
+++ b/ISSUES.md
@@ -72,7 +72,7 @@ You are not required to read the following guidelines before opening an issue. H
       from . import dependency_versions_check
     File "/transformers/src/transformers/dependency_versions_check.py", line 34, in <module>
       from .utils import is_tokenizers_available
-    File "/transformers/src/transformers/file_utils.py", line 40, in <module>
+    File "/transformers/src/transformers/utils/import_utils.py", line 40, in <module>
       from tqdm.auto import tqdm
 ModuleNotFoundError: No module named 'tqdm.auto'
 ```
@@ -125,7 +125,7 @@ You are not required to read the following guidelines before opening an issue. H
       from . import dependency_versions_check
     File "/transformers/src/transformers/dependency_versions_check.py", line 34, in <module>
       from .utils import is_tokenizers_available
-    File "/transformers/src/transformers/file_utils.py", line 40, in <module>
+    File "/transformers/src/transformers/utils/import_utils.py", line 40, in <module>
       from tqdm.auto import tqdm
 ModuleNotFoundError: No module named 'tqdm.auto'
 ```
diff --git a/docs/README.md b/docs/README.md
index acfa2d0be09c..053a4ca2f18c 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -172,9 +172,9 @@ adds a link to its documentation with this syntax: \[\`XXXClass\`\] or \[\`funct
 function to be in the main package.
 
 If you want to create a link to some internal class or function, you need to
-provide its path. For instance: \[\`file_utils.ModelOutput\`\]. This will be converted into a link with
-`file_utils.ModelOutput` in the description. To get rid of the path and only keep the name of the object you are
-linking to in the description, add a ~: \[\`~file_utils.ModelOutput\`\] will generate a link with `ModelOutput` in the description.
+provide its path. For instance: \[\`utils.ModelOutput\`\]. This will be converted into a link with
+`utils.ModelOutput` in the description. To get rid of the path and only keep the name of the object you are
+linking to in the description, add a ~: \[\`~utils.ModelOutput\`\] will generate a link with `ModelOutput` in the description.
 
 The same works for methods so you can either use \[\`XXXClass.method\`\] or \[~\`XXXClass.method\`\].
 
diff --git a/docs/source/add_new_model.mdx b/docs/source/add_new_model.mdx
index 69c2377f9c39..98b426560f63 100644
--- a/docs/source/add_new_model.mdx
+++ b/docs/source/add_new_model.mdx
@@ -381,7 +381,7 @@ important. Here is some advice is to make your debugging environment as efficien
   original code so that you can directly input the ids instead of an input string.
 - Make sure that the model in your debugging setup is **not** in training mode, which often causes the model to yield
   random outputs due to multiple dropout layers in the model. Make sure that the forward pass in your debugging
-  environment is **deterministic** so that the dropout layers are not used. Or use *transformers.file_utils.set_seed*
+  environment is **deterministic** so that the dropout layers are not used. Or use *transformers.utils.set_seed*
   if the old and new implementations are in the same framework.
 
 The following section gives you more specific details/tips on how you can do this for *brand_new_bert*.
diff --git a/docs/source/internal/file_utils.mdx b/docs/source/internal/file_utils.mdx
index d0d568ce7985..936629314358 100644
--- a/docs/source/internal/file_utils.mdx
+++ b/docs/source/internal/file_utils.mdx
@@ -12,35 +12,35 @@ specific language governing permissions and limitations under the License.
 
 # General Utilities
 
-This page lists all of Transformers general utility functions that are found in the file `file_utils.py`.
+This page lists all of Transformers general utility functions that are found in the file `utils.py`.
 
 Most of those are only useful if you are studying the general code in the library.
 
 
 ## Enums and namedtuples
 
-[[autodoc]] file_utils.ExplicitEnum
+[[autodoc]] utils.ExplicitEnum
 
-[[autodoc]] file_utils.PaddingStrategy
+[[autodoc]] utils.PaddingStrategy
 
-[[autodoc]] file_utils.TensorType
+[[autodoc]] utils.TensorType
 
 ## Special Decorators
 
-[[autodoc]] file_utils.add_start_docstrings
+[[autodoc]] utils.add_start_docstrings
 
-[[autodoc]] file_utils.add_start_docstrings_to_model_forward
+[[autodoc]] utils.add_start_docstrings_to_model_forward
 
-[[autodoc]] file_utils.add_end_docstrings
+[[autodoc]] utils.add_end_docstrings
 
-[[autodoc]] file_utils.add_code_sample_docstrings
+[[autodoc]] utils.add_code_sample_docstrings
 
-[[autodoc]] file_utils.replace_return_docstrings
+[[autodoc]] utils.replace_return_docstrings
 
 ## Special Properties
 
-[[autodoc]] file_utils.cached_property
+[[autodoc]] utils.cached_property
 
 ## Other Utilities
 
-[[autodoc]] file_utils._LazyModule
+[[autodoc]] utils._LazyModule
diff --git a/docs/source/internal/generation_utils.mdx b/docs/source/internal/generation_utils.mdx
index c3e5f1936b1b..c8b42d91848e 100644
--- a/docs/source/internal/generation_utils.mdx
+++ b/docs/source/internal/generation_utils.mdx
@@ -25,7 +25,7 @@ Most of those are only useful if you are studying the code of the generate metho
 ## Generate Outputs
 
 The output of [`~generation_utils.GenerationMixin.generate`] is an instance of a subclass of
-[`~file_utils.ModelOutput`]. This output is a data structure containing all the information returned
+[`~utils.ModelOutput`]. This output is a data structure containing all the information returned
 by [`~generation_utils.GenerationMixin.generate`], but that can also be used as tuple or dictionary.
 
 Here's an example:
diff --git a/docs/source/main_classes/model.mdx b/docs/source/main_classes/model.mdx
index 4da5e72b7ed1..4e05ecf11daa 100644
--- a/docs/source/main_classes/model.mdx
+++ b/docs/source/main_classes/model.mdx
@@ -88,4 +88,4 @@ Due to Pytorch design, this functionality is only available for floating dtypes.
 
 ## Pushing to the Hub
 
-[[autodoc]] file_utils.PushToHubMixin
+[[autodoc]] utils.PushToHubMixin
diff --git a/docs/source/main_classes/output.mdx b/docs/source/main_classes/output.mdx
index e0ef92eebcd4..efca867a0435 100644
--- a/docs/source/main_classes/output.mdx
+++ b/docs/source/main_classes/output.mdx
@@ -12,7 +12,7 @@ specific language governing permissions and limitations under the License.
 
 # Model outputs
 
-All models have outputs that are instances of subclasses of [`~file_utils.ModelOutput`]. Those are
+All models have outputs that are instances of subclasses of [`~utils.ModelOutput`]. Those are
 data structures containing all the information returned by the model, but that can also be used as tuples or
 dictionaries.
 
@@ -57,7 +57,7 @@ documented on their corresponding model page.
 ## ModelOutput
 
-[[autodoc]] file_utils.ModelOutput
+[[autodoc]] utils.ModelOutput
     - to_tuple
 
 ## BaseModelOutput
 
diff --git a/docs/source/main_classes/trainer.mdx b/docs/source/main_classes/trainer.mdx
index f773796d2a50..a3c57b51f570 100644
--- a/docs/source/main_classes/trainer.mdx
+++ b/docs/source/main_classes/trainer.mdx
@@ -40,7 +40,7 @@ The [`Trainer`] contains the basic training loop which supports the above featur
 
 The [`Trainer`] class is optimized for 🤗 Transformers models and can have surprising behaviors
 when you use it on other models. When using it on your own model, make sure:
 
-- your model always return tuples or subclasses of [`~file_utils.ModelOutput`].
+- your model always returns tuples or subclasses of [`~utils.ModelOutput`].
 - your model can compute the loss if a `labels` argument is provided and that loss is returned as the first
   element of the tuple (if your model returns tuples)
 - your model can accept multiple label arguments (use the `label_names` in your [`TrainingArguments`] to indicate
   their name to the [`Trainer`]) but none of them should be named `"label"`.
diff --git a/docs/source/performance.mdx b/docs/source/performance.mdx
index 25d78ee326a3..0303fe789059 100644
--- a/docs/source/performance.mdx
+++ b/docs/source/performance.mdx
@@ -855,7 +855,7 @@ If you need to switch a tensor to bf16, it's just: `t.to(dtype=torch.bfloat16)`
 Here is how you can check if your setup supports bf16:
 
 ```
-python -c 'import transformers; print(f"BF16 support is {transformers.file_utils.is_torch_bf16_available()}")'
+python -c 'import transformers; print(f"BF16 support is {transformers.utils.is_torch_bf16_available()}")'
 ```
 
 On the other hand bf16 has a much worse precision than fp16, so there are certain situations where you'd still want to use fp16 and not bf16.
diff --git a/examples/pytorch/multiple-choice/run_swag.py b/examples/pytorch/multiple-choice/run_swag.py
index a72ffde3f7a2..eb9f52f4d54a 100755
--- a/examples/pytorch/multiple-choice/run_swag.py
+++ b/examples/pytorch/multiple-choice/run_swag.py
@@ -153,7 +153,7 @@ class DataCollatorForMultipleChoice:
     Args:
         tokenizer ([`PreTrainedTokenizer`] or [`PreTrainedTokenizerFast`]):
             The tokenizer used for encoding the data.
-        padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`):
+        padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
             Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
             among:
diff --git a/examples/pytorch/multiple-choice/run_swag_no_trainer.py b/examples/pytorch/multiple-choice/run_swag_no_trainer.py
index 4c60e72f3c67..451f2fc17b89 100755
--- a/examples/pytorch/multiple-choice/run_swag_no_trainer.py
+++ b/examples/pytorch/multiple-choice/run_swag_no_trainer.py
@@ -193,7 +193,7 @@ class DataCollatorForMultipleChoice:
     Args:
         tokenizer ([`PreTrainedTokenizer`] or [`PreTrainedTokenizerFast`]):
            The tokenizer used for encoding the data.
-        padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`):
+        padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
             Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
             among:
diff --git a/examples/tensorflow/multiple-choice/run_swag.py b/examples/tensorflow/multiple-choice/run_swag.py
index c5ebc3106802..0c1c62de26fa 100644
--- a/examples/tensorflow/multiple-choice/run_swag.py
+++ b/examples/tensorflow/multiple-choice/run_swag.py
@@ -74,7 +74,7 @@ class DataCollatorForMultipleChoice:
     Args:
         tokenizer ([`PreTrainedTokenizer`] or [`PreTrainedTokenizerFast`]):
             The tokenizer used for encoding the data.
-        padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`):
+        padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
             Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
             among:
diff --git a/src/transformers/commands/add_new_model_like.py b/src/transformers/commands/add_new_model_like.py
index d8eece141b8c..31a5d714ab68 100644
--- a/src/transformers/commands/add_new_model_like.py
+++ b/src/transformers/commands/add_new_model_like.py
@@ -784,7 +784,7 @@ def clean_frameworks_in_init(
             indent = find_indent(lines[idx])
             while find_indent(lines[idx]) >= indent or is_empty_line(lines[idx]):
                 idx += 1
-        # Remove the import from file_utils
+        # Remove the import from utils
         elif re_is_xxx_available.search(lines[idx]) is not None:
             line = lines[idx]
             for framework in to_remove:
diff --git a/src/transformers/configuration_utils.py b/src/transformers/configuration_utils.py
index f274d7c636d9..f572cd9fd5a8 100755
--- a/src/transformers/configuration_utils.py
+++ b/src/transformers/configuration_utils.py
@@ -93,7 +93,7 @@ class PretrainedConfig(PushToHubMixin):
         output_attentions (`bool`, *optional*, defaults to `False`):
             Whether or not the model should returns all attentions.
         return_dict (`bool`, *optional*, defaults to `True`):
-            Whether or not the model should return a [`~transformers.file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not the model should return a [`~transformers.utils.ModelOutput`] instead of a plain tuple.
         is_encoder_decoder (`bool`, *optional*, defaults to `False`):
             Whether the model is used as an encoder/decoder or not.
         is_decoder (`bool`, *optional*, defaults to `False`):
@@ -170,7 +170,7 @@ class PretrainedConfig(PushToHubMixin):
         output_scores (`bool`, *optional*, defaults to `False`):
             Whether the model should return the logits when used for generation.
         return_dict_in_generate (`bool`, *optional*, defaults to `False`):
-            Whether the model should return a [`~transformers.file_utils.ModelOutput`] instead of a `torch.LongTensor`.
+            Whether the model should return a [`~transformers.utils.ModelOutput`] instead of a `torch.LongTensor`.
         forced_bos_token_id (`int`, *optional*):
             The id of the token to force as the first generated token after the `decoder_start_token_id`. Useful for
             multilingual models like [mBART](../model_doc/mbart) where the first generated token needs to be the target
@@ -379,7 +379,7 @@ def name_or_path(self, value):
     @property
     def use_return_dict(self) -> bool:
         """
-        `bool`: Whether or not return [`~file_utils.ModelOutput`] instead of tuples.
+        `bool`: Whether or not to return [`~utils.ModelOutput`] instead of tuples.
         """
         # If torchscript is set, force `return_dict=False` to avoid jit errors
         return self.return_dict and not self.torchscript
@@ -417,7 +417,7 @@ def save_pretrained(self, save_directory: Union[str, os.PathLike], push_to_hub:
 
             kwargs:
-                Additional key word arguments passed along to the [`~file_utils.PushToHubMixin.push_to_hub`] method.
+                Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
         """
         if os.path.isfile(save_directory):
             raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")
diff --git a/src/transformers/data/data_collator.py b/src/transformers/data/data_collator.py
index c23c8b221cbb..1599cfab8a28 100644
--- a/src/transformers/data/data_collator.py
+++ b/src/transformers/data/data_collator.py
@@ -216,7 +216,7 @@ class DataCollatorWithPadding:
     Args:
         tokenizer ([`PreTrainedTokenizer`] or [`PreTrainedTokenizerFast`]):
             The tokenizer used for encoding the data.
-        padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`):
+        padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
             Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
             among:
@@ -268,7 +268,7 @@ class DataCollatorForTokenClassification(DataCollatorMixin):
     Args:
         tokenizer ([`PreTrainedTokenizer`] or [`PreTrainedTokenizerFast`]):
             The tokenizer used for encoding the data.
-        padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`):
+        padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
             Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
             among:
@@ -523,7 +523,7 @@ class DataCollatorForSeq2Seq:
             prepare the *decoder_input_ids*
 
             This is useful when using *label_smoothing* to avoid calculating loss twice.
-        padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`):
+        padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
             Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
             among:
diff --git a/src/transformers/feature_extraction_sequence_utils.py b/src/transformers/feature_extraction_sequence_utils.py
index 289f6c558a14..cbcdeb4acdf5 100644
--- a/src/transformers/feature_extraction_sequence_utils.py
+++ b/src/transformers/feature_extraction_sequence_utils.py
@@ -90,7 +90,7 @@ def pad(
                 Instead of `List[float]` you can have tensors (numpy arrays, PyTorch tensors or TensorFlow tensors),
                 see the note above for the return type.
-            padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`):
+            padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
                 Select a strategy to pad the returned sequences (according to the model's padding side and padding
                 index) among:
@@ -114,7 +114,7 @@ def pad(
                 to the specific feature_extractor's default.
 
                 [What are attention masks?](../glossary#attention-mask)
-            return_tensors (`str` or [`~file_utils.TensorType`], *optional*):
+            return_tensors (`str` or [`~utils.TensorType`], *optional*):
                 If set, will return tensors instead of list of python integers. Acceptable values are:
 
                 - `'tf'`: Return TensorFlow `tf.constant` objects.
diff --git a/src/transformers/feature_extraction_utils.py b/src/transformers/feature_extraction_utils.py
index dcdb6fa01dc0..953ef41ba7db 100644
--- a/src/transformers/feature_extraction_utils.py
+++ b/src/transformers/feature_extraction_utils.py
@@ -117,9 +117,9 @@ def convert_to_tensors(self, tensor_type: Optional[Union[str, TensorType]] = Non
         Convert the inner content to tensors.
 
         Args:
-            tensor_type (`str` or [`~file_utils.TensorType`], *optional*):
-                The type of tensors to use. If `str`, should be one of the values of the enum
-                [`~file_utils.TensorType`]. If `None`, no modification is done.
+            tensor_type (`str` or [`~utils.TensorType`], *optional*):
+                The type of tensors to use. If `str`, should be one of the values of the enum [`~utils.TensorType`]. If
+                `None`, no modification is done.
         """
         if tensor_type is None:
             return self
@@ -328,7 +328,7 @@ def save_pretrained(self, save_directory: Union[str, os.PathLike], push_to_hub:
 
             kwargs:
-                Additional key word arguments passed along to the [`~file_utils.PushToHubMixin.push_to_hub`] method.
+                Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
         """
         if os.path.isfile(save_directory):
             raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")
diff --git a/src/transformers/generation_flax_utils.py b/src/transformers/generation_flax_utils.py
index 812a7202f0f8..75fa54bdceb0 100644
--- a/src/transformers/generation_flax_utils.py
+++ b/src/transformers/generation_flax_utils.py
@@ -241,7 +241,7 @@ def generate(
                 should be prefixed with *decoder_*. Also accepts `encoder_outputs` to skip encoder part.
 
         Return:
-            [`~file_utils.ModelOutput`].
+            [`~utils.ModelOutput`].
 
         Examples:
diff --git a/src/transformers/generation_tf_utils.py b/src/transformers/generation_tf_utils.py
index 95c4b04d3254..e96eb191e6a2 100644
--- a/src/transformers/generation_tf_utils.py
+++ b/src/transformers/generation_tf_utils.py
@@ -469,7 +469,7 @@ def generate(
             output_scores (`bool`, *optional*, defaults to `False`):
                 Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
             return_dict_in_generate (`bool`, *optional*, defaults to `False`):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
             forced_bos_token_id (`int`, *optional*):
                 The id of the token to force as the first generated token after the `decoder_start_token_id`. Useful
                 for multilingual models like [mBART](../model_doc/mbart) where the first generated token needs to be
@@ -480,11 +480,11 @@ def generate(
                 Additional model specific kwargs will be forwarded to the `forward` function of the model.
 
         Return:
-            [`~file_utils.ModelOutput`] or `tf.Tensor`: A [`~file_utils.ModelOutput`] (if
-            `return_dict_in_generate=True` or when `config.return_dict_in_generate=True`) or a `tf.Tensor`.
+            [`~utils.ModelOutput`] or `tf.Tensor`: A [`~utils.ModelOutput`] (if `return_dict_in_generate=True` or when
+            `config.return_dict_in_generate=True`) or a `tf.Tensor`.
 
             If the model is *not* an encoder-decoder model (`model.config.is_encoder_decoder=False`), the possible
-            [`~file_utils.ModelOutput`] types are:
+            [`~utils.ModelOutput`] types are:
 
                 - [`~generation_tf_utils.TFGreedySearchDecoderOnlyOutput`],
                 - [`~generation_tf_utils.TFSampleDecoderOnlyOutput`],
@@ -492,7 +492,7 @@ def generate(
                 - [`~generation_tf_utils.TFBeamSampleDecoderOnlyOutput`]
 
             If the model is an encoder-decoder model (`model.config.is_encoder_decoder=True`), the possible
-            [`~file_utils.ModelOutput`] types are:
+            [`~utils.ModelOutput`] types are:
 
                 - [`~generation_tf_utils.TFGreedySearchEncoderDecoderOutput`],
                 - [`~generation_tf_utils.TFSampleEncoderDecoderOutput`],
@@ -1370,7 +1370,7 @@ def _generate(
             output_scores (`bool`, *optional*, defaults to `False`):
                 Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
             return_dict_in_generate (`bool`, *optional*, defaults to `False`):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
             forced_bos_token_id (`int`, *optional*):
                 The id of the token to force as the first generated token after the `decoder_start_token_id`. Useful
                 for multilingual models like [mBART](../model_doc/mbart) where the first generated token needs to be
@@ -1381,11 +1381,11 @@ def _generate(
                 Additional model specific kwargs will be forwarded to the `forward` function of the model.
 
         Return:
-            [`~file_utils.ModelOutput`] or `tf.Tensor`: A [`~file_utils.ModelOutput`] (if
-            `return_dict_in_generate=True` or when `config.return_dict_in_generate=True`) or a `tf.Tensor`.
+            [`~utils.ModelOutput`] or `tf.Tensor`: A [`~utils.ModelOutput`] (if `return_dict_in_generate=True` or when
+            `config.return_dict_in_generate=True`) or a `tf.Tensor`.
 
             If the model is *not* an encoder-decoder model (`model.config.is_encoder_decoder=False`), the possible
-            [`~file_utils.ModelOutput`] types are:
+            [`~utils.ModelOutput`] types are:
 
                 - [`~generation_tf_utils.TFGreedySearchDecoderOnlyOutput`],
                 - [`~generation_tf_utils.TFSampleDecoderOnlyOutput`],
@@ -1393,7 +1393,7 @@ def _generate(
                 - [`~generation_tf_utils.TFBeamSampleDecoderOnlyOutput`]
 
             If the model is an encoder-decoder model (`model.config.is_encoder_decoder=True`), the possible
-            [`~file_utils.ModelOutput`] types are:
+            [`~utils.ModelOutput`] types are:
 
                 - [`~generation_tf_utils.TFGreedySearchEncoderDecoderOutput`],
                 - [`~generation_tf_utils.TFSampleEncoderDecoderOutput`],
@@ -1822,7 +1822,7 @@ def greedy_search(
             output_scores (`bool`, *optional*, defaults to `False`):
                 Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
             return_dict_in_generate (`bool`, *optional*, defaults to `False`):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
             model_kwargs:
                 Additional model specific keyword arguments will be forwarded to the `call` function of the model. If
                 model is an encoder-decoder model the kwargs should include `encoder_outputs`.
@@ -2085,7 +2085,7 @@ def sample(
             output_scores (`bool`, *optional*, defaults to `False`):
                 Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
             return_dict_in_generate (`bool`, *optional*, defaults to `False`):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
             model_kwargs:
                 Additional model specific kwargs will be forwarded to the `call` function of the model. If model is
                 an encoder-decoder model the kwargs should include `encoder_outputs`.
diff --git a/src/transformers/generation_utils.py b/src/transformers/generation_utils.py
index 9618b74ab206..072ddfc3742a 100644
--- a/src/transformers/generation_utils.py
+++ b/src/transformers/generation_utils.py
@@ -1003,7 +1003,7 @@ def generate(
             output_scores (`bool`, *optional*, defaults to `False`):
                 Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
             return_dict_in_generate (`bool`, *optional*, defaults to `False`):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
             forced_bos_token_id (`int`, *optional*):
                 The id of the token to force as the first generated token after the `decoder_start_token_id`. Useful
                 for multilingual models like [mBART](../model_doc/mbart) where the first generated token needs to be
@@ -1026,11 +1026,11 @@ def generate(
                 should be prefixed with *decoder_*.
 
         Return:
-            [`~file_utils.ModelOutput`] or `torch.LongTensor`: A [`~file_utils.ModelOutput`] (if
-            `return_dict_in_generate=True` or when `config.return_dict_in_generate=True`) or a `torch.FloatTensor`.
+            [`~utils.ModelOutput`] or `torch.LongTensor`: A [`~utils.ModelOutput`] (if `return_dict_in_generate=True`
+            or when `config.return_dict_in_generate=True`) or a `torch.LongTensor`.
 
             If the model is *not* an encoder-decoder model (`model.config.is_encoder_decoder=False`), the possible
-            [`~file_utils.ModelOutput`] types are:
+            [`~utils.ModelOutput`] types are:
 
                 - [`~generation_utils.GreedySearchDecoderOnlyOutput`],
                 - [`~generation_utils.SampleDecoderOnlyOutput`],
@@ -1038,7 +1038,7 @@ def generate(
                 - [`~generation_utils.BeamSampleDecoderOnlyOutput`]
 
             If the model is an encoder-decoder model (`model.config.is_encoder_decoder=True`), the possible
-            [`~file_utils.ModelOutput`] types are:
+            [`~utils.ModelOutput`] types are:
 
                 - [`~generation_utils.GreedySearchEncoderDecoderOutput`],
                 - [`~generation_utils.SampleEncoderDecoderOutput`],
@@ -1531,7 +1531,7 @@ def greedy_search(
             output_scores (`bool`, *optional*, defaults to `False`):
                 Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
             return_dict_in_generate (`bool`, *optional*, defaults to `False`):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
             synced_gpus (`bool`, *optional*, defaults to `False`):
                 Whether to continue running the while loop until max_length (needed for ZeRO stage 3)
             model_kwargs:
@@ -1767,7 +1767,7 @@ def sample(
             output_scores (`bool`, *optional*, defaults to `False`):
                 Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
             return_dict_in_generate (`bool`, *optional*, defaults to `False`):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
             synced_gpus (`bool`, *optional*, defaults to `False`):
                 Whether to continue running the while loop until max_length (needed for ZeRO stage 3)
             model_kwargs:
@@ -2022,7 +2022,7 @@ def beam_search(
             output_scores (`bool`, *optional*, defaults to `False`):
                 Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
             return_dict_in_generate (`bool`, *optional*, defaults to `False`):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
             synced_gpus (`bool`, *optional*, defaults to `False`):
                 Whether to continue running the while loop until max_length (needed for ZeRO stage 3)
             model_kwargs:
@@ -2339,7 +2339,7 @@ def beam_sample(
             output_scores (`bool`, *optional*, defaults to `False`):
                 Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
             return_dict_in_generate (`bool`, *optional*, defaults to `False`):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
             synced_gpus (`bool`, *optional*, defaults to `False`):
                 Whether to continue running the while loop until max_length (needed for ZeRO stage 3)
             model_kwargs:
@@ -2656,7 +2656,7 @@ def group_beam_search(
             output_scores (`bool`, *optional*, defaults to `False`):
                 Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
             return_dict_in_generate (`bool`, *optional*, defaults to `False`):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
             synced_gpus (`bool`, *optional*, defaults to `False`):
                 Whether to continue running the while loop until max_length (needed for ZeRO stage 3)
@@ -3026,7 +3026,7 @@ def constrained_beam_search(
             output_scores (`bool`, *optional*, defaults to `False`):
                 Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
             return_dict_in_generate (`bool`, *optional*, defaults to `False`):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
             synced_gpus (`bool`, *optional*, defaults to `False`):
                 Whether to continue running the while loop until max_length (needed for ZeRO stage 3)
             model_kwargs:
diff --git a/src/transformers/modeling_flax_utils.py b/src/transformers/modeling_flax_utils.py
index c298d6726b3a..dd9a7dc29fd7 100644
--- a/src/transformers/modeling_flax_utils.py
+++ b/src/transformers/modeling_flax_utils.py
@@ -681,7 +681,7 @@ def save_pretrained(self, save_directory: Union[str, os.PathLike], params=None,
 
             kwargs:
-                Additional key word arguments passed along to the [`~file_utils.PushToHubMixin.push_to_hub`] method.
+                Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
         """
         if os.path.isfile(save_directory):
             logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py
index dfa341d853d4..d46226a5a1d1 100644
--- a/src/transformers/modeling_tf_utils.py
+++ b/src/transformers/modeling_tf_utils.py
@@ -1401,7 +1401,7 @@ def save_pretrained(self, save_directory, saved_model=False, version=1, push_to_
 
             kwargs:
-                Additional key word arguments passed along to the [`~file_utils.PushToHubMixin.push_to_hub`] method.
+                Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
         """
         if os.path.isfile(save_directory):
             logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index 660310f27462..bee88d3cf04c 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -1036,7 +1036,7 @@ def save_pretrained(
 
             kwargs:
-                Additional key word arguments passed along to the [`~file_utils.PushToHubMixin.push_to_hub`] method.
+                Additional keyword arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
         """
         if os.path.isfile(save_directory):
             logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
@@ -2129,7 +2129,7 @@ def forward(
             Mask for tokens at invalid position, such as query and special symbols (PAD, SEP, CLS). 1.0 means token
             should be masked.
         return_dict (`bool`, *optional*, defaults to `False`):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 
         Returns:
         """
diff --git a/src/transformers/models/albert/modeling_albert.py b/src/transformers/models/albert/modeling_albert.py
index 9cf283c70e6f..7563ae52897b 100755
--- a/src/transformers/models/albert/modeling_albert.py
+++ b/src/transformers/models/albert/modeling_albert.py
@@ -610,7 +610,7 @@ class AlbertForPreTrainingOutput(ModelOutput):
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
diff --git a/src/transformers/models/albert/modeling_flax_albert.py b/src/transformers/models/albert/modeling_flax_albert.py
index 289a72ba1fd7..5b05c5a152db 100644
--- a/src/transformers/models/albert/modeling_flax_albert.py
+++ b/src/transformers/models/albert/modeling_flax_albert.py
@@ -144,7 +144,7 @@ class FlaxAlbertForPreTrainingOutput(ModelOutput):
             Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
             config.max_position_embeddings - 1]`.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
diff --git a/src/transformers/models/albert/modeling_tf_albert.py b/src/transformers/models/albert/modeling_tf_albert.py
index 5117d58e169d..51bc5c0ae77b 100644
--- a/src/transformers/models/albert/modeling_tf_albert.py
+++ b/src/transformers/models/albert/modeling_tf_albert.py
@@ -747,8 +747,8 @@ class TFAlbertForPreTrainingOutput(ModelOutput):
             more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
             used instead.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used
-            in eager mode, in graph mode the value will always be set to True.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
+            eager mode, in graph mode the value will always be set to True.
         training (`bool`, *optional*, defaults to `False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/bart/modeling_bart.py b/src/transformers/models/bart/modeling_bart.py
index 5dfd14e42a08..cfb4632a0958 100755
--- a/src/transformers/models/bart/modeling_bart.py
+++ b/src/transformers/models/bart/modeling_bart.py
@@ -679,7 +679,7 @@ def __init_subclass__(self):
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
@@ -770,7 +770,7 @@ def forward(
                 Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
                 for more detail.
            return_dict (`bool`, *optional*):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -993,7 +993,7 @@ def forward(
                 Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
                 for more detail.
             return_dict (`bool`, *optional*):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -1799,7 +1799,7 @@ def forward(
                 Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
                 for more detail.
             return_dict (`bool`, *optional*):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 
             Returns:
diff --git a/src/transformers/models/bart/modeling_flax_bart.py b/src/transformers/models/bart/modeling_flax_bart.py
index 6e8f70fb1ea5..29acc0325b04 100644
--- a/src/transformers/models/bart/modeling_flax_bart.py
+++ b/src/transformers/models/bart/modeling_flax_bart.py
@@ -138,7 +138,7 @@
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
@@ -169,7 +169,7 @@
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
 
 BART_DECODE_INPUTS_DOCSTRING = r"""
@@ -215,7 +215,7 @@
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
diff --git a/src/transformers/models/bart/modeling_tf_bart.py b/src/transformers/models/bart/modeling_tf_bart.py
index 9e4d87d957d4..9599bfe1d1e2 100644
--- a/src/transformers/models/bart/modeling_tf_bart.py
+++ b/src/transformers/models/bart/modeling_tf_bart.py
@@ -625,8 +625,8 @@ def serving(self, inputs):
             more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
             used instead.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used
-            in eager mode, in graph mode the value will always be set to True.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
+            eager mode, in graph mode the value will always be set to True.
         training (`bool`, *optional*, defaults to `False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
@@ -715,7 +715,7 @@ def call(
                 Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
                 for more detail.
             return_dict (`bool`, *optional*):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
 
         if input_ids is not None and inputs_embeds is not None:
@@ -894,7 +894,7 @@ def call(
                 Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
                 for more detail.
             return_dict (`bool`, *optional*):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
 
         if input_ids is not None and inputs_embeds is not None:
diff --git a/src/transformers/models/beit/feature_extraction_beit.py b/src/transformers/models/beit/feature_extraction_beit.py
index bbf54af266bf..fb74a7c59afc 100644
--- a/src/transformers/models/beit/feature_extraction_beit.py
+++ b/src/transformers/models/beit/feature_extraction_beit.py
@@ -120,7 +120,7 @@ def __call__(
            segmentation_maps (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`,
            `List[np.ndarray]`, `List[torch.Tensor]`, *optional*):
                Optionally, the corresponding semantic segmentation maps with the pixel-wise annotations.
-            return_tensors (`str` or [`~file_utils.TensorType`], *optional*, defaults to `'np'`):
+            return_tensors (`str` or [`~utils.TensorType`], *optional*, defaults to `'np'`):
                If set, will return tensors of a particular framework. Acceptable values are:

                - `'tf'`: Return TensorFlow `tf.constant` objects.
diff --git a/src/transformers/models/beit/modeling_beit.py b/src/transformers/models/beit/modeling_beit.py
index 68584476d1b8..606a9371d082 100755
--- a/src/transformers/models/beit/modeling_beit.py
+++ b/src/transformers/models/beit/modeling_beit.py
@@ -618,7 +618,7 @@ def _set_gradient_checkpointing(self, module, value=False):
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
diff --git a/src/transformers/models/beit/modeling_flax_beit.py b/src/transformers/models/beit/modeling_flax_beit.py
index ab7eff4d63e7..952f2aca7201 100644
--- a/src/transformers/models/beit/modeling_flax_beit.py
+++ b/src/transformers/models/beit/modeling_flax_beit.py
@@ -111,7 +111,7 @@ class FlaxBeitModelOutputWithPooling(FlaxBaseModelOutputWithPooling):
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
diff --git a/src/transformers/models/bert/modeling_bert.py b/src/transformers/models/bert/modeling_bert.py
index 81642e8a5952..88c99c8a2ee4 100755
--- a/src/transformers/models/bert/modeling_bert.py
+++ b/src/transformers/models/bert/modeling_bert.py
@@ -837,7 +837,7 @@ class BertForPreTrainingOutput(ModelOutput):
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
diff --git a/src/transformers/models/bert/modeling_flax_bert.py b/src/transformers/models/bert/modeling_flax_bert.py
index 98dfd7b7a405..241e0a3ff5a9 100644
--- a/src/transformers/models/bert/modeling_flax_bert.py
+++ b/src/transformers/models/bert/modeling_flax_bert.py
@@ -164,7 +164,7 @@ class FlaxBertForPreTrainingOutput(ModelOutput):
             - 0 indicates the head is **masked**.
 
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
diff --git a/src/transformers/models/bert/modeling_tf_bert.py b/src/transformers/models/bert/modeling_tf_bert.py
index cc9770e63e26..6dfae3d5fb60 100644
--- a/src/transformers/models/bert/modeling_tf_bert.py
+++ b/src/transformers/models/bert/modeling_tf_bert.py
@@ -1025,8 +1025,8 @@ class TFBertForPreTrainingOutput(ModelOutput):
             more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
             used instead.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used
-            in eager mode, in graph mode the value will always be set to True.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
+            eager mode, in graph mode the value will always be set to True.
         training (`bool`, *optional*, defaults to `False``):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
diff --git a/src/transformers/models/bert_generation/modeling_bert_generation.py b/src/transformers/models/bert_generation/modeling_bert_generation.py
index 2123faee5c61..49a9e96d7c2b 100755
--- a/src/transformers/models/bert_generation/modeling_bert_generation.py
+++ b/src/transformers/models/bert_generation/modeling_bert_generation.py
@@ -242,7 +242,7 @@ def _init_weights(self, module):
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
diff --git a/src/transformers/models/big_bird/modeling_big_bird.py b/src/transformers/models/big_bird/modeling_big_bird.py
index e4a4a5963468..b765a854009d 100755
--- a/src/transformers/models/big_bird/modeling_big_bird.py
+++ b/src/transformers/models/big_bird/modeling_big_bird.py
@@ -1844,7 +1844,7 @@ def _set_gradient_checkpointing(self, module, value=False):
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
diff --git a/src/transformers/models/big_bird/modeling_flax_big_bird.py b/src/transformers/models/big_bird/modeling_flax_big_bird.py
index d9ec686e850e..ad6aa3f2d8b7 100644
--- a/src/transformers/models/big_bird/modeling_flax_big_bird.py
+++ b/src/transformers/models/big_bird/modeling_flax_big_bird.py
@@ -181,7 +181,7 @@ class FlaxBigBirdForQuestionAnsweringModelOutput(ModelOutput):
             - 0 indicates the head is **masked**.
 
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
diff --git a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
index 2f624d362b25..540f77944b7b 100755
--- a/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
+++ b/src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
@@ -1724,7 +1724,7 @@ def dummy_inputs(self):
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
 
 BIGBIRD_PEGASUS_STANDALONE_INPUTS_DOCSTRING = r"""
@@ -1751,7 +1751,7 @@ def dummy_inputs(self):
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
@@ -1834,7 +1834,7 @@ def forward(
                 Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
                 for more detail.
             return_dict (`bool`, *optional*):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -2188,7 +2188,7 @@ def forward(
                 Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
                 for more detail.
             return_dict (`bool`, *optional*):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -2999,7 +2999,7 @@ def forward(
                 Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
                 for more detail.
             return_dict (`bool`, *optional*):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 
             Returns:
diff --git a/src/transformers/models/blenderbot/modeling_blenderbot.py b/src/transformers/models/blenderbot/modeling_blenderbot.py
index 0984a842aa87..928e22e860e7 100755
--- a/src/transformers/models/blenderbot/modeling_blenderbot.py
+++ b/src/transformers/models/blenderbot/modeling_blenderbot.py
@@ -625,7 +625,7 @@ def dummy_inputs(self):
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
@@ -710,7 +710,7 @@ def forward(
                 Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
                 for more detail.
             return_dict (`bool`, *optional*):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -935,7 +935,7 @@ def forward(
                 Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
                 for more detail.
             return_dict (`bool`, *optional*):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -1518,7 +1518,7 @@ def forward(
                 Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
                 for more detail.
             return_dict (`bool`, *optional*):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 
             Returns:
diff --git a/src/transformers/models/blenderbot/modeling_flax_blenderbot.py b/src/transformers/models/blenderbot/modeling_flax_blenderbot.py
index 529dc0b1d7e5..15e759fa38ed 100644
--- a/src/transformers/models/blenderbot/modeling_flax_blenderbot.py
+++ b/src/transformers/models/blenderbot/modeling_flax_blenderbot.py
@@ -124,7 +124,7 @@
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
@@ -155,7 +155,7 @@
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
 
 BLENDERBOT_DECODE_INPUTS_DOCSTRING = r"""
@@ -201,7 +201,7 @@
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
diff --git a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py
index 09951cf9cc25..80236fab0211 100644
--- a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py
+++ b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py
@@ -608,8 +608,8 @@ def serving(self, inputs):
             more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
             used instead.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used
-            in eager mode, in graph mode the value will always be set to True.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
+            eager mode, in graph mode the value will always be set to True.
         training (`bool`, *optional*, defaults to `False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
@@ -700,8 +700,8 @@ def call(
                 for more detail. This argument can be used only in eager mode, in graph mode the value in the config
                 will be used instead.
             return_dict (`bool`, *optional*):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be
-                used in eager mode, in graph mode the value will always be set to True.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used
+                in eager mode, in graph mode the value will always be set to True.
             training (`bool`, *optional*, defaults to `False`):
                 Whether or not to use the model in training mode (some modules like dropout modules have different
                 behaviors between training and evaluation).
@@ -885,8 +885,8 @@ def call(
                 for more detail. This argument can be used only in eager mode, in graph mode the value in the config
                 will be used instead.
             return_dict (`bool`, *optional*):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be
-                used in eager mode, in graph mode the value will always be set to True.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used
+                in eager mode, in graph mode the value will always be set to True.
             training (`bool`, *optional*, defaults to `False`):
                 Whether or not to use the model in training mode (some modules like dropout modules have different
                 behaviors between training and evaluation).
diff --git a/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
index 81a857460469..1fb1c4753434 100755
--- a/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
+++ b/src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
@@ -623,7 +623,7 @@ def dummy_inputs(self):
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
@@ -708,7 +708,7 @@ def forward(
                 Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
                 for more detail.
             return_dict (`bool`, *optional*):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -930,7 +930,7 @@ def forward(
                 Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
                 for more detail.
             return_dict (`bool`, *optional*):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -1489,7 +1489,7 @@ def forward(
                 Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
                 for more detail.
             return_dict (`bool`, *optional*):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 
             Returns:
diff --git a/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py
index bfb90ef3c3b7..f94879d39fb1 100644
--- a/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py
+++ b/src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py
@@ -136,7 +136,7 @@
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
@@ -167,7 +167,7 @@
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
 
 BLENDERBOT_SMALL_DECODE_INPUTS_DOCSTRING = r"""
@@ -213,7 +213,7 @@
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
diff --git a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py
index 5d3ac0cea78e..af575e6418b7 100644
--- a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py
+++ b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py
@@ -613,8 +613,8 @@ def serving(self, inputs):
             more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
             used instead.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used
-            in eager mode, in graph mode the value will always be set to True.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
+            eager mode, in graph mode the value will always be set to True.
         training (`bool`, *optional*, defaults to `False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
             behaviors between training and evaluation).
@@ -705,8 +705,8 @@ def call(
                 for more detail. This argument can be used only in eager mode, in graph mode the value in the config
                 will be used instead.
             return_dict (`bool`, *optional*):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be
-                used in eager mode, in graph mode the value will always be set to True.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used
+                in eager mode, in graph mode the value will always be set to True.
             training (`bool`, *optional*, defaults to `False`):
                 Whether or not to use the model in training mode (some modules like dropout modules have different
                 behaviors between training and evaluation).
@@ -889,8 +889,8 @@ def call(
                 for more detail. This argument can be used only in eager mode, in graph mode the value in the config
                 will be used instead.
             return_dict (`bool`, *optional*):
-                Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be
-                used in eager mode, in graph mode the value will always be set to True.
+                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used
+                in eager mode, in graph mode the value will always be set to True.
             training (`bool`, *optional*, defaults to `False`):
                 Whether or not to use the model in training mode (some modules like dropout modules have different
                 behaviors between training and evaluation).
diff --git a/src/transformers/models/canine/modeling_canine.py b/src/transformers/models/canine/modeling_canine.py
index 3a707ff05a2e..25343e4c4be9 100644
--- a/src/transformers/models/canine/modeling_canine.py
+++ b/src/transformers/models/canine/modeling_canine.py
@@ -975,7 +975,7 @@ def _set_gradient_checkpointing(self, module, value=False):
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
diff --git a/src/transformers/models/clip/feature_extraction_clip.py b/src/transformers/models/clip/feature_extraction_clip.py
index 99ace19c6840..7614d05afd3e 100644
--- a/src/transformers/models/clip/feature_extraction_clip.py
+++ b/src/transformers/models/clip/feature_extraction_clip.py
@@ -104,7 +104,7 @@ def __call__(
                tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a
                number of channels, H and W are image height and width.
-            return_tensors (`str` or [`~file_utils.TensorType`], *optional*, defaults to `'np'`):
+            return_tensors (`str` or [`~utils.TensorType`], *optional*, defaults to `'np'`):
                If set, will return tensors of a particular framework. Acceptable values are:

                - `'tf'`: Return TensorFlow `tf.constant` objects.
diff --git a/src/transformers/models/clip/modeling_clip.py b/src/transformers/models/clip/modeling_clip.py index a5d556ff5e18..d6219131aeec 100755 --- a/src/transformers/models/clip/modeling_clip.py +++ b/src/transformers/models/clip/modeling_clip.py @@ -433,7 +433,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ CLIP_VISION_INPUTS_DOCSTRING = r""" @@ -448,7 +448,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ CLIP_INPUTS_DOCSTRING = r""" @@ -485,7 +485,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ @@ -540,7 +540,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( diff --git a/src/transformers/models/clip/modeling_flax_clip.py b/src/transformers/models/clip/modeling_flax_clip.py index 5ceecafd6f43..c3d5734edd39 100644 --- a/src/transformers/models/clip/modeling_flax_clip.py +++ b/src/transformers/models/clip/modeling_flax_clip.py @@ -100,7 +100,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ CLIP_VISION_INPUTS_DOCSTRING = r""" @@ -115,7 +115,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ CLIP_INPUTS_DOCSTRING = r""" @@ -150,7 +150,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. 
""" diff --git a/src/transformers/models/clip/modeling_tf_clip.py b/src/transformers/models/clip/modeling_tf_clip.py index 1031eb80679d..f8192ac7aa05 100644 --- a/src/transformers/models/clip/modeling_tf_clip.py +++ b/src/transformers/models/clip/modeling_tf_clip.py @@ -968,8 +968,8 @@ class TFCLIPPreTrainedModel(TFPreTrainedModel): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False``): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -988,8 +988,8 @@ class TFCLIPPreTrainedModel(TFPreTrainedModel): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False``): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -1030,8 +1030,8 @@ class TFCLIPPreTrainedModel(TFPreTrainedModel): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False``): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/clip/processing_clip.py b/src/transformers/models/clip/processing_clip.py index d750d4f2d2db..56dad3b8175e 100644 --- a/src/transformers/models/clip/processing_clip.py +++ b/src/transformers/models/clip/processing_clip.py @@ -57,7 +57,7 @@ def __call__(self, text=None, images=None, return_tensors=None, **kwargs): tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a number of channels, H and W are image height and width. - return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors of a particular framework. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. 
diff --git a/src/transformers/models/convbert/modeling_convbert.py b/src/transformers/models/convbert/modeling_convbert.py index 9bacdf8cd051..09b2c7ed4083 100755 --- a/src/transformers/models/convbert/modeling_convbert.py +++ b/src/transformers/models/convbert/modeling_convbert.py @@ -755,7 +755,7 @@ def forward(self, hidden_states): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/convbert/modeling_tf_convbert.py b/src/transformers/models/convbert/modeling_tf_convbert.py index 97cbb4478a71..61a4f5d69e42 100644 --- a/src/transformers/models/convbert/modeling_tf_convbert.py +++ b/src/transformers/models/convbert/modeling_tf_convbert.py @@ -736,8 +736,8 @@ class TFConvBertPreTrainedModel(TFPreTrainedModel): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/convnext/feature_extraction_convnext.py b/src/transformers/models/convnext/feature_extraction_convnext.py index 89336b3ed9b3..6d93d426efc1 100644 --- a/src/transformers/models/convnext/feature_extraction_convnext.py +++ b/src/transformers/models/convnext/feature_extraction_convnext.py @@ -103,7 +103,7 @@ def __call__( tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a number of channels, H and W are image height and width. - return_tensors (`str` or [`~file_utils.TensorType`], *optional*, defaults to `'np'`): + return_tensors (`str` or [`~utils.TensorType`], *optional*, defaults to `'np'`): If set, will return tensors of a particular framework. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/models/convnext/modeling_convnext.py b/src/transformers/models/convnext/modeling_convnext.py index 78f177aed6ca..b202c548d4df 100755 --- a/src/transformers/models/convnext/modeling_convnext.py +++ b/src/transformers/models/convnext/modeling_convnext.py @@ -357,7 +357,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. 
""" diff --git a/src/transformers/models/convnext/modeling_tf_convnext.py b/src/transformers/models/convnext/modeling_tf_convnext.py index 712c91a8cf1b..b952b6775248 100644 --- a/src/transformers/models/convnext/modeling_tf_convnext.py +++ b/src/transformers/models/convnext/modeling_tf_convnext.py @@ -416,8 +416,8 @@ def serving(self, inputs): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. """ diff --git a/src/transformers/models/ctrl/modeling_ctrl.py b/src/transformers/models/ctrl/modeling_ctrl.py index 4c2d9c3ef486..f03046928ca4 100644 --- a/src/transformers/models/ctrl/modeling_ctrl.py +++ b/src/transformers/models/ctrl/modeling_ctrl.py @@ -309,7 +309,7 @@ def _init_weights(self, module): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/ctrl/modeling_tf_ctrl.py b/src/transformers/models/ctrl/modeling_tf_ctrl.py index a6848ae0e0c3..89d3ef561141 100644 --- a/src/transformers/models/ctrl/modeling_tf_ctrl.py +++ b/src/transformers/models/ctrl/modeling_tf_ctrl.py @@ -502,8 +502,8 @@ class TFCTRLPreTrainedModel(TFPreTrainedModel): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/data2vec/modeling_data2vec_audio.py b/src/transformers/models/data2vec/modeling_data2vec_audio.py index 9891d4ddbb19..1b531c048961 100755 --- a/src/transformers/models/data2vec/modeling_data2vec_audio.py +++ b/src/transformers/models/data2vec/modeling_data2vec_audio.py @@ -887,7 +887,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. 
""" diff --git a/src/transformers/models/data2vec/modeling_data2vec_text.py b/src/transformers/models/data2vec/modeling_data2vec_text.py index 9759848b6bc1..87a2e810e298 100644 --- a/src/transformers/models/data2vec/modeling_data2vec_text.py +++ b/src/transformers/models/data2vec/modeling_data2vec_text.py @@ -689,7 +689,7 @@ def update_keys_to_ignore(self, config, del_keys_to_ignore): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/deberta/modeling_deberta.py b/src/transformers/models/deberta/modeling_deberta.py index b9b4bb6f7bd8..a1df51ac6387 100644 --- a/src/transformers/models/deberta/modeling_deberta.py +++ b/src/transformers/models/deberta/modeling_deberta.py @@ -871,7 +871,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/deberta/modeling_tf_deberta.py b/src/transformers/models/deberta/modeling_tf_deberta.py index 4cc98f320b21..c97b676596fb 100644 --- a/src/transformers/models/deberta/modeling_tf_deberta.py +++ b/src/transformers/models/deberta/modeling_tf_deberta.py @@ -1063,7 +1063,7 @@ class TFDebertaPreTrainedModel(TFPreTrainedModel): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~transformers.file_utils.ModelOutput``] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput``] instead of a plain tuple. """ diff --git a/src/transformers/models/deberta_v2/modeling_deberta_v2.py b/src/transformers/models/deberta_v2/modeling_deberta_v2.py index 51850956473b..c779267b7b38 100644 --- a/src/transformers/models/deberta_v2/modeling_deberta_v2.py +++ b/src/transformers/models/deberta_v2/modeling_deberta_v2.py @@ -965,7 +965,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py b/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py index cfba4923ba76..0a77a6057d9d 100644 --- a/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py +++ b/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py @@ -1164,7 +1164,7 @@ class TFDebertaV2PreTrainedModel(TFPreTrainedModel): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~transformers.file_utils.ModelOutput``] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput``] instead of a plain tuple. 
""" diff --git a/src/transformers/models/decision_transformer/__init__.py b/src/transformers/models/decision_transformer/__init__.py index 7f8590578979..8a72ff89c17c 100644 --- a/src/transformers/models/decision_transformer/__init__.py +++ b/src/transformers/models/decision_transformer/__init__.py @@ -18,7 +18,7 @@ from typing import TYPE_CHECKING # rely on isort to merge the imports -from ...file_utils import _LazyModule, is_torch_available +from ...utils import _LazyModule, is_torch_available _import_structure = { diff --git a/src/transformers/models/decision_transformer/modeling_decision_transformer.py b/src/transformers/models/decision_transformer/modeling_decision_transformer.py index 02ec88e2c153..50d6930af903 100755 --- a/src/transformers/models/decision_transformer/modeling_decision_transformer.py +++ b/src/transformers/models/decision_transformer/modeling_decision_transformer.py @@ -25,14 +25,14 @@ from torch import nn from ...activations import ACT2FN -from ...file_utils import ( +from ...modeling_utils import Conv1D, PreTrainedModel, find_pruneable_heads_and_indices, prune_conv1d_layer +from ...utils import ( ModelOutput, add_start_docstrings, add_start_docstrings_to_model_forward, + logging, replace_return_docstrings, ) -from ...modeling_utils import Conv1D, PreTrainedModel, find_pruneable_heads_and_indices, prune_conv1d_layer -from ...utils import logging if version.parse(torch.__version__) >= version.parse("1.6"): diff --git a/src/transformers/models/deit/feature_extraction_deit.py b/src/transformers/models/deit/feature_extraction_deit.py index d5fb9fa7684b..7e91d6218ff7 100644 --- a/src/transformers/models/deit/feature_extraction_deit.py +++ b/src/transformers/models/deit/feature_extraction_deit.py @@ -107,7 +107,7 @@ def __call__( tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a number of channels, H and W are image height and width. - return_tensors (`str` or [`~file_utils.TensorType`], *optional*, defaults to `'np'`): + return_tensors (`str` or [`~utils.TensorType`], *optional*, defaults to `'np'`): If set, will return tensors of a particular framework. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/models/deit/modeling_deit.py b/src/transformers/models/deit/modeling_deit.py index 205e24610108..012d535c2536 100644 --- a/src/transformers/models/deit/modeling_deit.py +++ b/src/transformers/models/deit/modeling_deit.py @@ -460,7 +460,7 @@ def _set_gradient_checkpointing(self, module: DeiTEncoder, value: bool = False) Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/detr/feature_extraction_detr.py b/src/transformers/models/detr/feature_extraction_detr.py index ff69c8430eb1..15b37fbae7d3 100644 --- a/src/transformers/models/detr/feature_extraction_detr.py +++ b/src/transformers/models/detr/feature_extraction_detr.py @@ -455,7 +455,7 @@ def __call__( - 1 for pixels that are real (i.e. **not masked**), - 0 for pixels that are padding (i.e. **masked**). - return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors instead of NumPy arrays. 
If set to `'pt'`, return PyTorch `torch.Tensor` objects. @@ -638,7 +638,7 @@ def pad_and_create_pixel_mask( Args: pixel_values_list (`List[torch.Tensor]`): List of images (pixel values) to be padded. Each image should be a tensor of shape (C, H, W). - return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors instead of NumPy arrays. If set to `'pt'`, return PyTorch `torch.Tensor` objects. diff --git a/src/transformers/models/detr/modeling_detr.py b/src/transformers/models/detr/modeling_detr.py index 54ce7fc8cea4..92ace3f9f7fb 100644 --- a/src/transformers/models/detr/modeling_detr.py +++ b/src/transformers/models/detr/modeling_detr.py @@ -868,7 +868,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ @@ -932,7 +932,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -1054,7 +1054,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( diff --git a/src/transformers/models/distilbert/modeling_distilbert.py b/src/transformers/models/distilbert/modeling_distilbert.py index 76869eda885d..14bc7b9e949b 100755 --- a/src/transformers/models/distilbert/modeling_distilbert.py +++ b/src/transformers/models/distilbert/modeling_distilbert.py @@ -446,7 +446,7 @@ def _init_weights(self, module: nn.Module): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/distilbert/modeling_flax_distilbert.py b/src/transformers/models/distilbert/modeling_flax_distilbert.py index 48a486f3cde0..c84160b5fe8e 100644 --- a/src/transformers/models/distilbert/modeling_flax_distilbert.py +++ b/src/transformers/models/distilbert/modeling_flax_distilbert.py @@ -89,7 +89,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. 
""" diff --git a/src/transformers/models/distilbert/modeling_tf_distilbert.py b/src/transformers/models/distilbert/modeling_tf_distilbert.py index c2e05ecfddfe..ccae454ebe05 100644 --- a/src/transformers/models/distilbert/modeling_tf_distilbert.py +++ b/src/transformers/models/distilbert/modeling_tf_distilbert.py @@ -508,8 +508,8 @@ def serving(self, inputs): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/dpr/modeling_dpr.py b/src/transformers/models/dpr/modeling_dpr.py index 6b1fd35bb61c..b02fb11d0b96 100644 --- a/src/transformers/models/dpr/modeling_dpr.py +++ b/src/transformers/models/dpr/modeling_dpr.py @@ -398,7 +398,7 @@ class DPRPretrainedReader(DPRPreTrainedModel): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ DPR_READER_INPUTS_DOCSTRING = r""" @@ -434,7 +434,7 @@ class DPRPretrainedReader(DPRPreTrainedModel): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/dpr/modeling_tf_dpr.py b/src/transformers/models/dpr/modeling_tf_dpr.py index a47d97562037..f2b1a1606e4d 100644 --- a/src/transformers/models/dpr/modeling_tf_dpr.py +++ b/src/transformers/models/dpr/modeling_tf_dpr.py @@ -487,8 +487,8 @@ def serving(self, inputs): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -523,8 +523,8 @@ def serving(self, inputs): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. 
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/dpr/tokenization_dpr.py b/src/transformers/models/dpr/tokenization_dpr.py index b47776f057ea..acee196767a3 100644 --- a/src/transformers/models/dpr/tokenization_dpr.py +++ b/src/transformers/models/dpr/tokenization_dpr.py @@ -144,7 +144,7 @@ class DPRQuestionEncoderTokenizer(BertTokenizer): The passages titles to be encoded. This can be a string or a list of strings if there are several passages. texts (`str` or `List[str]`): The passages texts to be encoded. This can be a string or a list of strings if there are several passages. - padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `False`): + padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`): Activates and controls padding. Accepts the following values: - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single sequence @@ -174,7 +174,7 @@ class DPRQuestionEncoderTokenizer(BertTokenizer): If left unset or set to `None`, this will use the predefined model maximum length if a maximum length is required by one of the truncation/padding parameters. If the model has no specific maximum input length (like XLNet) truncation/padding to a maximum length will be deactivated. - return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors instead of list of python integers. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/models/dpr/tokenization_dpr_fast.py b/src/transformers/models/dpr/tokenization_dpr_fast.py index 1b9e9c93fb1a..ea021dcb6ab1 100644 --- a/src/transformers/models/dpr/tokenization_dpr_fast.py +++ b/src/transformers/models/dpr/tokenization_dpr_fast.py @@ -145,7 +145,7 @@ class DPRQuestionEncoderTokenizerFast(BertTokenizerFast): The passages titles to be encoded. This can be a string or a list of strings if there are several passages. texts (`str` or `List[str]`): The passages texts to be encoded. This can be a string or a list of strings if there are several passages. - padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `False`): + padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`): Activates and controls padding. Accepts the following values: - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single sequence @@ -175,7 +175,7 @@ class DPRQuestionEncoderTokenizerFast(BertTokenizerFast): If left unset or set to `None`, this will use the predefined model maximum length if a maximum length is required by one of the truncation/padding parameters. If the model has no specific maximum input length (like XLNet) truncation/padding to a maximum length will be deactivated. - return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors instead of list of python integers. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. 
diff --git a/src/transformers/models/electra/modeling_electra.py b/src/transformers/models/electra/modeling_electra.py index 740edb309fa1..ce0b9c36cd25 100644 --- a/src/transformers/models/electra/modeling_electra.py +++ b/src/transformers/models/electra/modeling_electra.py @@ -794,7 +794,7 @@ class ElectraForPreTrainingOutput(ModelOutput): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/electra/modeling_flax_electra.py b/src/transformers/models/electra/modeling_flax_electra.py index de775e11363e..e083080e414e 100644 --- a/src/transformers/models/electra/modeling_flax_electra.py +++ b/src/transformers/models/electra/modeling_flax_electra.py @@ -134,7 +134,7 @@ class FlaxElectraForPreTrainingOutput(ModelOutput): - 0 indicates the head is **masked**. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/electra/modeling_tf_electra.py b/src/transformers/models/electra/modeling_tf_electra.py index f442d7de480b..9cbbd4b7e1e5 100644 --- a/src/transformers/models/electra/modeling_tf_electra.py +++ b/src/transformers/models/electra/modeling_tf_electra.py @@ -907,8 +907,8 @@ class TFElectraForPreTrainingOutput(ModelOutput): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py index 1a5c17af02c2..7bad5f98d377 100644 --- a/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_encoder_decoder.py @@ -135,7 +135,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - If set to `True`, the model will return a [`~file_utils.Seq2SeqLMOutput`] instead of a plain tuple. + If set to `True`, the model will return a [`~modeling_outputs.Seq2SeqLMOutput`] instead of a plain tuple. kwargs: (*optional*) Remaining dictionary of keyword arguments. Keyword arguments come in two flavors: - Without a prefix which will be input as `**encoder_kwargs` for the encoder forward function.
diff --git a/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py index 9b07cddd364d..6c61bc8016f1 100644 --- a/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py @@ -122,7 +122,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - If set to `True`, the model will return a [`~file_utils.FlaxSeq2SeqLMOutput`] instead of a plain tuple. + If set to `True`, the model will return a [`~modeling_flax_outputs.FlaxSeq2SeqLMOutput`] instead of a plain tuple. """ ENCODER_DECODER_ENCODE_INPUTS_DOCSTRING = r""" @@ -152,7 +152,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - If set to `True`, the model will return a [`~file_utils.FlaxBaseModelOutput`] instead of a plain tuple. + If set to `True`, the model will return a [`~modeling_flax_outputs.FlaxBaseModelOutput`] instead of a plain tuple. """ ENCODER_DECODER_DECODE_INPUTS_DOCSTRING = r""" @@ -198,8 +198,8 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - If set to `True`, the model will return a [`~file_utils.FlaxCausalLMOutputWithCrossAttentions`] instead of - a plain tuple. + If set to `True`, the model will return a [`~modeling_flax_outputs.FlaxCausalLMOutputWithCrossAttentions`] instead of a + plain tuple. """ diff --git a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py index 8d572656acb9..c2be91c7a000 100644 --- a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py @@ -143,7 +143,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - If set to `True`, the model will return a [`~file_utils.Seq2SeqLMOutput`] instead of a plain tuple. + If set to `True`, the model will return a [`~modeling_tf_outputs.TFSeq2SeqLMOutput`] instead of a plain tuple. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/flaubert/modeling_flaubert.py b/src/transformers/models/flaubert/modeling_flaubert.py index 35f72e9b353f..9721880ac976 100644 --- a/src/transformers/models/flaubert/modeling_flaubert.py +++ b/src/transformers/models/flaubert/modeling_flaubert.py @@ -123,7 +123,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/flaubert/modeling_tf_flaubert.py b/src/transformers/models/flaubert/modeling_tf_flaubert.py index 427291aad2cd..8441e1801730 100644 --- a/src/transformers/models/flaubert/modeling_tf_flaubert.py +++ b/src/transformers/models/flaubert/modeling_tf_flaubert.py @@ -165,8 +165,8 @@ more detail.
This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/fnet/modeling_fnet.py b/src/transformers/models/fnet/modeling_fnet.py index e2903733663e..702a66de8bc3 100755 --- a/src/transformers/models/fnet/modeling_fnet.py +++ b/src/transformers/models/fnet/modeling_fnet.py @@ -507,7 +507,7 @@ class FNetForPreTrainingOutput(ModelOutput): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/fsmt/modeling_fsmt.py b/src/transformers/models/fsmt/modeling_fsmt.py index 9e55c2937e75..96a771049337 100644 --- a/src/transformers/models/fsmt/modeling_fsmt.py +++ b/src/transformers/models/fsmt/modeling_fsmt.py @@ -282,7 +282,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/funnel/modeling_funnel.py b/src/transformers/models/funnel/modeling_funnel.py index d8e844d4eac3..267d32f2a47a 100644 --- a/src/transformers/models/funnel/modeling_funnel.py +++ b/src/transformers/models/funnel/modeling_funnel.py @@ -913,7 +913,7 @@ class FunnelForPreTrainingOutput(ModelOutput): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py index fe8a81f9bfd1..56e6bf13b494 100644 --- a/src/transformers/models/funnel/modeling_tf_funnel.py +++ b/src/transformers/models/funnel/modeling_tf_funnel.py @@ -1079,8 +1079,8 @@ class TFFunnelForPreTrainingOutput(ModelOutput): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. 
training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/glpn/feature_extraction_glpn.py b/src/transformers/models/glpn/feature_extraction_glpn.py index d0205be9e36a..2694d56b898b 100644 --- a/src/transformers/models/glpn/feature_extraction_glpn.py +++ b/src/transformers/models/glpn/feature_extraction_glpn.py @@ -86,7 +86,7 @@ def __call__( tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a number of channels, H and W are image height and width. - return_tensors (`str` or [`~file_utils.TensorType`], *optional*, defaults to `'np'`): + return_tensors (`str` or [`~utils.TensorType`], *optional*, defaults to `'np'`): If set, will return tensors of a particular framework. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/models/glpn/modeling_glpn.py b/src/transformers/models/glpn/modeling_glpn.py index 9b0429bb352b..c8d6bac79b36 100755 --- a/src/transformers/models/glpn/modeling_glpn.py +++ b/src/transformers/models/glpn/modeling_glpn.py @@ -467,7 +467,7 @@ def _init_weights(self, module): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/gpt2/modeling_flax_gpt2.py b/src/transformers/models/gpt2/modeling_flax_gpt2.py index 1a1f5cc1a52f..f66b539a55ca 100644 --- a/src/transformers/models/gpt2/modeling_flax_gpt2.py +++ b/src/transformers/models/gpt2/modeling_flax_gpt2.py @@ -103,7 +103,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/gpt2/modeling_gpt2.py b/src/transformers/models/gpt2/modeling_gpt2.py index 8fa770b53c4f..b0755b804bb7 100644 --- a/src/transformers/models/gpt2/modeling_gpt2.py +++ b/src/transformers/models/gpt2/modeling_gpt2.py @@ -607,7 +607,7 @@ class GPT2DoubleHeadsModelOutput(ModelOutput): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ PARALLELIZE_DOCSTRING = r""" This is an experimental feature and is a subject to change at a moment's notice. diff --git a/src/transformers/models/gpt2/modeling_tf_gpt2.py b/src/transformers/models/gpt2/modeling_tf_gpt2.py index c0bd4768f165..4d3734e78049 100644 --- a/src/transformers/models/gpt2/modeling_tf_gpt2.py +++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py @@ -715,8 +715,8 @@ class TFGPT2DoubleHeadsModelOutput(ModelOutput): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. 
This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/gpt_neo/modeling_flax_gpt_neo.py b/src/transformers/models/gpt_neo/modeling_flax_gpt_neo.py index d64e160c6ab7..d548cc02efed 100644 --- a/src/transformers/models/gpt_neo/modeling_flax_gpt_neo.py +++ b/src/transformers/models/gpt_neo/modeling_flax_gpt_neo.py @@ -101,7 +101,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/gpt_neo/modeling_gpt_neo.py b/src/transformers/models/gpt_neo/modeling_gpt_neo.py index 8fe1808a272e..ba68786d35ad 100755 --- a/src/transformers/models/gpt_neo/modeling_gpt_neo.py +++ b/src/transformers/models/gpt_neo/modeling_gpt_neo.py @@ -463,7 +463,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/gptj/modeling_flax_gptj.py b/src/transformers/models/gptj/modeling_flax_gptj.py index d9bedbe26407..6453eed641f7 100644 --- a/src/transformers/models/gptj/modeling_flax_gptj.py +++ b/src/transformers/models/gptj/modeling_flax_gptj.py @@ -103,7 +103,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/gptj/modeling_gptj.py b/src/transformers/models/gptj/modeling_gptj.py index 974b3aabed9d..b957cec54a8e 100755 --- a/src/transformers/models/gptj/modeling_gptj.py +++ b/src/transformers/models/gptj/modeling_gptj.py @@ -390,7 +390,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ PARALLELIZE_DOCSTRING = r""" diff --git a/src/transformers/models/hubert/modeling_hubert.py b/src/transformers/models/hubert/modeling_hubert.py index 4147f60683f2..00ac994dd632 100755 --- a/src/transformers/models/hubert/modeling_hubert.py +++ b/src/transformers/models/hubert/modeling_hubert.py @@ -931,7 +931,7 @@ def _get_feature_vector_attention_mask(self, feature_vector_length: int, attenti Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. 
return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py index be0508937eb9..4908aeb9108c 100644 --- a/src/transformers/models/hubert/modeling_tf_hubert.py +++ b/src/transformers/models/hubert/modeling_tf_hubert.py @@ -1387,8 +1387,8 @@ def serving(self, inputs): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False``): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/ibert/modeling_ibert.py b/src/transformers/models/ibert/modeling_ibert.py index 44ca8d12236d..c2d4e23b295a 100644 --- a/src/transformers/models/ibert/modeling_ibert.py +++ b/src/transformers/models/ibert/modeling_ibert.py @@ -721,7 +721,7 @@ def resize_token_embeddings(self, new_num_tokens=None): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/imagegpt/feature_extraction_imagegpt.py b/src/transformers/models/imagegpt/feature_extraction_imagegpt.py index 0926cde8dd87..b49d5e521e44 100644 --- a/src/transformers/models/imagegpt/feature_extraction_imagegpt.py +++ b/src/transformers/models/imagegpt/feature_extraction_imagegpt.py @@ -117,7 +117,7 @@ def __call__( tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a number of channels, H and W are image height and width. - return_tensors (`str` or [`~file_utils.TensorType`], *optional*, defaults to `'np'`): + return_tensors (`str` or [`~utils.TensorType`], *optional*, defaults to `'np'`): If set, will return tensors of a particular framework. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/models/imagegpt/modeling_imagegpt.py b/src/transformers/models/imagegpt/modeling_imagegpt.py index c2f5f0ae12c2..6bca6c921ab0 100755 --- a/src/transformers/models/imagegpt/modeling_imagegpt.py +++ b/src/transformers/models/imagegpt/modeling_imagegpt.py @@ -608,7 +608,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. 
""" diff --git a/src/transformers/models/layoutlm/modeling_layoutlm.py b/src/transformers/models/layoutlm/modeling_layoutlm.py index e0f7a34f3f8b..2eea904360c6 100644 --- a/src/transformers/models/layoutlm/modeling_layoutlm.py +++ b/src/transformers/models/layoutlm/modeling_layoutlm.py @@ -695,7 +695,7 @@ def _set_gradient_checkpointing(self, module, value=False): If set to `True`, the hidden states of all layers are returned. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - If set to `True`, the model will return a [`~file_utils.ModelOutput`] instead of a plain tuple. + If set to `True`, the model will return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py index 6fcab3e54694..e6fd771d37e2 100644 --- a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py +++ b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py @@ -891,7 +891,7 @@ class TFLayoutLMPreTrainedModel(TFPreTrainedModel): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/layoutlmv2/feature_extraction_layoutlmv2.py b/src/transformers/models/layoutlmv2/feature_extraction_layoutlmv2.py index 2bf476239ce4..12fe27f1a17e 100644 --- a/src/transformers/models/layoutlmv2/feature_extraction_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/feature_extraction_layoutlmv2.py @@ -131,7 +131,7 @@ def __call__( The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a number of channels, H and W are image height and width. - return_tensors (`str` or [`~file_utils.TensorType`], *optional*, defaults to `'np'`): + return_tensors (`str` or [`~utils.TensorType`], *optional*, defaults to `'np'`): If set, will return tensors of a particular framework. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py index 203e56e0da95..af5ba22c2e94 100755 --- a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py @@ -682,7 +682,7 @@ def synchronize_batch_norm(self): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. 
""" diff --git a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py index 9aa0e4206e50..3e9a027014dd 100644 --- a/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py @@ -60,7 +60,7 @@ LAYOUTLMV2_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r""" add_special_tokens (`bool`, *optional*, defaults to `True`): Whether or not to encode the sequences with the special tokens relative to their model. - padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `False`): + padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`): Activates and controls padding. Accepts the following values: - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single @@ -97,7 +97,7 @@ pad_to_multiple_of (`int`, *optional*): If set will pad the sequence to a multiple of the provided value. This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= 7.5 (Volta). - return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors instead of list of python integers. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/models/led/modeling_led.py b/src/transformers/models/led/modeling_led.py index 11b9d8f38d5c..df3b97b84e40 100755 --- a/src/transformers/models/led/modeling_led.py +++ b/src/transformers/models/led/modeling_led.py @@ -1590,7 +1590,7 @@ class LEDSeq2SeqQuestionAnsweringModelOutput(ModelOutput): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ @@ -1748,7 +1748,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -2003,7 +2003,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py index 5564a91dd1d8..cdd82c6c5058 100644 --- a/src/transformers/models/led/modeling_tf_led.py +++ b/src/transformers/models/led/modeling_tf_led.py @@ -1600,8 +1600,8 @@ class TFLEDSeq2SeqLMOutput(ModelOutput): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. 
return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -1701,7 +1701,7 @@ def call( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ inputs = input_processing( func=self.call, @@ -1983,7 +1983,7 @@ def call( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ inputs = input_processing( func=self.call, diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py index e6b96d6934d0..87b1974ab944 100755 --- a/src/transformers/models/longformer/modeling_longformer.py +++ b/src/transformers/models/longformer/modeling_longformer.py @@ -1486,7 +1486,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index df1f130de4f1..762f872ee709 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -1974,8 +1974,8 @@ def serving(self, inputs): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/luke/modeling_luke.py b/src/transformers/models/luke/modeling_luke.py index 2cc7bd698160..938f95a63410 100644 --- a/src/transformers/models/luke/modeling_luke.py +++ b/src/transformers/models/luke/modeling_luke.py @@ -868,7 +868,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. 
See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/luke/tokenization_luke.py b/src/transformers/models/luke/tokenization_luke.py index 20e12a01e206..e35db36aedee 100644 --- a/src/transformers/models/luke/tokenization_luke.py +++ b/src/transformers/models/luke/tokenization_luke.py @@ -1108,7 +1108,7 @@ def pad( List[int]]]*) so you can use this method during preprocessing as well as in a PyTorch Dataloader collate function. Instead of `List[int]` you can have tensors (numpy arrays, PyTorch tensors or TensorFlow tensors), see the note above for the return type. - padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`): + padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`): Select a strategy to pad the returned sequences (according to the model's padding side and padding index) among: @@ -1129,7 +1129,7 @@ def pad( Whether to return the attention mask. If left to the default, will return the attention mask according to the specific tokenizer's default, defined by the `return_outputs` attribute. [What are attention masks?](../glossary#attention-mask) - return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors instead of list of python integers. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/models/lxmert/modeling_lxmert.py b/src/transformers/models/lxmert/modeling_lxmert.py index 6676fd09a7b4..bd67d5a81ad0 100644 --- a/src/transformers/models/lxmert/modeling_lxmert.py +++ b/src/transformers/models/lxmert/modeling_lxmert.py @@ -875,7 +875,7 @@ def _init_weights(self, module): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/lxmert/modeling_tf_lxmert.py b/src/transformers/models/lxmert/modeling_tf_lxmert.py index 0a6e1b33b023..06cd3c9504c5 100644 --- a/src/transformers/models/lxmert/modeling_tf_lxmert.py +++ b/src/transformers/models/lxmert/modeling_tf_lxmert.py @@ -929,8 +929,8 @@ def serving(self, inputs): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). 
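Note: every hunk in this patch retargets docstring cross-references from `file_utils` to the `transformers.utils` namespace, so `utils.ModelOutput`, `utils.PaddingStrategy`, and `utils.TensorType` need to be importable from `transformers.utils` for the rewritten links to resolve. A minimal sketch of what those three objects do, and of the `return_dict` toggle these docstrings keep describing (the `ToyOutput` dataclass, the tiny `BertConfig`, and the `bert-base-uncased` checkpoint are illustrative assumptions, not part of this patch):

```python
# Sketch only: exercises the renamed utilities from `transformers.utils`.
from dataclasses import dataclass
from typing import Optional

import torch
from transformers import AutoTokenizer, BertConfig, BertModel
from transformers.utils import ModelOutput, PaddingStrategy, TensorType


@dataclass
class ToyOutput(ModelOutput):
    # ModelOutput subclasses are dataclasses; fields left as None are dropped
    # from the tuple view, which is what "can also be used as a tuple" means.
    logits: Optional[torch.Tensor] = None
    hidden_states: Optional[torch.Tensor] = None


out = ToyOutput(logits=torch.ones(2, 3))
assert out.logits is out["logits"] is out[0]  # attribute, dict and tuple access
assert out.to_tuple() == (out.logits,)        # the None field is skipped

# PaddingStrategy / TensorType are the enums behind `padding=` and `return_tensors=`.
tok = AutoTokenizer.from_pretrained("bert-base-uncased")
batch = tok(
    ["short", "a slightly longer sentence"],
    padding=PaddingStrategy.LONGEST,    # equivalent to padding="longest"
    return_tensors=TensorType.PYTORCH,  # equivalent to return_tensors="pt"
)

# `return_dict` toggles between a ModelOutput and the plain tuple the docstrings mention.
config = BertConfig(
    vocab_size=tok.vocab_size, hidden_size=32, num_hidden_layers=1,
    num_attention_heads=2, intermediate_size=64,
)
model = BertModel(config).eval()
with torch.no_grad():
    tuple_out = model(**batch, return_dict=False)
    dict_out = model(**batch)  # config.return_dict defaults to True
assert isinstance(tuple_out, tuple) and isinstance(dict_out, ModelOutput)
```

The enum members are string-valued, so `PaddingStrategy.LONGEST` and `"longest"` (or `TensorType.PYTORCH` and `"pt"`) are interchangeable anywhere the docstrings above advertise both forms.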
diff --git a/src/transformers/models/m2m_100/modeling_m2m_100.py b/src/transformers/models/m2m_100/modeling_m2m_100.py index 36dc9e11ce90..3bb749564a01 100755 --- a/src/transformers/models/m2m_100/modeling_m2m_100.py +++ b/src/transformers/models/m2m_100/modeling_m2m_100.py @@ -667,7 +667,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ @@ -753,7 +753,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -952,7 +952,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( diff --git a/src/transformers/models/marian/modeling_flax_marian.py b/src/transformers/models/marian/modeling_flax_marian.py index d914cd410c27..e9702868ca4a 100644 --- a/src/transformers/models/marian/modeling_flax_marian.py +++ b/src/transformers/models/marian/modeling_flax_marian.py @@ -136,7 +136,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ @@ -167,7 +167,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ MARIAN_DECODE_INPUTS_DOCSTRING = r""" @@ -213,7 +213,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/marian/modeling_marian.py b/src/transformers/models/marian/modeling_marian.py index a886e8f4b9d4..333809edfb00 100755 --- a/src/transformers/models/marian/modeling_marian.py +++ b/src/transformers/models/marian/modeling_marian.py @@ -634,7 +634,7 @@ def dummy_inputs(self): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. 
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ @@ -723,7 +723,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -942,7 +942,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -1643,7 +1643,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. Returns: diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py index e116760474c0..aa766e681544 100644 --- a/src/transformers/models/marian/modeling_tf_marian.py +++ b/src/transformers/models/marian/modeling_tf_marian.py @@ -654,8 +654,8 @@ def serving(self, inputs): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -745,8 +745,8 @@ def call( for more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be - used in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used + in eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -928,8 +928,8 @@ def call( for more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be - used in eager mode, in graph mode the value will always be set to True. 
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used + in eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/maskformer/feature_extraction_maskformer.py b/src/transformers/models/maskformer/feature_extraction_maskformer.py index 5bde1960504e..bd8adc04d0b7 100644 --- a/src/transformers/models/maskformer/feature_extraction_maskformer.py +++ b/src/transformers/models/maskformer/feature_extraction_maskformer.py @@ -196,7 +196,7 @@ def __call__( - 1 for pixels that are real (i.e. **not masked**), - 0 for pixels that are padding (i.e. **masked**). - return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors instead of NumPy arrays. If set to `'pt'`, return PyTorch `torch.Tensor` objects. @@ -315,7 +315,7 @@ def encode_inputs( - 1 for pixels that are real (i.e. **not masked**), - 0 for pixels that are padding (i.e. **masked**). - return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors instead of NumPy arrays. If set to `'pt'`, return PyTorch `torch.Tensor` objects. diff --git a/src/transformers/models/maskformer/modeling_maskformer.py b/src/transformers/models/maskformer/modeling_maskformer.py index 0d53c818dc5e..450ba50b59ed 100644 --- a/src/transformers/models/maskformer/modeling_maskformer.py +++ b/src/transformers/models/maskformer/modeling_maskformer.py @@ -1482,7 +1482,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( diff --git a/src/transformers/models/mbart/modeling_flax_mbart.py b/src/transformers/models/mbart/modeling_flax_mbart.py index eb5335f93147..2e05780114e5 100644 --- a/src/transformers/models/mbart/modeling_flax_mbart.py +++ b/src/transformers/models/mbart/modeling_flax_mbart.py @@ -138,7 +138,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ @@ -169,7 +169,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ MBART_DECODE_INPUTS_DOCSTRING = r""" @@ -215,7 +215,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. 
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/mbart/modeling_mbart.py b/src/transformers/models/mbart/modeling_mbart.py index bceaac67be3d..446a02f648cd 100755 --- a/src/transformers/models/mbart/modeling_mbart.py +++ b/src/transformers/models/mbart/modeling_mbart.py @@ -673,7 +673,7 @@ def dummy_inputs(self): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ @@ -764,7 +764,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -989,7 +989,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -1793,7 +1793,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. Returns: diff --git a/src/transformers/models/mbart/modeling_tf_mbart.py b/src/transformers/models/mbart/modeling_tf_mbart.py index bf0010b9d646..f9f3014c67ff 100644 --- a/src/transformers/models/mbart/modeling_tf_mbart.py +++ b/src/transformers/models/mbart/modeling_tf_mbart.py @@ -594,8 +594,8 @@ def serving(self, inputs): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -722,8 +722,8 @@ def call( for more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be - used in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. 
This argument can be used + in eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -912,8 +912,8 @@ def call( for more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be - used in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used + in eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/megatron_bert/modeling_megatron_bert.py b/src/transformers/models/megatron_bert/modeling_megatron_bert.py index acf0574f4f3d..36b7e3dfdf2a 100755 --- a/src/transformers/models/megatron_bert/modeling_megatron_bert.py +++ b/src/transformers/models/megatron_bert/modeling_megatron_bert.py @@ -824,7 +824,7 @@ class MegatronBertForPreTrainingOutput(ModelOutput): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/mluke/tokenization_mluke.py b/src/transformers/models/mluke/tokenization_mluke.py index d2ce9bfb8228..1ddf472d56cc 100644 --- a/src/transformers/models/mluke/tokenization_mluke.py +++ b/src/transformers/models/mluke/tokenization_mluke.py @@ -1221,7 +1221,7 @@ def pad( List[int]]]*) so you can use this method during preprocessing as well as in a PyTorch Dataloader collate function. Instead of `List[int]` you can have tensors (numpy arrays, PyTorch tensors or TensorFlow tensors), see the note above for the return type. - padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`): + padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`): Select a strategy to pad the returned sequences (according to the model's padding side and padding index) among: @@ -1242,7 +1242,7 @@ def pad( Whether to return the attention mask. If left to the default, will return the attention mask according to the specific tokenizer's default, defined by the `return_outputs` attribute. [What are attention masks?](../glossary#attention-mask) - return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors instead of list of python integers. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/models/mmbt/modeling_mmbt.py b/src/transformers/models/mmbt/modeling_mmbt.py index 9b9142befbc1..5e284c1b6996 100644 --- a/src/transformers/models/mmbt/modeling_mmbt.py +++ b/src/transformers/models/mmbt/modeling_mmbt.py @@ -170,7 +170,7 @@ def forward(self, input_modal, start_token=None, end_token=None, position_ids=No Whether or not to return the hidden states of all layers. 
See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/mobilebert/modeling_mobilebert.py b/src/transformers/models/mobilebert/modeling_mobilebert.py index 022c3dfd6f29..ff971110e140 100644 --- a/src/transformers/models/mobilebert/modeling_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_mobilebert.py @@ -776,7 +776,7 @@ class MobileBertForPreTrainingOutput(ModelOutput): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py index 12fc439b00ab..007be43f5f06 100644 --- a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py @@ -891,8 +891,8 @@ class TFMobileBertForPreTrainingOutput(ModelOutput): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/mpnet/modeling_mpnet.py b/src/transformers/models/mpnet/modeling_mpnet.py index d65be4d9736e..195f961dcf8b 100644 --- a/src/transformers/models/mpnet/modeling_mpnet.py +++ b/src/transformers/models/mpnet/modeling_mpnet.py @@ -470,7 +470,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/mpnet/modeling_tf_mpnet.py b/src/transformers/models/mpnet/modeling_tf_mpnet.py index 003495b49c5c..5edd73c4170b 100644 --- a/src/transformers/models/mpnet/modeling_tf_mpnet.py +++ b/src/transformers/models/mpnet/modeling_tf_mpnet.py @@ -650,8 +650,8 @@ def call( more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. 
training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/nystromformer/modeling_nystromformer.py b/src/transformers/models/nystromformer/modeling_nystromformer.py index b7a72821c704..70ba709e92cb 100755 --- a/src/transformers/models/nystromformer/modeling_nystromformer.py +++ b/src/transformers/models/nystromformer/modeling_nystromformer.py @@ -544,7 +544,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/openai/modeling_openai.py b/src/transformers/models/openai/modeling_openai.py index a6318a607be5..ce5ea166781b 100644 --- a/src/transformers/models/openai/modeling_openai.py +++ b/src/transformers/models/openai/modeling_openai.py @@ -394,7 +394,7 @@ class OpenAIGPTDoubleHeadsModelOutput(ModelOutput): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/openai/modeling_tf_openai.py b/src/transformers/models/openai/modeling_tf_openai.py index 374f8fb3530b..490b3fac47e5 100644 --- a/src/transformers/models/openai/modeling_tf_openai.py +++ b/src/transformers/models/openai/modeling_tf_openai.py @@ -483,8 +483,8 @@ class TFOpenAIGPTDoubleHeadsModelOutput(ModelOutput): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/pegasus/modeling_flax_pegasus.py b/src/transformers/models/pegasus/modeling_flax_pegasus.py index f5a5c2b75033..23831cb86fd7 100644 --- a/src/transformers/models/pegasus/modeling_flax_pegasus.py +++ b/src/transformers/models/pegasus/modeling_flax_pegasus.py @@ -133,7 +133,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ @@ -164,7 +164,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. 
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ PEGASUS_DECODE_INPUTS_DOCSTRING = r""" @@ -206,7 +206,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/pegasus/modeling_pegasus.py b/src/transformers/models/pegasus/modeling_pegasus.py index 0128a9919830..f1d7a6ce56ef 100755 --- a/src/transformers/models/pegasus/modeling_pegasus.py +++ b/src/transformers/models/pegasus/modeling_pegasus.py @@ -620,7 +620,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ @@ -735,7 +735,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -989,7 +989,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -1621,7 +1621,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. Returns: diff --git a/src/transformers/models/pegasus/modeling_tf_pegasus.py b/src/transformers/models/pegasus/modeling_tf_pegasus.py index 853520c797a4..26f3ef461198 100644 --- a/src/transformers/models/pegasus/modeling_tf_pegasus.py +++ b/src/transformers/models/pegasus/modeling_tf_pegasus.py @@ -656,8 +656,8 @@ def serving(self, inputs): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -748,8 +748,8 @@ def call( for more detail. 
This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be - used in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used + in eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -934,8 +934,8 @@ def call( for more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be - used in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used + in eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/perceiver/feature_extraction_perceiver.py b/src/transformers/models/perceiver/feature_extraction_perceiver.py index 62cb102beac1..de05ce7f24ca 100644 --- a/src/transformers/models/perceiver/feature_extraction_perceiver.py +++ b/src/transformers/models/perceiver/feature_extraction_perceiver.py @@ -136,7 +136,7 @@ def __call__( tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a number of channels, H and W are image height and width. - return_tensors (`str` or [`~file_utils.TensorType`], *optional*, defaults to `'np'`): + return_tensors (`str` or [`~utils.TensorType`], *optional*, defaults to `'np'`): If set, will return tensors of a particular framework. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/models/perceiver/modeling_perceiver.py b/src/transformers/models/perceiver/modeling_perceiver.py index e973497bfc02..e79a7d599984 100755 --- a/src/transformers/models/perceiver/modeling_perceiver.py +++ b/src/transformers/models/perceiver/modeling_perceiver.py @@ -710,7 +710,7 @@ def _init_weights(self, module): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/plbart/modeling_plbart.py b/src/transformers/models/plbart/modeling_plbart.py index 26d84c01fc1f..b1a2088913fd 100755 --- a/src/transformers/models/plbart/modeling_plbart.py +++ b/src/transformers/models/plbart/modeling_plbart.py @@ -649,7 +649,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. 
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ @@ -741,7 +741,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -965,7 +965,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -1640,7 +1640,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. Returns: diff --git a/src/transformers/models/poolformer/feature_extraction_poolformer.py b/src/transformers/models/poolformer/feature_extraction_poolformer.py index 72d06cfd74a5..88ddbfbe15b5 100644 --- a/src/transformers/models/poolformer/feature_extraction_poolformer.py +++ b/src/transformers/models/poolformer/feature_extraction_poolformer.py @@ -104,7 +104,7 @@ def __call__( tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a number of channels, H and W are image height and width. - return_tensors (`str` or [`~file_utils.TensorType`], *optional*, defaults to `'np'`): + return_tensors (`str` or [`~utils.TensorType`], *optional*, defaults to `'np'`): If set, will return tensors of a particular framework. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/models/prophetnet/modeling_prophetnet.py b/src/transformers/models/prophetnet/modeling_prophetnet.py index 7c10261d2793..e2fed5dea4e7 100644 --- a/src/transformers/models/prophetnet/modeling_prophetnet.py +++ b/src/transformers/models/prophetnet/modeling_prophetnet.py @@ -139,7 +139,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ PROPHETNET_STANDALONE_INPUTS_DOCSTRING = r""" @@ -172,7 +172,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. 
""" diff --git a/src/transformers/models/qdqbert/modeling_qdqbert.py b/src/transformers/models/qdqbert/modeling_qdqbert.py index 612a1bb97500..4089f12d1fea 100755 --- a/src/transformers/models/qdqbert/modeling_qdqbert.py +++ b/src/transformers/models/qdqbert/modeling_qdqbert.py @@ -820,7 +820,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/rag/modeling_tf_rag.py b/src/transformers/models/rag/modeling_tf_rag.py index 5f652898b19e..c5245108e8e9 100644 --- a/src/transformers/models/rag/modeling_tf_rag.py +++ b/src/transformers/models/rag/modeling_tf_rag.py @@ -1075,7 +1075,7 @@ def generate( output_scores (`bool`, *optional*, defaults to `False`): Whether or not to return the prediction scores. See `scores` under returned tensors for more details. return_dict_in_generate (`bool`, *optional*, defaults to `False`): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. model_specific_kwargs: Additional model specific kwargs will be forwarded to the `forward` function of the model. diff --git a/src/transformers/models/rag/retrieval_rag.py b/src/transformers/models/rag/retrieval_rag.py index de2d03e6d69d..f39fc48d27c8 100644 --- a/src/transformers/models/rag/retrieval_rag.py +++ b/src/transformers/models/rag/retrieval_rag.py @@ -581,7 +581,7 @@ def __call__( The prefix used by the generator's tokenizer. n_docs (`int`, *optional*): The number of docs retrieved per query. - return_tensors (`str` or [`~file_utils.TensorType`], *optional*, defaults to "pt"): + return_tensors (`str` or [`~utils.TensorType`], *optional*, defaults to "pt"): If set, will return tensors instead of list of python integers. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/models/realm/modeling_realm.py b/src/transformers/models/realm/modeling_realm.py index 94c0c730a338..59e1f275bc36 100644 --- a/src/transformers/models/realm/modeling_realm.py +++ b/src/transformers/models/realm/modeling_realm.py @@ -950,7 +950,7 @@ def mask_to_score(mask): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ @@ -1715,7 +1715,7 @@ def mask_to_score(mask): config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-1` are ignored (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]` return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. 
""" diff --git a/src/transformers/models/reformer/modeling_reformer.py b/src/transformers/models/reformer/modeling_reformer.py index b58e5a02893b..7934320a4d87 100755 --- a/src/transformers/models/reformer/modeling_reformer.py +++ b/src/transformers/models/reformer/modeling_reformer.py @@ -1950,7 +1950,7 @@ class ReformerModelWithLMHeadOutput(ModelOutput): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/rembert/modeling_rembert.py b/src/transformers/models/rembert/modeling_rembert.py index 2467a9a9afe7..428cdd5ad98a 100755 --- a/src/transformers/models/rembert/modeling_rembert.py +++ b/src/transformers/models/rembert/modeling_rembert.py @@ -739,7 +739,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/rembert/modeling_tf_rembert.py b/src/transformers/models/rembert/modeling_tf_rembert.py index 8d42805caec1..9a3892f409fe 100644 --- a/src/transformers/models/rembert/modeling_tf_rembert.py +++ b/src/transformers/models/rembert/modeling_tf_rembert.py @@ -917,8 +917,8 @@ def dummy_inputs(self): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False``): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/resnet/modeling_resnet.py b/src/transformers/models/resnet/modeling_resnet.py index bf27ea98910b..7e74cdf8dcb7 100644 --- a/src/transformers/models/resnet/modeling_resnet.py +++ b/src/transformers/models/resnet/modeling_resnet.py @@ -293,7 +293,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/roberta/modeling_flax_roberta.py b/src/transformers/models/roberta/modeling_flax_roberta.py index 78f00a95a055..8a3796bd2f20 100644 --- a/src/transformers/models/roberta/modeling_flax_roberta.py +++ b/src/transformers/models/roberta/modeling_flax_roberta.py @@ -125,7 +125,7 @@ def create_position_ids_from_input_ids(input_ids, padding_idx): - 0 indicates the head is **masked**. 
return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/roberta/modeling_roberta.py b/src/transformers/models/roberta/modeling_roberta.py index fd21085e3264..ca6678282e5c 100644 --- a/src/transformers/models/roberta/modeling_roberta.py +++ b/src/transformers/models/roberta/modeling_roberta.py @@ -685,7 +685,7 @@ def update_keys_to_ignore(self, config, del_keys_to_ignore): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/roberta/modeling_tf_roberta.py b/src/transformers/models/roberta/modeling_tf_roberta.py index bbdf7ebf3301..a62659582b7e 100644 --- a/src/transformers/models/roberta/modeling_tf_roberta.py +++ b/src/transformers/models/roberta/modeling_tf_roberta.py @@ -895,8 +895,8 @@ def serving(self, inputs): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/roformer/modeling_flax_roformer.py b/src/transformers/models/roformer/modeling_flax_roformer.py index d1a3e917a0b7..d0261ee835ec 100644 --- a/src/transformers/models/roformer/modeling_flax_roformer.py +++ b/src/transformers/models/roformer/modeling_flax_roformer.py @@ -123,7 +123,7 @@ - 0 indicates the head is **masked**. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/roformer/modeling_roformer.py b/src/transformers/models/roformer/modeling_roformer.py index a2b60c46ce46..475655b15b55 100644 --- a/src/transformers/models/roformer/modeling_roformer.py +++ b/src/transformers/models/roformer/modeling_roformer.py @@ -783,7 +783,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/roformer/modeling_tf_roformer.py b/src/transformers/models/roformer/modeling_tf_roformer.py index 4fd8611b6e55..bed8ecf975c2 100644 --- a/src/transformers/models/roformer/modeling_tf_roformer.py +++ b/src/transformers/models/roformer/modeling_tf_roformer.py @@ -780,8 +780,8 @@ class TFRoFormerPreTrainedModel(TFPreTrainedModel): more detail. 
This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False``): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/segformer/feature_extraction_segformer.py b/src/transformers/models/segformer/feature_extraction_segformer.py index c61abb696bb5..c706c559af3c 100644 --- a/src/transformers/models/segformer/feature_extraction_segformer.py +++ b/src/transformers/models/segformer/feature_extraction_segformer.py @@ -112,7 +112,7 @@ def __call__( segmentation_maps (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`, *optional*): Optionally, the corresponding semantic segmentation maps with the pixel-wise annotations. - return_tensors (`str` or [`~file_utils.TensorType`], *optional*, defaults to `'np'`): + return_tensors (`str` or [`~utils.TensorType`], *optional*, defaults to `'np'`): If set, will return tensors of a particular framework. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/models/segformer/modeling_segformer.py b/src/transformers/models/segformer/modeling_segformer.py index 1d02b229cee9..cefe94e18622 100755 --- a/src/transformers/models/segformer/modeling_segformer.py +++ b/src/transformers/models/segformer/modeling_segformer.py @@ -467,7 +467,7 @@ def _init_weights(self, module): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/sew/modeling_sew.py b/src/transformers/models/sew/modeling_sew.py index 2ddf7024cb0e..e3561f780b5b 100644 --- a/src/transformers/models/sew/modeling_sew.py +++ b/src/transformers/models/sew/modeling_sew.py @@ -828,7 +828,7 @@ def _get_feature_vector_attention_mask(self, feature_vector_length: int, attenti Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/sew_d/modeling_sew_d.py b/src/transformers/models/sew_d/modeling_sew_d.py index 24470fc5d3d1..4470a10ebcd1 100644 --- a/src/transformers/models/sew_d/modeling_sew_d.py +++ b/src/transformers/models/sew_d/modeling_sew_d.py @@ -1340,7 +1340,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. 
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py b/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py index effc6100c20e..16067e2c2052 100644 --- a/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py +++ b/src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py @@ -120,7 +120,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - If set to `True`, the model will return a [`~file_utils.FlaxSeq2SeqLMOutput`] instead of a plain tuple. + If set to `True`, the model will return a [`~utils.FlaxSeq2SeqLMOutput`] instead of a plain tuple. """ SPEECH_ENCODER_DECODER_ENCODE_INPUTS_DOCSTRING = r""" @@ -145,7 +145,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - If set to `True`, the model will return a [`~file_utils.FlaxBaseModelOutput`] instead of a plain tuple. + If set to `True`, the model will return a [`~utils.FlaxBaseModelOutput`] instead of a plain tuple. """ SPEECH_ENCODER_DECODER_DECODE_INPUTS_DOCSTRING = r""" @@ -191,8 +191,8 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - If set to `True`, the model will return a [`~file_utils.FlaxCausalLMOutputWithCrossAttentions`] instead of - a plain tuple. + If set to `True`, the model will return a [`~utils.FlaxCausalLMOutputWithCrossAttentions`] instead of a + plain tuple. """ diff --git a/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py b/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py index 2057f9742139..45262ad940fe 100644 --- a/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py +++ b/src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py @@ -142,7 +142,7 @@ [`Speech2TextTokenizer`] should be used for extracting the fbank features, padding and conversion into a tensor of type `torch.FloatTensor`. See [`~Speech2TextTokenizer.__call__`] return_dict (`bool`, *optional*): - If set to `True`, the model will return a [`~file_utils.Seq2SeqLMOutput`] instead of a plain tuple. + If set to `True`, the model will return a [`~utils.Seq2SeqLMOutput`] instead of a plain tuple. kwargs: (*optional*) Remaining dictionary of keyword arguments. Keyword arguments come in two flavors: - Without a prefix which will be input as `**encoder_kwargs` for the encoder forward function. diff --git a/src/transformers/models/speech_to_text/feature_extraction_speech_to_text.py b/src/transformers/models/speech_to_text/feature_extraction_speech_to_text.py index aa2330175548..e6ff52f18360 100644 --- a/src/transformers/models/speech_to_text/feature_extraction_speech_to_text.py +++ b/src/transformers/models/speech_to_text/feature_extraction_speech_to_text.py @@ -142,7 +142,7 @@ def __call__( raw_speech (`np.ndarray`, `List[float]`, `List[np.ndarray]`, `List[List[float]]`): The sequence or batch of sequences to be padded. Each sequence can be a numpy array, a list of float values, a list of numpy arrays or a list of list of float values. 
- padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`): + padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`): Select a strategy to pad the returned sequences (according to the model's padding side and padding index) among: @@ -174,7 +174,7 @@ def __call__( - return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors instead of list of python integers. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/models/speech_to_text/modeling_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_speech_to_text.py index c7f0341bbd35..abefc1c1f3cf 100755 --- a/src/transformers/models/speech_to_text/modeling_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_speech_to_text.py @@ -679,7 +679,7 @@ def _get_feature_vector_attention_mask(self, feature_vector_length, attention_ma Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ @@ -757,7 +757,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -974,7 +974,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py index 7b5a2f4716c9..7bdf620c6555 100755 --- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py @@ -710,8 +710,8 @@ def serving(self, inputs): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -821,7 +821,7 @@ def call( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. 
return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ if input_features is None: raise ValueError("You have to specify input_features") @@ -1017,7 +1017,7 @@ def call( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ if input_ids is not None and inputs_embeds is not None: diff --git a/src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py b/src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py index 13fe79b31037..dccbd2adf48b 100755 --- a/src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py +++ b/src/transformers/models/speech_to_text_2/modeling_speech_to_text_2.py @@ -581,7 +581,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -850,7 +850,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. Returns: diff --git a/src/transformers/models/splinter/modeling_splinter.py b/src/transformers/models/splinter/modeling_splinter.py index 0bd51f437770..64ac946d9b87 100755 --- a/src/transformers/models/splinter/modeling_splinter.py +++ b/src/transformers/models/splinter/modeling_splinter.py @@ -600,7 +600,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/squeezebert/modeling_squeezebert.py b/src/transformers/models/squeezebert/modeling_squeezebert.py index 733e5f72783d..b8cdfe16a9f4 100644 --- a/src/transformers/models/squeezebert/modeling_squeezebert.py +++ b/src/transformers/models/squeezebert/modeling_squeezebert.py @@ -538,7 +538,7 @@ def _init_weights(self, module): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. 
""" diff --git a/src/transformers/models/swin/modeling_swin.py b/src/transformers/models/swin/modeling_swin.py index 8677e7491647..81e91a19dcca 100644 --- a/src/transformers/models/swin/modeling_swin.py +++ b/src/transformers/models/swin/modeling_swin.py @@ -873,7 +873,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/t5/modeling_flax_t5.py b/src/transformers/models/t5/modeling_flax_t5.py index a1387af50cd6..eb6056dabe97 100644 --- a/src/transformers/models/t5/modeling_flax_t5.py +++ b/src/transformers/models/t5/modeling_flax_t5.py @@ -800,7 +800,7 @@ def __call__( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ T5_DECODE_INPUTS_DOCSTRING = r""" @@ -841,7 +841,7 @@ def __call__( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ @@ -899,7 +899,7 @@ def __call__( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/t5/modeling_t5.py b/src/transformers/models/t5/modeling_t5.py index d43030c07e5e..b4793c204c76 100644 --- a/src/transformers/models/t5/modeling_t5.py +++ b/src/transformers/models/t5/modeling_t5.py @@ -1211,7 +1211,7 @@ def custom_forward(*inputs): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ T5_ENCODER_INPUTS_DOCSTRING = r""" @@ -1248,7 +1248,7 @@ def custom_forward(*inputs): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ # Warning message for FutureWarning: head_mask was separated into two input args - head_mask, decoder_head_mask diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py index e6021aba5d0e..133103f3e855 100644 --- a/src/transformers/models/t5/modeling_tf_t5.py +++ b/src/transformers/models/t5/modeling_tf_t5.py @@ -1047,8 +1047,8 @@ def _shift_right(self, input_ids): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. 
return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). @@ -1088,7 +1088,7 @@ def _shift_right(self, input_ids): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/tapas/modeling_tapas.py b/src/transformers/models/tapas/modeling_tapas.py index fd3672ab1851..5b3fc4b46031 100644 --- a/src/transformers/models/tapas/modeling_tapas.py +++ b/src/transformers/models/tapas/modeling_tapas.py @@ -845,7 +845,7 @@ class for more info. Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/tapas/modeling_tf_tapas.py b/src/transformers/models/tapas/modeling_tf_tapas.py index 820156312b67..8f2138f2fbad 100644 --- a/src/transformers/models/tapas/modeling_tf_tapas.py +++ b/src/transformers/models/tapas/modeling_tf_tapas.py @@ -947,8 +947,8 @@ class for more info. more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/tapas/tokenization_tapas.py b/src/transformers/models/tapas/tokenization_tapas.py index dfa451b73a08..27481c35fb14 100644 --- a/src/transformers/models/tapas/tokenization_tapas.py +++ b/src/transformers/models/tapas/tokenization_tapas.py @@ -146,7 +146,7 @@ def whitespace_tokenize(text): TAPAS_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING = r""" add_special_tokens (`bool`, *optional*, defaults to `True`): Whether or not to encode the sequences with the special tokens relative to their model. - padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `False`): + padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`): Activates and controls padding.
Accepts the following values: - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single @@ -176,7 +176,7 @@ def whitespace_tokenize(text): pad_to_multiple_of (`int`, *optional*): If set will pad the sequence to a multiple of the provided value. This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= 7.5 (Volta). - return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors instead of list of python integers. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py index 7cc64cd6d756..d5dc28c36503 100644 --- a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py +++ b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py @@ -863,8 +863,8 @@ class TFTransfoXLSequenceClassifierOutputWithPast(ModelOutput): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/transfo_xl/modeling_transfo_xl.py b/src/transformers/models/transfo_xl/modeling_transfo_xl.py index 6fbca9218f98..f566262ff294 100644 --- a/src/transformers/models/transfo_xl/modeling_transfo_xl.py +++ b/src/transformers/models/transfo_xl/modeling_transfo_xl.py @@ -760,7 +760,7 @@ def logits(self): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/trocr/modeling_trocr.py b/src/transformers/models/trocr/modeling_trocr.py index fe5a045a56a0..8a26739d877f 100644 --- a/src/transformers/models/trocr/modeling_trocr.py +++ b/src/transformers/models/trocr/modeling_trocr.py @@ -609,7 +609,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -884,7 +884,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. 
Returns: diff --git a/src/transformers/models/unispeech/modeling_unispeech.py b/src/transformers/models/unispeech/modeling_unispeech.py index 06e94f805769..e7206b587524 100755 --- a/src/transformers/models/unispeech/modeling_unispeech.py +++ b/src/transformers/models/unispeech/modeling_unispeech.py @@ -1076,7 +1076,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py index 52fe1edcb6de..ae5f57d8a1e5 100755 --- a/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py +++ b/src/transformers/models/unispeech_sat/modeling_unispeech_sat.py @@ -1116,7 +1116,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/van/modeling_van.py b/src/transformers/models/van/modeling_van.py index d25189765239..84c39b40af4e 100644 --- a/src/transformers/models/van/modeling_van.py +++ b/src/transformers/models/van/modeling_van.py @@ -444,7 +444,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all stages. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/vilt/feature_extraction_vilt.py b/src/transformers/models/vilt/feature_extraction_vilt.py index 98c4ccc72244..7fdd138750ac 100644 --- a/src/transformers/models/vilt/feature_extraction_vilt.py +++ b/src/transformers/models/vilt/feature_extraction_vilt.py @@ -145,7 +145,7 @@ def pad_and_create_pixel_mask( Args: pixel_values_list (`List[torch.Tensor]`): List of images (pixel values) to be padded. Each image should be a tensor of shape (C, H, W). - return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors instead of NumPy arrays. If set to `'pt'`, return PyTorch `torch.Tensor` objects. @@ -208,7 +208,7 @@ def __call__( - 1 for pixels that are real (i.e. **not masked**), - 0 for pixels that are padding (i.e. **masked**). - return_tensors (`str` or [`~file_utils.TensorType`], *optional*, defaults to `'np'`): + return_tensors (`str` or [`~utils.TensorType`], *optional*, defaults to `'np'`): If set, will return tensors of a particular framework. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. 
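For reference, every `return_tensors` hunk in this patch keeps the same contract and only moves `TensorType` from `file_utils` to `utils`: the argument accepts either a framework string (`'tf'`, `'pt'`, `'np'`, `'jax'`) or the corresponding `TensorType` member. A minimal sketch of that interchangeability, shown with a tokenizer since tokenizers and the feature extractors above share the contract; the checkpoint name is assumed purely for illustration:

```python
# Hedged sketch: `return_tensors` accepts a string alias or a TensorType member.
# The checkpoint name is only an example; any tokenizer behaves the same way.
from transformers import AutoTokenizer
from transformers.utils import TensorType  # formerly transformers.file_utils

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

as_string = tokenizer("hello world", return_tensors="pt")
as_enum = tokenizer("hello world", return_tensors=TensorType.PYTORCH)

print(type(as_string["input_ids"]))  # <class 'torch.Tensor'>
print(as_string["input_ids"].equal(as_enum["input_ids"]))  # True: identical encodings
```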
diff --git a/src/transformers/models/vilt/modeling_vilt.py b/src/transformers/models/vilt/modeling_vilt.py index 9334588fbc2d..007b38894003 100755 --- a/src/transformers/models/vilt/modeling_vilt.py +++ b/src/transformers/models/vilt/modeling_vilt.py @@ -660,7 +660,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ VILT_IMAGES_AND_TEXT_CLASSIFICATION_INPUTS_DOCSTRING = r""" @@ -715,7 +715,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py index 83f163e48f35..7b8d92f13627 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py @@ -107,7 +107,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - If set to `True`, the model will return a [`~file_utils.FlaxSeq2SeqLMOutput`] instead of a plain tuple. + If set to `True`, the model will return a [`~utils.FlaxSeq2SeqLMOutput`] instead of a plain tuple. """ VISION_ENCODER_DECODER_ENCODE_INPUTS_DOCSTRING = r""" @@ -122,7 +122,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - If set to `True`, the model will return a [`~file_utils.FlaxBaseModelOutput`] instead of a plain tuple. + If set to `True`, the model will return a [`~utils.FlaxBaseModelOutput`] instead of a plain tuple. """ VISION_ENCODER_DECODER_DECODE_INPUTS_DOCSTRING = r""" @@ -161,8 +161,8 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - If set to `True`, the model will return a [`~file_utils.FlaxCausalLMOutputWithCrossAttentions`] instead of - a plain tuple. + If set to `True`, the model will return a [`~utils.FlaxCausalLMOutputWithCrossAttentions`] instead of a + plain tuple. """ diff --git a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py index 5884725afa00..965fc51d783b 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py @@ -132,7 +132,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - If set to `True`, the model will return a [`~file_utils.Seq2SeqLMOutput`] instead of a plain tuple. 
+ If set to `True`, the model will return a [`~utils.Seq2SeqLMOutput`] instead of a plain tuple. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py index 850a554a4da1..999ba2d2db89 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py @@ -136,7 +136,7 @@ def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - If set to `True`, the model will return a [`~file_utils.Seq2SeqLMOutput`] instead of a plain tuple. + If set to `True`, the model will return a [`~utils.Seq2SeqLMOutput`] instead of a plain tuple. kwargs: (*optional*) Remaining dictionary of keyword arguments. Keyword arguments come in two flavors: - Without a prefix which will be input as `**encoder_kwargs` for the encoder forward function. diff --git a/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py index c96e7ae40930..9a6b25a4d67b 100644 --- a/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py +++ b/src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py @@ -114,7 +114,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py index 6ec2cf987ba7..e13c9ca7ef8f 100755 --- a/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +++ b/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py @@ -89,7 +89,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ VISION_TEXT_DUAL_ENCODER_VISION_INPUTS_DOCSTRING = r""" @@ -104,7 +104,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ VISION_TEXT_DUAL_ENCODER_INPUTS_DOCSTRING = r""" @@ -142,7 +142,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. 
+ Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/vision_text_dual_encoder/processing_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/processing_vision_text_dual_encoder.py index 6cc58b26279b..849f4ad92ec9 100644 --- a/src/transformers/models/vision_text_dual_encoder/processing_vision_text_dual_encoder.py +++ b/src/transformers/models/vision_text_dual_encoder/processing_vision_text_dual_encoder.py @@ -60,7 +60,7 @@ def __call__(self, text=None, images=None, return_tensors=None, **kwargs): tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a number of channels, H and W are image height and width. - return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors of a particular framework. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/models/visual_bert/modeling_visual_bert.py b/src/transformers/models/visual_bert/modeling_visual_bert.py index 3b909234351d..0e5acf32b3c4 100755 --- a/src/transformers/models/visual_bert/modeling_visual_bert.py +++ b/src/transformers/models/visual_bert/modeling_visual_bert.py @@ -669,7 +669,7 @@ class VisualBertForPreTrainingOutput(ModelOutput): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/vit/feature_extraction_vit.py b/src/transformers/models/vit/feature_extraction_vit.py index 0a813a2c9cd7..29c0fa3fc4f6 100644 --- a/src/transformers/models/vit/feature_extraction_vit.py +++ b/src/transformers/models/vit/feature_extraction_vit.py @@ -98,7 +98,7 @@ def __call__( tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a number of channels, H and W are image height and width. - return_tensors (`str` or [`~file_utils.TensorType`], *optional*, defaults to `'np'`): + return_tensors (`str` or [`~utils.TensorType`], *optional*, defaults to `'np'`): If set, will return tensors of a particular framework. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/models/vit/modeling_flax_vit.py b/src/transformers/models/vit/modeling_flax_vit.py index 9340011ae271..b42076864dac 100644 --- a/src/transformers/models/vit/modeling_flax_vit.py +++ b/src/transformers/models/vit/modeling_flax_vit.py @@ -79,7 +79,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/vit/modeling_tf_vit.py b/src/transformers/models/vit/modeling_tf_vit.py index fdd7e85ce1de..e2e946d8c9f4 100644 --- a/src/transformers/models/vit/modeling_tf_vit.py +++ b/src/transformers/models/vit/modeling_tf_vit.py @@ -626,8 +626,8 @@ def serving(self, inputs): interpolate_pos_encoding (`bool`, *optional*): Whether to interpolate the pre-trained position encodings. 
return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/vit/modeling_vit.py b/src/transformers/models/vit/modeling_vit.py index ee2630a27a26..9679fef670b2 100644 --- a/src/transformers/models/vit/modeling_vit.py +++ b/src/transformers/models/vit/modeling_vit.py @@ -500,7 +500,7 @@ def _set_gradient_checkpointing(self, module: ViTEncoder, value: bool = False) - interpolate_pos_encoding (`bool`, *optional*): Whether to interpolate the pre-trained position encodings. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/vit_mae/modeling_vit_mae.py b/src/transformers/models/vit_mae/modeling_vit_mae.py index dfa6f5448359..d51e47e63bb2 100755 --- a/src/transformers/models/vit_mae/modeling_vit_mae.py +++ b/src/transformers/models/vit_mae/modeling_vit_mae.py @@ -631,7 +631,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/wav2vec2/feature_extraction_wav2vec2.py b/src/transformers/models/wav2vec2/feature_extraction_wav2vec2.py index 93497652e1ac..595fb11192ad 100644 --- a/src/transformers/models/wav2vec2/feature_extraction_wav2vec2.py +++ b/src/transformers/models/wav2vec2/feature_extraction_wav2vec2.py @@ -118,7 +118,7 @@ def __call__( raw_speech (`np.ndarray`, `List[float]`, `List[np.ndarray]`, `List[List[float]]`): The sequence or batch of sequences to be padded. Each sequence can be a numpy array, a list of float values, a list of numpy arrays or a list of list of float values. - padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `False`): + padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`): Select a strategy to pad the returned sequences (according to the model's padding side and padding index) among: @@ -156,7 +156,7 @@ def __call__( - return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors instead of list of python integers. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects.
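The `padding` hunks likewise leave the documented values untouched (`True`/`'longest'`, `'max_length'`, `False`/`'do_not_pad'`) and only relocate `PaddingStrategy` to `transformers.utils`. A minimal sketch of the two padding modes, again assuming a public BERT checkpoint for illustration only:

```python
# Hedged sketch of the padding strategies named in the docstrings above.
from transformers import AutoTokenizer
from transformers.utils import PaddingStrategy  # formerly transformers.file_utils

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
batch = ["a short sequence", "a noticeably longer input sequence"]

# `True` or "longest" pads to the longest sample in the batch...
longest = tokenizer(batch, padding=PaddingStrategy.LONGEST)
# ...while "max_length" pads every sample to the given `max_length`.
fixed = tokenizer(batch, padding=PaddingStrategy.MAX_LENGTH, max_length=16)

print([len(ids) for ids in longest["input_ids"]])  # both equal to the longest sample
print([len(ids) for ids in fixed["input_ids"]])  # [16, 16]
```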
diff --git a/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py index 6f1021f118f4..59c5009716b1 100644 --- a/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py @@ -281,7 +281,7 @@ def _sample_negative_indices(features_shape: Tuple, num_negatives: int, attentio Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py index 4152c38af1dd..98f922c3d47e 100644 --- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py @@ -1413,8 +1413,8 @@ def serving(self, inputs): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/wav2vec2/modeling_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_wav2vec2.py index 9deed24cca6e..08a86d540311 100755 --- a/src/transformers/models/wav2vec2/modeling_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_wav2vec2.py @@ -1226,7 +1226,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py index ddec5db3b42f..b9d60079e08e 100644 --- a/src/transformers/models/wav2vec2/tokenization_wav2vec2.py +++ b/src/transformers/models/wav2vec2/tokenization_wav2vec2.py @@ -69,7 +69,7 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {"facebook/wav2vec2-base-960h": sys.maxsize} WAV2VEC2_KWARGS_DOCSTRING = r""" - padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `False`): + padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`): Activates and controls padding. Accepts the following values: - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single @@ -87,7 +87,7 @@ pad_to_multiple_of (`int`, *optional*): If set will pad the sequence to a multiple of the provided value. This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= 7.5 (Volta).
- return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors instead of list of python integers. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/models/wavlm/modeling_wavlm.py b/src/transformers/models/wavlm/modeling_wavlm.py index 7c99a5d7808c..e6b47056ce2e 100755 --- a/src/transformers/models/wavlm/modeling_wavlm.py +++ b/src/transformers/models/wavlm/modeling_wavlm.py @@ -1176,7 +1176,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/xglm/modeling_flax_xglm.py b/src/transformers/models/xglm/modeling_flax_xglm.py index 01f8f33a0994..e519bc63afe7 100644 --- a/src/transformers/models/xglm/modeling_flax_xglm.py +++ b/src/transformers/models/xglm/modeling_flax_xglm.py @@ -107,7 +107,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/xglm/modeling_xglm.py b/src/transformers/models/xglm/modeling_xglm.py index 0dda424d542e..f07a68f06ed2 100755 --- a/src/transformers/models/xglm/modeling_xglm.py +++ b/src/transformers/models/xglm/modeling_xglm.py @@ -110,7 +110,7 @@ Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ @@ -669,7 +669,7 @@ def forward( Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( diff --git a/src/transformers/models/xlm/modeling_tf_xlm.py b/src/transformers/models/xlm/modeling_tf_xlm.py index f677aa9eb517..dbb994ed47c1 100644 --- a/src/transformers/models/xlm/modeling_tf_xlm.py +++ b/src/transformers/models/xlm/modeling_tf_xlm.py @@ -667,8 +667,8 @@ class TFXLMWithLMHeadModelOutput(ModelOutput): more detail. This argument can be used only in eager mode, in graph mode the value in the config will be used instead. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This argument can be used - in eager mode, in graph mode the value will always be set to True. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in + eager mode, in graph mode the value will always be set to True. 
training (`bool`, *optional*, defaults to `False`): Whether or not to use the model in training mode (some modules like dropout modules have different behaviors between training and evaluation). diff --git a/src/transformers/models/xlm/modeling_xlm.py b/src/transformers/models/xlm/modeling_xlm.py index 4549089dea8a..0c05590d61c8 100755 --- a/src/transformers/models/xlm/modeling_xlm.py +++ b/src/transformers/models/xlm/modeling_xlm.py @@ -390,7 +390,7 @@ class XLMForQuestionAnsweringOutput(ModelOutput): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py b/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py index 0ff283ae30cf..aa7e47cc8806 100644 --- a/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +++ b/src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py @@ -661,7 +661,7 @@ def update_keys_to_ignore(self, config, del_keys_to_ignore): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/xlnet/modeling_tf_xlnet.py b/src/transformers/models/xlnet/modeling_tf_xlnet.py index c164df41558e..3a77c4845dfd 100644 --- a/src/transformers/models/xlnet/modeling_tf_xlnet.py +++ b/src/transformers/models/xlnet/modeling_tf_xlnet.py @@ -1115,7 +1115,7 @@ class TFXLNetForQuestionAnsweringSimpleOutput(ModelOutput): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/xlnet/modeling_xlnet.py b/src/transformers/models/xlnet/modeling_xlnet.py index e490365104df..e84eb7404031 100755 --- a/src/transformers/models/xlnet/modeling_xlnet.py +++ b/src/transformers/models/xlnet/modeling_xlnet.py @@ -927,7 +927,7 @@ class XLNetForQuestionAnsweringOutput(ModelOutput): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ diff --git a/src/transformers/models/yoso/modeling_yoso.py b/src/transformers/models/yoso/modeling_yoso.py index 5914454f5cc5..994f80b9bdd2 100644 --- a/src/transformers/models/yoso/modeling_yoso.py +++ b/src/transformers/models/yoso/modeling_yoso.py @@ -737,7 +737,7 @@ def _set_gradient_checkpointing(self, module, value=False): Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. 
""" diff --git a/src/transformers/pipelines/table_question_answering.py b/src/transformers/pipelines/table_question_answering.py index 0e2df2cd8ab7..c13753032dec 100644 --- a/src/transformers/pipelines/table_question_answering.py +++ b/src/transformers/pipelines/table_question_answering.py @@ -286,7 +286,7 @@ def __call__(self, *args, **kwargs): Whether to do inference sequentially or as a batch. Batching is faster, but models like SQA require the inference to be done sequentially to extract relations within sequences, given their conversational nature. - padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `False`): + padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`): Activates and controls padding. Accepts the following values: - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index f4fdad5c632f..0a04813a3143 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -120,7 +120,7 @@ def save_pretrained(self, save_directory, push_to_hub: bool = False, **kwargs): kwargs: - Additional key word arguments passed along to the [`~file_utils.PushToHubMixin.push_to_hub`] method. + Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method. """ if push_to_hub: commit_message = kwargs.pop("commit_message", None) diff --git a/src/transformers/sagemaker/training_args_sm.py b/src/transformers/sagemaker/training_args_sm.py index f2b0ee8f5e2a..992f3d4fce30 100644 --- a/src/transformers/sagemaker/training_args_sm.py +++ b/src/transformers/sagemaker/training_args_sm.py @@ -26,7 +26,7 @@ logger = logging.get_logger(__name__) -# TODO: should be moved to `file_utils` after refactoring of SageMakerTrainer +# TODO: should be moved to `utils` after refactoring of SageMakerTrainer def is_sagemaker_model_parallel_available(): diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index cbf03dc9c15a..c7f76c91ddac 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -646,9 +646,9 @@ def convert_to_tensors( Convert the inner content to tensors. Args: - tensor_type (`str` or [`~file_utils.TensorType`], *optional*): - The type of tensors to use. If `str`, should be one of the values of the enum - [`~file_utils.TensorType`]. If `None`, no modification is done. + tensor_type (`str` or [`~utils.TensorType`], *optional*): + The type of tensors to use. If `str`, should be one of the values of the enum [`~utils.TensorType`]. If + `None`, no modification is done. prepend_batch_axis (`int`, *optional*, defaults to `False`): Whether or not to add the batch dimension during the conversion. """ @@ -1253,7 +1253,7 @@ def all_special_ids(self) -> List[int]: ENCODE_KWARGS_DOCSTRING = r""" add_special_tokens (`bool`, *optional*, defaults to `True`): Whether or not to encode the sequences with the special tokens relative to their model. - padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `False`): + padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`): Activates and controls padding. 
Accepts the following values: - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single @@ -1295,7 +1295,7 @@ def all_special_ids(self) -> List[int]: pad_to_multiple_of (`int`, *optional*): If set will pad the sequence to a multiple of the provided value. This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= 7.5 (Volta). - return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors instead of list of python integers. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. @@ -2731,7 +2731,7 @@ def pad( Instead of `List[int]` you can have tensors (numpy arrays, PyTorch tensors or TensorFlow tensors), see the note above for the return type. - padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`): + padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`): Select a strategy to pad the returned sequences (according to the model's padding side and padding index) among: @@ -2753,7 +2753,7 @@ def pad( to the specific tokenizer's default, defined by the `return_outputs` attribute. [What are attention masks?](../glossary#attention-mask) - return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors instead of list of python integers. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. @@ -3453,7 +3453,7 @@ def prepare_seq2seq_batch( max_target_length (`int`, *optional*): Controls the maximum length of decoder inputs (target language texts or summaries) If left unset or set to `None`, this will use the max_length value. - padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `False`): + padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`): Activates and controls padding. Accepts the following values: - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single @@ -3462,7 +3462,7 @@ def prepare_seq2seq_batch( acceptable input length for the model if that argument is not provided. - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different lengths). - return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors instead of list of python integers. Acceptable values are: - `'tf'`: Return TensorFlow `tf.constant` objects. diff --git a/src/transformers/tokenization_utils_fast.py b/src/transformers/tokenization_utils_fast.py index 3a47cc265178..1ee2a44d9b9f 100644 --- a/src/transformers/tokenization_utils_fast.py +++ b/src/transformers/tokenization_utils_fast.py @@ -334,7 +334,7 @@ def set_truncation_and_padding( section. 
Args: - padding_strategy ([`~file_utils.PaddingStrategy`]): + padding_strategy ([`~utils.PaddingStrategy`]): The kind of padding that will be applied to the input truncation_strategy ([`~tokenization_utils_base.TruncationStrategy`]): The kind of truncation that will be applied to the input diff --git a/src/transformers/utils/generic.py b/src/transformers/utils/generic.py index f0be710dd852..e455cdc6adb0 100644 --- a/src/transformers/utils/generic.py +++ b/src/transformers/utils/generic.py @@ -150,8 +150,8 @@ class ModelOutput(OrderedDict): - You can't unpack a `ModelOutput` directly. Use the [`~file_utils.ModelOutput.to_tuple`] method to convert it to a - tuple before. + You can't unpack a `ModelOutput` directly. Use the [`~utils.ModelOutput.to_tuple`] method to convert it to a tuple + before. """ diff --git a/templates/adding_a_new_model/ADD_NEW_MODEL_PROPOSAL_TEMPLATE.md b/templates/adding_a_new_model/ADD_NEW_MODEL_PROPOSAL_TEMPLATE.md index 83ff5969eac1..3b2de6f3c098 100644 --- a/templates/adding_a_new_model/ADD_NEW_MODEL_PROPOSAL_TEMPLATE.md +++ b/templates/adding_a_new_model/ADD_NEW_MODEL_PROPOSAL_TEMPLATE.md @@ -535,7 +535,7 @@ to make your debugging environment as efficient as possible. due to multiple dropout layers in the model. Make sure that the forward pass in your debugging environment is **deterministic** so that the dropout layers are not used. Or use - `transformers.file_utils.set_seed` if the old and new + `transformers.utils.set_seed` if the old and new implementations are in the same framework. #### More details on how to create a debugging environment for [camelcase name of model] diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_flax_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_flax_{{cookiecutter.lowercase_modelname}}.py index 500a5ccf8986..80e3c4468db7 100644 --- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_flax_{{cookiecutter.lowercase_modelname}}.py +++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_flax_{{cookiecutter.lowercase_modelname}}.py @@ -119,7 +119,7 @@ - 0 indicates the head is **masked**. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ @@ -1244,7 +1244,7 @@ class Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering(Flax{{cookiec Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ @@ -1275,7 +1275,7 @@ class Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering(Flax{{cookiec Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more detail. return_dict (`bool`, *optional*): - Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. """ {{cookiecutter.uppercase_modelname}}_DECODE_INPUTS_DOCSTRING = r""" @@ -1322,7 +1322,7 @@ class Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering(Flax{{cookiec Whether or not to return the hidden states of all layers. 
diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_flax_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_flax_{{cookiecutter.lowercase_modelname}}.py
index 500a5ccf8986..80e3c4468db7 100644
--- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_flax_{{cookiecutter.lowercase_modelname}}.py
+++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_flax_{{cookiecutter.lowercase_modelname}}.py
@@ -119,7 +119,7 @@

             - 0 indicates the head is **masked**.

         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
@@ -1244,7 +1244,7 @@ class Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering(Flax{{cookiec
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
@@ -1275,7 +1275,7 @@ class Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering(Flax{{cookiec
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """

 {{cookiecutter.uppercase_modelname}}_DECODE_INPUTS_DOCSTRING = r"""
@@ -1322,7 +1322,7 @@ class Flax{{cookiecutter.camelcase_modelname}}ForQuestionAnswering(Flax{{cookiec
             Whether or not to return the hidden states of all layers.
             See `hidden_states` under returned tensors for more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """


 def shift_tokens_right(input_ids: jnp.ndarray, pad_token_id: int, decoder_start_token_id: int) -> jnp.ndarray:
diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py
index b0341d157fa3..0fd552e02905 100644
--- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py
+++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py
@@ -925,7 +925,7 @@ def dummy_inputs(self):
             more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
             used instead.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This
             argument can be used in eager mode, in graph mode the value will always be set to True.
         training (`bool`, *optional*, defaults to `False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
@@ -2338,7 +2338,7 @@ def serving(self, inputs):
             more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
             used instead.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This
             argument can be used in eager mode, in graph mode the value will always be set to True.
         training (`bool`, *optional*, defaults to `False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
@@ -2429,7 +2429,7 @@ def call(
             for more detail. This argument can be used only in eager mode, in graph mode the value in the config will
             be used instead.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This
             argument can be used in eager mode, in graph mode the value will always be set to True.
         training (`bool`, *optional*, defaults to `False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
@@ -2626,7 +2626,7 @@ def call(
             for more detail. This argument can be used only in eager mode, in graph mode the value in the config will
             be used instead.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple. This
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This
             argument can be used in eager mode, in graph mode the value will always be set to True.
         training (`bool`, *optional*, defaults to `False`):
             Whether or not to use the model in training mode (some modules like dropout modules have different
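The TF docstrings above carry an extra caveat: `return_dict` is only honored in eager mode, since a graph-compiled call needs a fixed output structure. A hedged sketch of the difference, with the checkpoint name again illustrative:

```python
from transformers import AutoTokenizer, TFAutoModel

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = TFAutoModel.from_pretrained("bert-base-uncased")
inputs = tokenizer("Hello world", return_tensors="tf")

# Eager mode: the flag is honored, and False yields a plain tuple.
tuple_outputs = model(**inputs, return_dict=False)

# Under tf.function (graph mode), the docstrings above note the value is
# always forced to True, so code should not rely on tuple outputs there.
```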
diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py
index a36c2f068b83..bdb896a25277 100755
--- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py
+++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py
@@ -746,7 +746,7 @@ def _set_gradient_checkpointing(self, module, value=False):
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
@@ -2157,7 +2157,7 @@ def _set_gradient_checkpointing(self, module, value=False):
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
@@ -2186,7 +2186,7 @@ def _set_gradient_checkpointing(self, module, value=False):
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
 """
@@ -2272,7 +2272,7 @@ def forward(
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -2494,7 +2494,7 @@ def forward(
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
@@ -3270,7 +3270,7 @@ def forward(
             Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
             more detail.
         return_dict (`bool`, *optional*):
-            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
+            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.

         Returns:
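All of the `return_dict` hunks above point to the same contract, which a short PyTorch sketch makes concrete (checkpoint name illustrative, as before):

```python
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")
inputs = tokenizer("Hello world", return_tensors="pt")

outputs = model(**inputs)                    # default: a ModelOutput
hidden = outputs.last_hidden_state           # named attribute access

legacy = model(**inputs, return_dict=False)  # opt out: a plain tuple
hidden = legacy[0]                           # positional access only
```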
diff --git a/templates/adding_a_new_model/open_model_proposals/ADD_BIG_BIRD.md b/templates/adding_a_new_model/open_model_proposals/ADD_BIG_BIRD.md
index fee2e53ed251..1c7827d898f4 100644
--- a/templates/adding_a_new_model/open_model_proposals/ADD_BIG_BIRD.md
+++ b/templates/adding_a_new_model/open_model_proposals/ADD_BIG_BIRD.md
@@ -532,7 +532,7 @@ to make your debugging environment as efficient as possible.
   due to multiple dropout layers in the model. Make sure that the
   forward pass in your debugging environment is **deterministic** so
   that the dropout layers are not used. Or use
-  `transformers.file_utils.set_seed` if the old and new
+  `transformers.utils.set_seed` if the old and new
   implementations are in the same framework.

 #### (Important) More details on how to create a debugging environment for BigBird
diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py
index 6720f3d8d35a..dae4fa421845 100644
--- a/utils/tests_fetcher.py
+++ b/utils/tests_fetcher.py
@@ -268,6 +268,8 @@ def create_reverse_dependency_map():
         "feature_extraction_sequence_utils.py": "test_sequence_feature_extraction_common.py",
         "feature_extraction_utils.py": "test_feature_extraction_common.py",
         "file_utils.py": ["utils/test_file_utils.py", "utils/test_model_output.py"],
+        "utils/generic.py": ["utils/test_file_utils.py", "utils/test_model_output.py"],
+        "utils/hub.py": "utils/test_file_utils.py",
         "modelcard.py": "utils/test_model_card.py",
         "modeling_flax_utils.py": "test_modeling_flax_common.py",
         "modeling_tf_utils.py": ["test_modeling_tf_common.py", "utils/test_modeling_tf_core.py"],
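The new `tests_fetcher.py` entries mirror the split of `file_utils.py` into `utils/generic.py` and `utils/hub.py`: a value in this table is either a single test file (a plain string) or several (a list). A hypothetical consumer showing how such a mixed-shape table can be normalized; only the two dictionary entries come from the diff itself, the helper is illustrative:

```python
from typing import Dict, List, Union

# Shape of the special-case table in utils/tests_fetcher.py: str or list values.
SPECIAL_TEST_FILES: Dict[str, Union[str, List[str]]] = {
    "utils/generic.py": ["utils/test_file_utils.py", "utils/test_model_output.py"],
    "utils/hub.py": "utils/test_file_utils.py",
}

def tests_for(module: str) -> List[str]:
    """Normalize a str-or-list entry to a list of test files."""
    tests = SPECIAL_TEST_FILES.get(module, [])
    return [tests] if isinstance(tests, str) else list(tests)

assert tests_for("utils/hub.py") == ["utils/test_file_utils.py"]
```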