Big file_utils cleanup (huggingface#16396)
* Big file_utils cleanup

* This one still needs to be treated separately
sgugger authored Mar 25, 2022
1 parent 2b23e08 commit 088c188
Showing 222 changed files with 441 additions and 439 deletions.
4 changes: 2 additions & 2 deletions ISSUES.md
@@ -72,7 +72,7 @@ You are not required to read the following guidelines before opening an issue. H
from . import dependency_versions_check
File "/transformers/src/transformers/dependency_versions_check.py", line 34, in <module>
from .utils import is_tokenizers_available
File "/transformers/src/transformers/file_utils.py", line 40, in <module>
File "/transformers/src/transformers/utils/import_utils.py", line 40, in <module>
from tqdm.auto import tqdm
ModuleNotFoundError: No module named 'tqdm.auto'
```
@@ -125,7 +125,7 @@ You are not required to read the following guidelines before opening an issue. H
from . import dependency_versions_check
File "/transformers/src/transformers/dependency_versions_check.py", line 34, in <module>
from .utils import is_tokenizers_available
File "/transformers/src/transformers/file_utils.py", line 40, in <module>
File "/transformers/src/transformers/utils/import_utils.py", line 40, in <module>
from tqdm.auto import tqdm
ModuleNotFoundError: No module named 'tqdm.auto'
```
6 changes: 3 additions & 3 deletions docs/README.md
@@ -172,9 +172,9 @@ adds a link to its documentation with this syntax: \[\`XXXClass\`\] or \[\`funct
function to be in the main package.

If you want to create a link to some internal class or function, you need to
- provide its path. For instance: \[\`file_utils.ModelOutput\`\]. This will be converted into a link with
- `file_utils.ModelOutput` in the description. To get rid of the path and only keep the name of the object you are
- linking to in the description, add a ~: \[\`~file_utils.ModelOutput\`\] will generate a link with `ModelOutput` in the description.
+ provide its path. For instance: \[\`utils.ModelOutput\`\]. This will be converted into a link with
+ `utils.ModelOutput` in the description. To get rid of the path and only keep the name of the object you are
+ linking to in the description, add a ~: \[\`~utils.ModelOutput\`\] will generate a link with `ModelOutput` in the description.

The same works for methods so you can either use \[\`XXXClass.method\`\] or \[~\`XXXClass.method\`\].
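
As a quick illustration of the link syntax described above, here is a hypothetical docstring (not taken from the repository) that uses both forms:

```python
def summarize(output):
    """
    A hypothetical docstring illustrating the internal-link syntax.

    Args:
        output ([`~utils.ModelOutput`]):
            Rendered in the built docs as a link whose visible text is just `ModelOutput`;
            writing [`utils.ModelOutput`] instead keeps the full path in the link text.
    """
    return output.to_tuple()
```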

2 changes: 1 addition & 1 deletion docs/source/add_new_model.mdx
@@ -381,7 +381,7 @@ important. Here is some advice is to make your debugging environment as efficien
original code so that you can directly input the ids instead of an input string.
- Make sure that the model in your debugging setup is **not** in training mode, which often causes the model to yield
random outputs due to multiple dropout layers in the model. Make sure that the forward pass in your debugging
- environment is **deterministic** so that the dropout layers are not used. Or use *transformers.file_utils.set_seed*
+ environment is **deterministic** so that the dropout layers are not used. Or use *transformers.utils.set_seed*
if the old and new implementations are in the same framework.
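
A minimal sketch of the deterministic-debugging advice above (assuming `set_seed` is importable from the top-level `transformers` package, and using a small public checkpoint as a stand-in for *brand_new_bert*):

```python
import torch
from transformers import AutoModel, AutoTokenizer, set_seed

set_seed(0)                 # fix the python, numpy and torch RNGs
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")
model.eval()                # disable dropout so the forward pass is deterministic

inputs = tokenizer("a fixed debugging sentence", return_tensors="pt")
with torch.no_grad():
    reference = model(**inputs).last_hidden_state  # compare this tensor across implementations
```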

The following section gives you more specific details/tips on how you can do this for *brand_new_bert*.
22 changes: 11 additions & 11 deletions docs/source/internal/file_utils.mdx
@@ -12,35 +12,35 @@ specific language governing permissions and limitations under the License.

# General Utilities

- This page lists all of Transformers general utility functions that are found in the file `file_utils.py`.
+ This page lists all of Transformers general utility functions that are found in the file `utils.py`.

Most of those are only useful if you are studying the general code in the library.


## Enums and namedtuples

- [[autodoc]] file_utils.ExplicitEnum
+ [[autodoc]] utils.ExplicitEnum

- [[autodoc]] file_utils.PaddingStrategy
+ [[autodoc]] utils.PaddingStrategy

- [[autodoc]] file_utils.TensorType
+ [[autodoc]] utils.TensorType

## Special Decorators

- [[autodoc]] file_utils.add_start_docstrings
+ [[autodoc]] utils.add_start_docstrings

- [[autodoc]] file_utils.add_start_docstrings_to_model_forward
+ [[autodoc]] utils.add_start_docstrings_to_model_forward

- [[autodoc]] file_utils.add_end_docstrings
+ [[autodoc]] utils.add_end_docstrings

- [[autodoc]] file_utils.add_code_sample_docstrings
+ [[autodoc]] utils.add_code_sample_docstrings

- [[autodoc]] file_utils.replace_return_docstrings
+ [[autodoc]] utils.replace_return_docstrings

## Special Properties

- [[autodoc]] file_utils.cached_property
+ [[autodoc]] utils.cached_property

## Other Utilities

- [[autodoc]] file_utils._LazyModule
+ [[autodoc]] utils._LazyModule
2 changes: 1 addition & 1 deletion docs/source/internal/generation_utils.mdx
@@ -25,7 +25,7 @@ Most of those are only useful if you are studying the code of the generate metho
## Generate Outputs

The output of [`~generation_utils.GenerationMixin.generate`] is an instance of a subclass of
- [`~file_utils.ModelOutput`]. This output is a data structure containing all the information returned
+ [`~utils.ModelOutput`]. This output is a data structure containing all the information returned
by [`~generation_utils.GenerationMixin.generate`], but that can also be used as tuple or dictionary.

Here's an example:
2 changes: 1 addition & 1 deletion docs/source/main_classes/model.mdx
@@ -88,4 +88,4 @@ Due to Pytorch design, this functionality is only available for floating dtypes.

## Pushing to the Hub

- [[autodoc]] file_utils.PushToHubMixin
+ [[autodoc]] utils.PushToHubMixin
4 changes: 2 additions & 2 deletions docs/source/main_classes/output.mdx
@@ -12,7 +12,7 @@ specific language governing permissions and limitations under the License.

# Model outputs

- All models have outputs that are instances of subclasses of [`~file_utils.ModelOutput`]. Those are
+ All models have outputs that are instances of subclasses of [`~utils.ModelOutput`]. Those are
data structures containing all the information returned by the model, but that can also be used as tuples or
dictionaries.
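
A minimal sketch of the tuple/dictionary behaviour just described (the checkpoint is only an example; any model behaves the same way):

```python
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")
outputs = model(**tokenizer("hello", return_tensors="pt"))

print(outputs.last_hidden_state.shape)      # attribute access
print(outputs["last_hidden_state"].shape)   # dict-style access
print(outputs[0].shape)                     # tuple-style access
```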

@@ -57,7 +57,7 @@ documented on their corresponding model page.

## ModelOutput

- [[autodoc]] file_utils.ModelOutput
+ [[autodoc]] utils.ModelOutput
- to_tuple

## BaseModelOutput
2 changes: 1 addition & 1 deletion docs/source/main_classes/trainer.mdx
@@ -40,7 +40,7 @@ The [`Trainer`] contains the basic training loop which supports the above featur
The [`Trainer`] class is optimized for 🤗 Transformers models and can have surprising behaviors
when you use it on other models. When using it on your own model, make sure:

- - your model always return tuples or subclasses of [`~file_utils.ModelOutput`].
+ - your model always return tuples or subclasses of [`~utils.ModelOutput`].
- your model can compute the loss if a `labels` argument is provided and that loss is returned as the first
element of the tuple (if your model returns tuples)
- your model can accept multiple label arguments (use the `label_names` in your [`TrainingArguments`] to indicate their name to the [`Trainer`]) but none of them should be named `"label"`.
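
A minimal sketch of a custom model that satisfies the constraints listed above (the names and dimensions are illustrative, not part of the library):

```python
import torch
from torch import nn


class MyRegressor(nn.Module):
    def __init__(self, input_dim: int = 16):
        super().__init__()
        self.proj = nn.Linear(input_dim, 1)

    def forward(self, inputs, labels=None):
        logits = self.proj(inputs)
        if labels is not None:
            # when labels are passed, the loss must be the first element of the returned tuple
            loss = nn.functional.mse_loss(logits.squeeze(-1), labels.float())
            return (loss, logits)
        return (logits,)
```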
2 changes: 1 addition & 1 deletion docs/source/performance.mdx
@@ -855,7 +855,7 @@ If you need to switch a tensor to bf16, it's just: `t.to(dtype=torch.bfloat16)`
Here is how you can check if your setup supports bf16:

```
- python -c 'import transformers; print(f"BF16 support is {transformers.file_utils.is_torch_bf16_available()}")'
+ python -c 'import transformers; print(f"BF16 support is {transformers.utils.is_torch_bf16_available()}")'
```

On the other hand bf16 has a much worse precision than fp16, so there are certain situations where you'd still want to use fp16 and not bf16.
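
A small illustration of that precision gap (not from the original document): bf16 keeps fp32's exponent range but only 7 mantissa bits versus fp16's 10, so its machine epsilon is about 8x larger.

```python
import torch

# machine epsilon: the gap between 1.0 and the next representable value
print(torch.finfo(torch.float16).eps)   # 0.0009765625  (10 mantissa bits)
print(torch.finfo(torch.bfloat16).eps)  # 0.0078125     (7 mantissa bits)
```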
2 changes: 1 addition & 1 deletion examples/pytorch/multiple-choice/run_swag.py
@@ -153,7 +153,7 @@ class DataCollatorForMultipleChoice:
Args:
tokenizer ([`PreTrainedTokenizer`] or [`PreTrainedTokenizerFast`]):
The tokenizer used for encoding the data.
- padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`):
+ padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
among:
2 changes: 1 addition & 1 deletion examples/pytorch/multiple-choice/run_swag_no_trainer.py
@@ -193,7 +193,7 @@ class DataCollatorForMultipleChoice:
Args:
tokenizer ([`PreTrainedTokenizer`] or [`PreTrainedTokenizerFast`]):
The tokenizer used for encoding the data.
- padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`):
+ padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
among:
2 changes: 1 addition & 1 deletion examples/tensorflow/multiple-choice/run_swag.py
@@ -74,7 +74,7 @@ class DataCollatorForMultipleChoice:
Args:
tokenizer ([`PreTrainedTokenizer`] or [`PreTrainedTokenizerFast`]):
The tokenizer used for encoding the data.
- padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`):
+ padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
among:
2 changes: 1 addition & 1 deletion src/transformers/commands/add_new_model_like.py
@@ -784,7 +784,7 @@ def clean_frameworks_in_init(
indent = find_indent(lines[idx])
while find_indent(lines[idx]) >= indent or is_empty_line(lines[idx]):
idx += 1
- # Remove the import from file_utils
+ # Remove the import from utils
elif re_is_xxx_available.search(lines[idx]) is not None:
line = lines[idx]
for framework in to_remove:
8 changes: 4 additions & 4 deletions src/transformers/configuration_utils.py
@@ -93,7 +93,7 @@ class PretrainedConfig(PushToHubMixin):
output_attentions (`bool`, *optional*, defaults to `False`):
Whether or not the model should returns all attentions.
return_dict (`bool`, *optional*, defaults to `True`):
- Whether or not the model should return a [`~transformers.file_utils.ModelOutput`] instead of a plain tuple.
+ Whether or not the model should return a [`~transformers.utils.ModelOutput`] instead of a plain tuple.
is_encoder_decoder (`bool`, *optional*, defaults to `False`):
Whether the model is used as an encoder/decoder or not.
is_decoder (`bool`, *optional*, defaults to `False`):
@@ -170,7 +170,7 @@ class PretrainedConfig(PushToHubMixin):
output_scores (`bool`, *optional*, defaults to `False`):
Whether the model should return the logits when used for generation.
return_dict_in_generate (`bool`, *optional*, defaults to `False`):
- Whether the model should return a [`~transformers.file_utils.ModelOutput`] instead of a `torch.LongTensor`.
+ Whether the model should return a [`~transformers.utils.ModelOutput`] instead of a `torch.LongTensor`.
forced_bos_token_id (`int`, *optional*):
The id of the token to force as the first generated token after the `decoder_start_token_id`. Useful for
multilingual models like [mBART](../model_doc/mbart) where the first generated token needs to be the target
@@ -379,7 +379,7 @@ def name_or_path(self, value):
@property
def use_return_dict(self) -> bool:
"""
- `bool`: Whether or not return [`~file_utils.ModelOutput`] instead of tuples.
+ `bool`: Whether or not return [`~utils.ModelOutput`] instead of tuples.
"""
# If torchscript is set, force `return_dict=False` to avoid jit errors
return self.return_dict and not self.torchscript
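
A hedged sketch of what the `return_dict` flag controls in practice (the checkpoint is only an example):

```python
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
inputs = tokenizer("hello", return_tensors="pt")

model = AutoModel.from_pretrained("bert-base-uncased", return_dict=False)
print(type(model(**inputs)))   # a plain tuple

model.config.return_dict = True
print(type(model(**inputs)))   # a ModelOutput subclass
```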
@@ -417,7 +417,7 @@ def save_pretrained(self, save_directory: Union[str, os.PathLike], push_to_hub:
</Tip>
kwargs:
- Additional key word arguments passed along to the [`~file_utils.PushToHubMixin.push_to_hub`] method.
+ Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
"""
if os.path.isfile(save_directory):
raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")
6 changes: 3 additions & 3 deletions src/transformers/data/data_collator.py
@@ -216,7 +216,7 @@ class DataCollatorWithPadding:
Args:
tokenizer ([`PreTrainedTokenizer`] or [`PreTrainedTokenizerFast`]):
The tokenizer used for encoding the data.
- padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`):
+ padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
among:
@@ -268,7 +268,7 @@ class DataCollatorForTokenClassification(DataCollatorMixin):
Args:
tokenizer ([`PreTrainedTokenizer`] or [`PreTrainedTokenizerFast`]):
The tokenizer used for encoding the data.
- padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`):
+ padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
among:
@@ -523,7 +523,7 @@ class DataCollatorForSeq2Seq:
prepare the *decoder_input_ids*
This is useful when using *label_smoothing* to avoid calculating loss twice.
- padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`):
+ padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
Select a strategy to pad the returned sequences (according to the model's padding side and padding index)
among:
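
A hedged usage sketch for the padding docstrings above (the checkpoint name is just an example; `padding="longest"` is one of the accepted strategies):

```python
from transformers import AutoTokenizer, DataCollatorWithPadding

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
collator = DataCollatorWithPadding(tokenizer, padding="longest")

features = [tokenizer("a short example"), tokenizer("a noticeably longer example sentence")]
batch = collator(features)
print(batch["input_ids"].shape)  # both rows padded to the longest sequence in the batch
```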
4 changes: 2 additions & 2 deletions src/transformers/feature_extraction_sequence_utils.py
@@ -90,7 +90,7 @@ def pad(
Instead of `List[float]` you can have tensors (numpy arrays, PyTorch tensors or TensorFlow tensors),
see the note above for the return type.
- padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `True`):
+ padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`):
Select a strategy to pad the returned sequences (according to the model's padding side and padding
index) among:
@@ -114,7 +114,7 @@ def pad(
to the specific feature_extractor's default.
[What are attention masks?](../glossary#attention-mask)
- return_tensors (`str` or [`~file_utils.TensorType`], *optional*):
+ return_tensors (`str` or [`~utils.TensorType`], *optional*):
If set, will return tensors instead of list of python integers. Acceptable values are:
- `'tf'`: Return TensorFlow `tf.constant` objects.
8 changes: 4 additions & 4 deletions src/transformers/feature_extraction_utils.py
@@ -117,9 +117,9 @@ def convert_to_tensors(self, tensor_type: Optional[Union[str, TensorType]] = Non
Convert the inner content to tensors.
Args:
- tensor_type (`str` or [`~file_utils.TensorType`], *optional*):
-     The type of tensors to use. If `str`, should be one of the values of the enum
-     [`~file_utils.TensorType`]. If `None`, no modification is done.
+ tensor_type (`str` or [`~utils.TensorType`], *optional*):
+     The type of tensors to use. If `str`, should be one of the values of the enum [`~utils.TensorType`]. If
+     `None`, no modification is done.
"""
if tensor_type is None:
return self
@@ -328,7 +328,7 @@ def save_pretrained(self, save_directory: Union[str, os.PathLike], push_to_hub:
</Tip>
kwargs:
- Additional key word arguments passed along to the [`~file_utils.PushToHubMixin.push_to_hub`] method.
+ Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
"""
if os.path.isfile(save_directory):
raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")
2 changes: 1 addition & 1 deletion src/transformers/generation_flax_utils.py
@@ -241,7 +241,7 @@ def generate(
should be prefixed with *decoder_*. Also accepts `encoder_outputs` to skip encoder part.
Return:
- [`~file_utils.ModelOutput`].
+ [`~utils.ModelOutput`].
Examples: