diff --git a/docs/source/community.md b/docs/source/community.md
index 4c4af370a50102..8f979a601a9b9d 100644
--- a/docs/source/community.md
+++ b/docs/source/community.md
@@ -52,6 +52,8 @@ This page regroups resources around 🤗 Transformers developed by the community
 |[Fine-tune BART for summarization in two languages with Trainer class](https://github.com/elsanns/xai-nlp-notebooks/blob/master/fine_tune_bart_summarization_two_langs.ipynb) | How to fine-tune BART for summarization in two languages with Trainer class | [Eliza Szczechla](https://github.com/elsanns) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/elsanns/xai-nlp-notebooks/blob/master/fine_tune_bart_summarization_two_langs.ipynb)|
 |[Evaluate Big Bird on Trivia QA](https://github.com/patrickvonplaten/notebooks/blob/master/Evaluating_Big_Bird_on_TriviaQA.ipynb) | How to evaluate BigBird on long document question answering on Trivia QA | [Patrick von Platen](https://github.com/patrickvonplaten) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/patrickvonplaten/notebooks/blob/master/Evaluating_Big_Bird_on_TriviaQA.ipynb)|
 | [Create video captions using Wav2Vec2](https://github.com/Muennighoff/ytclipcc/blob/main/wav2vec_youtube_captions.ipynb) | How to create YouTube captions from any video by transcribing the audio with Wav2Vec | [Niklas Muennighoff](https://github.com/Muennighoff) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Muennighoff/ytclipcc/blob/main/wav2vec_youtube_captions.ipynb) |
+| [Fine-tune the Vision Transformer on CIFAR-10 using PyTorch Lightning](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/VisionTransformer/Fine_tuning_the_Vision_Transformer_on_CIFAR_10_with_PyTorch_Lightning.ipynb) | How to fine-tune the Vision Transformer (ViT) on CIFAR-10 using HuggingFace Transformers, Datasets and PyTorch Lightning | [Niels Rogge](https://github.com/nielsrogge) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/VisionTransformer/Fine_tuning_the_Vision_Transformer_on_CIFAR_10_with_PyTorch_Lightning.ipynb) |
+| [Fine-tune the Vision Transformer on CIFAR-10 using the 🤗 Trainer](https://github.com/NielsRogge/Transformers-Tutorials/blob/master/VisionTransformer/Fine_tuning_the_Vision_Transformer_on_CIFAR_10_with_the_%F0%9F%A4%97_Trainer.ipynb) | How to fine-tune the Vision Transformer (ViT) on CIFAR-10 using HuggingFace Transformers, Datasets and the 🤗 Trainer | [Niels Rogge](https://github.com/nielsrogge) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/VisionTransformer/Fine_tuning_the_Vision_Transformer_on_CIFAR_10_with_the_%F0%9F%A4%97_Trainer.ipynb) |
 | [Evaluate LUKE on Open Entity, an entity typing dataset](https://github.com/studio-ousia/luke/blob/master/notebooks/huggingface_open_entity.ipynb) | How to evaluate *LukeForEntityClassification* on the Open Entity dataset | [Ikuya Yamada](https://github.com/ikuyamada) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/studio-ousia/luke/blob/master/notebooks/huggingface_open_entity.ipynb) |
 | [Evaluate LUKE on TACRED, a relation extraction dataset](https://github.com/studio-ousia/luke/blob/master/notebooks/huggingface_tacred.ipynb) | How to evaluate *LukeForEntityPairClassification* on the TACRED dataset | [Ikuya Yamada](https://github.com/ikuyamada) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/studio-ousia/luke/blob/master/notebooks/huggingface_tacred.ipynb) |
 | [Evaluate LUKE on CoNLL-2003, an important NER benchmark](https://github.com/studio-ousia/luke/blob/master/notebooks/huggingface_conll_2003.ipynb) | How to evaluate *LukeForEntitySpanClassification* on the CoNLL-2003 dataset | [Ikuya Yamada](https://github.com/ikuyamada) |[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/studio-ousia/luke/blob/master/notebooks/huggingface_conll_2003.ipynb) |
diff --git a/src/transformers/models/deit/feature_extraction_deit.py b/src/transformers/models/deit/feature_extraction_deit.py
index aae149c40b3ee9..591630fff77701 100644
--- a/src/transformers/models/deit/feature_extraction_deit.py
+++ b/src/transformers/models/deit/feature_extraction_deit.py
@@ -38,8 +38,10 @@ class DeiTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
     Args:
         do_resize (:obj:`bool`, `optional`, defaults to :obj:`True`):
             Whether to resize the input to a certain :obj:`size`.
-        size (:obj:`int`, `optional`, defaults to 256):
-            Resize the input to the given size. Only has an effect if :obj:`do_resize` is set to :obj:`True`.
+        size (:obj:`int` or :obj:`Tuple[int, int]`, `optional`, defaults to 256):
+            Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an
+            integer is provided, then the input will be resized to (size, size). Only has an effect if :obj:`do_resize`
+            is set to :obj:`True`.
         resample (:obj:`int`, `optional`, defaults to :obj:`PIL.Image.BICUBIC`):
             An optional resampling filter. This can be one of :obj:`PIL.Image.NEAREST`, :obj:`PIL.Image.BOX`,
             :obj:`PIL.Image.BILINEAR`, :obj:`PIL.Image.HAMMING`, :obj:`PIL.Image.BICUBIC` or :obj:`PIL.Image.LANCZOS`.
@@ -115,7 +117,8 @@ def __call__(
         Returns:
             :class:`~transformers.BatchFeature`: A :class:`~transformers.BatchFeature` with the following fields:

-            - **pixel_values** -- Pixel values to be fed to a model.
+            - **pixel_values** -- Pixel values to be fed to a model, of shape (batch_size, num_channels, height,
+              width).
         """
         # Input type checking for clearer error
         valid_images = False
diff --git a/src/transformers/models/deit/modeling_deit.py b/src/transformers/models/deit/modeling_deit.py
index 602d5e26005b9f..f620e6b78845b2 100644
--- a/src/transformers/models/deit/modeling_deit.py
+++ b/src/transformers/models/deit/modeling_deit.py
@@ -417,9 +417,8 @@ def _init_weights(self, module):
 DEIT_INPUTS_DOCSTRING = r"""
     Args:
         pixel_values (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, num_channels, height, width)`):
-            Pixel values. Padding will be ignored by default should you provide it. Pixel values can be obtained using
-            :class:`~transformers.DeiTFeatureExtractor`. See :meth:`transformers.DeiTFeatureExtractor.__call__` for
-            details.
+            Pixel values. Pixel values can be obtained using :class:`~transformers.DeiTFeatureExtractor`. See
+            :meth:`transformers.DeiTFeatureExtractor.__call__` for details.

         head_mask (:obj:`torch.FloatTensor` of shape :obj:`(num_heads,)` or :obj:`(num_layers, num_heads)`, `optional`):
             Mask to nullify selected heads of the self-attention modules. Mask values selected in ``[0, 1]``:
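For reference, a minimal sketch of the resize semantics the new `size` docstring describes. It assumes DeiTFeatureExtractor's optional center crop can be disabled via a `do_center_crop` flag (an assumption, so only the resize affects the output shape) and uses a placeholder image path; it is an illustration, not part of the patch:

import torch
from PIL import Image
from transformers import DeiTFeatureExtractor

image = Image.open("cats.png")  # placeholder path; any RGB image works

# An integer size resizes both dimensions to (size, size).
extractor = DeiTFeatureExtractor(size=256, do_center_crop=False)  # do_center_crop flag is an assumption
inputs = extractor(images=image, return_tensors="pt")
print(inputs["pixel_values"].shape)  # torch.Size([1, 3, 256, 256])

# A (width, height) tuple is applied as-is; tensors come back as (batch, channels, height, width).
extractor = DeiTFeatureExtractor(size=(192, 256), do_center_crop=False)
inputs = extractor(images=image, return_tensors="pt")
print(inputs["pixel_values"].shape)  # torch.Size([1, 3, 256, 192])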
diff --git a/src/transformers/models/vit/feature_extraction_vit.py b/src/transformers/models/vit/feature_extraction_vit.py
index 50e5d3ba3da1a8..a5177a15b4b032 100644
--- a/src/transformers/models/vit/feature_extraction_vit.py
+++ b/src/transformers/models/vit/feature_extraction_vit.py
@@ -38,8 +38,10 @@ class ViTFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
     Args:
         do_resize (:obj:`bool`, `optional`, defaults to :obj:`True`):
             Whether to resize the input to a certain :obj:`size`.
-        size (:obj:`int`, `optional`, defaults to 224):
-            Resize the input to the given size. Only has an effect if :obj:`do_resize` is set to :obj:`True`.
+        size (:obj:`int` or :obj:`Tuple[int, int]`, `optional`, defaults to 224):
+            Resize the input to the given size. If a tuple is provided, it should be (width, height). If only an
+            integer is provided, then the input will be resized to (size, size). Only has an effect if :obj:`do_resize`
+            is set to :obj:`True`.
         resample (:obj:`int`, `optional`, defaults to :obj:`PIL.Image.BILINEAR`):
             An optional resampling filter. This can be one of :obj:`PIL.Image.NEAREST`, :obj:`PIL.Image.BOX`,
             :obj:`PIL.Image.BILINEAR`, :obj:`PIL.Image.HAMMING`, :obj:`PIL.Image.BICUBIC` or :obj:`PIL.Image.LANCZOS`.
@@ -105,7 +107,8 @@ def __call__(
         Returns:
             :class:`~transformers.BatchFeature`: A :class:`~transformers.BatchFeature` with the following fields:

-            - **pixel_values** -- Pixel values to be fed to a model.
+            - **pixel_values** -- Pixel values to be fed to a model, of shape (batch_size, num_channels, height,
+              width).
         """
         # Input type checking for clearer error
         valid_images = False
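The same `size` semantics apply to ViTFeatureExtractor; the batched return shape documented in `__call__` can be checked with a quick sketch (the COCO image URL is only an example input, any RGB image works):

import requests
from PIL import Image
from transformers import ViTFeatureExtractor

url = "http://images.cocodataset.org/val2017/000000039769.jpg"  # example input only
image = Image.open(requests.get(url, stream=True).raw)

extractor = ViTFeatureExtractor()  # defaults: do_resize=True, size=224
inputs = extractor(images=[image, image], return_tensors="pt")

# The returned BatchFeature behaves like a dict; pixel_values stacks the batch dimension first.
print(inputs["pixel_values"].shape)  # torch.Size([2, 3, 224, 224])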
diff --git a/src/transformers/models/vit/modeling_vit.py b/src/transformers/models/vit/modeling_vit.py
index 3584813db62a38..0972a7b7bf3e84 100644
--- a/src/transformers/models/vit/modeling_vit.py
+++ b/src/transformers/models/vit/modeling_vit.py
@@ -403,9 +403,8 @@ def _init_weights(self, module):
 VIT_INPUTS_DOCSTRING = r"""
     Args:
         pixel_values (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, num_channels, height, width)`):
-            Pixel values. Padding will be ignored by default should you provide it. Pixel values can be obtained using
-            :class:`~transformers.ViTFeatureExtractor`. See :meth:`transformers.ViTFeatureExtractor.__call__` for
-            details.
+            Pixel values. Pixel values can be obtained using :class:`~transformers.ViTFeatureExtractor`. See
+            :meth:`transformers.ViTFeatureExtractor.__call__` for details.

         head_mask (:obj:`torch.FloatTensor` of shape :obj:`(num_heads,)` or :obj:`(num_layers, num_heads)`, `optional`):
             Mask to nullify selected heads of the self-attention modules. Mask values selected in ``[0, 1]``:
diff --git a/tests/fixtures/tests_samples/.gitignore b/tests/fixtures/tests_samples/.gitignore
index 46ad771d4530a6..f5030eb61e7c0b 100644
--- a/tests/fixtures/tests_samples/.gitignore
+++ b/tests/fixtures/tests_samples/.gitignore
@@ -1,7 +1,7 @@
-*.*
-cache*
-temp*
-!*.txt
-!*.tsv
-!*.json
-!.gitignore
\ No newline at end of file
+*.*
+cache*
+temp*
+!*.txt
+!*.tsv
+!*.json
+!.gitignore
\ No newline at end of file
diff --git a/tests/fixtures/tests_samples/COCO/cats.png b/tests/fixtures/tests_samples/COCO/cats.png
new file mode 100644
index 00000000000000..a3b5225fc3cef5
Binary files /dev/null and b/tests/fixtures/tests_samples/COCO/cats.png differ
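Finally, a sketch of how the documented `pixel_values` and `head_mask` shapes fit the model's forward pass, assuming the google/vit-base-patch16-224-in21k checkpoint as an example and a random tensor standing in for feature-extractor output:

import torch
from transformers import ViTModel

model = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k")

pixel_values = torch.randn(1, 3, 224, 224)  # stand-in for ViTFeatureExtractor output

# head_mask of shape (num_layers, num_heads): 1.0 keeps a head, 0.0 nullifies it.
head_mask = torch.ones(model.config.num_hidden_layers, model.config.num_attention_heads)
head_mask[0, 0] = 0.0  # drop the first attention head of the first layer

outputs = model(pixel_values=pixel_values, head_mask=head_mask)
# 197 = 1 [CLS] token + (224 / 16) ** 2 patches for ViT-Base/16
print(outputs.last_hidden_state.shape)  # torch.Size([1, 197, 768])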