
[CI] Check test if the GenerationTesterMixin inheritance is correct 🐛 🔫 #36180

Merged: 11 commits merged on Feb 21, 2025
5 changes: 1 addition & 4 deletions src/transformers/modeling_utils.py
@@ -1645,17 +1645,14 @@ def base_model(self) -> nn.Module:
    @classmethod
    def can_generate(cls) -> bool:
        """
        Returns whether this model can generate sequences with `.generate()`.
        Returns whether this model can generate sequences with `.generate()` from the `GenerationMixin`.

        Returns:
            `bool`: Whether this model can generate sequences with `.generate()`.
        """
        # Directly inherits `GenerationMixin` -> can generate
        if "GenerationMixin" in str(cls.__bases__):
            return True
        # Model class overwrites `generate` (e.g. time series models) -> can generate
        if str(cls.__name__) in str(cls.generate):
            return True
Comment on lines -1696 to -1698
Member Author: can_generate() is only used in GenerationMixin-related code. Let's remove the time series models from this function.

Member: is it completely different, or does it use part of generate(), like some audio models?

Member Author: it's completely different ☠️

        # The class inherits from a class that can generate (recursive check) -> can generate
        for base in cls.__bases__:
            if not hasattr(base, "can_generate"):
3 changes: 1 addition & 2 deletions src/transformers/models/albert/modeling_albert.py
@@ -24,7 +24,6 @@
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ...activations import ACT2FN
from ...generation import GenerationMixin
from ...modeling_attn_mask_utils import _prepare_4d_attention_mask_for_sdpa
from ...modeling_outputs import (
BaseModelOutput,
@@ -984,7 +983,7 @@ def forward(self, pooled_output: torch.Tensor) -> torch.Tensor:
"Albert Model with a `language modeling` head on top.",
ALBERT_START_DOCSTRING,
)
class AlbertForMaskedLM(AlbertPreTrainedModel, GenerationMixin):
class AlbertForMaskedLM(AlbertPreTrainedModel):
_tied_weights_keys = ["predictions.decoder.bias", "predictions.decoder.weight"]

def __init__(self, config):
8 changes: 4 additions & 4 deletions src/transformers/models/seamless_m4t/modeling_seamless_m4t.py
@@ -2912,7 +2912,7 @@ def _reorder_cache(past_key_values, beam_idx):
"The speech-to-text SeamlessM4T Model transformer which can be used for S2TT.",
SEAMLESS_M4T_START_DOCSTRING,
)
class SeamlessM4TForSpeechToText(SeamlessM4TPreTrainedModel):
class SeamlessM4TForSpeechToText(SeamlessM4TPreTrainedModel, GenerationMixin):
_keys_to_ignore_on_load_missing = ["text_decoder", "t2u_model", "vocoder"]
main_input_name = "input_features"

@@ -3182,7 +3182,7 @@ def _reorder_cache(past_key_values, beam_idx):
"The text-to-speech SeamlessM4T Model transformer which can be used for T2ST.",
SEAMLESS_M4T_START_DOCSTRING,
)
class SeamlessM4TForTextToSpeech(SeamlessM4TPreTrainedModel):
class SeamlessM4TForTextToSpeech(SeamlessM4TPreTrainedModel, GenerationMixin):
_keys_to_ignore_on_load_missing = ["speech_encoder"]
main_input_name = "input_ids"

@@ -3511,7 +3511,7 @@ def _reorder_cache(past_key_values, beam_idx):
"The speech-to-speech SeamlessM4T Model transformer which can be used for S2ST.",
SEAMLESS_M4T_START_DOCSTRING,
)
class SeamlessM4TForSpeechToSpeech(SeamlessM4TPreTrainedModel):
class SeamlessM4TForSpeechToSpeech(SeamlessM4TPreTrainedModel, GenerationMixin):
_keys_to_ignore_on_load_missing = ["text_encoder"]
main_input_name = "input_features"

@@ -3854,7 +3854,7 @@ def _reorder_cache(past_key_values, beam_idx):
Default modality. Used to initialize the model.
""",
)
class SeamlessM4TModel(SeamlessM4TPreTrainedModel):
class SeamlessM4TModel(SeamlessM4TPreTrainedModel, GenerationMixin):
_tied_weights_keys = [
"lm_head.weight",
"text_encoder.embed_tokens.weight",
src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py
@@ -3192,7 +3192,7 @@ def _reorder_cache(past_key_values, beam_idx):
"The speech-to-text SeamlessM4Tv2 Model transformer which can be used for S2TT.",
SEAMLESS_M4T_V2_START_DOCSTRING,
)
class SeamlessM4Tv2ForSpeechToText(SeamlessM4Tv2PreTrainedModel):
class SeamlessM4Tv2ForSpeechToText(SeamlessM4Tv2PreTrainedModel, GenerationMixin):
_keys_to_ignore_on_load_missing = ["text_decoder", "t2u_model", "vocoder"]
main_input_name = "input_features"

@@ -3473,7 +3473,7 @@ def _reorder_cache(past_key_values, beam_idx):
"The text-to-speech SeamlessM4Tv2 Model transformer which can be used for T2ST.",
SEAMLESS_M4T_V2_START_DOCSTRING,
)
class SeamlessM4Tv2ForTextToSpeech(SeamlessM4Tv2PreTrainedModel):
class SeamlessM4Tv2ForTextToSpeech(SeamlessM4Tv2PreTrainedModel, GenerationMixin):
_keys_to_ignore_on_load_missing = ["speech_encoder"]
main_input_name = "input_ids"

@@ -3844,7 +3844,7 @@ def _reorder_cache(past_key_values, beam_idx):
"The speech-to-speech SeamlessM4Tv2 Model transformer which can be used for S2ST.",
SEAMLESS_M4T_V2_START_DOCSTRING,
)
class SeamlessM4Tv2ForSpeechToSpeech(SeamlessM4Tv2PreTrainedModel):
class SeamlessM4Tv2ForSpeechToSpeech(SeamlessM4Tv2PreTrainedModel, GenerationMixin):
_keys_to_ignore_on_load_missing = ["text_encoder"]
main_input_name = "input_features"

@@ -4229,7 +4229,7 @@ def _reorder_cache(past_key_values, beam_idx):
This will be updated automatically according to the modality passed to the forward and generate passes (`input_ids` for text and `input_features` for audio).
""",
)
class SeamlessM4Tv2Model(SeamlessM4Tv2PreTrainedModel):
class SeamlessM4Tv2Model(SeamlessM4Tv2PreTrainedModel, GenerationMixin):
_tied_weights_keys = [
"lm_head.weight",
"text_encoder.embed_tokens.weight",
37 changes: 36 additions & 1 deletion tests/generation/test_utils.py
@@ -19,6 +19,7 @@
import datetime
import gc
import inspect
import random
import tempfile
import unittest
import warnings
@@ -48,7 +49,6 @@
)
from transformers.utils import is_ipex_available

from ..test_modeling_common import floats_tensor, ids_tensor
from .test_framework_agnostic import GenerationIntegrationTestsMixin


@@ -2753,6 +2753,41 @@ def test_speculative_sampling_target_distribution(self):
self.assertTrue(last_token_counts[8] > last_token_counts[3])


global_rng = random.Random()
Member Author: copied from test_modeling_common, otherwise we would have circular dependencies

Member: a comment with "copied from" can be added, I think

Collaborator: are these used in a lot of places in this file, or just inside one method?

If so, we can probably avoid circular dependencies by importing them within that (single) method?

Member Author: That's a good idea, moving to an internal import to prevent code bloat
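The "internal import" being discussed is a function-level import, roughly as in this sketch. The tester class and test method here are hypothetical; the pattern only assumes the test module lives in the same package as test_modeling_common, as tests/generation/test_utils.py does:

```python
import unittest


class ExampleGenerationTest(unittest.TestCase):  # hypothetical tester, for illustration only
    def test_uses_shared_helpers(self):
        # Importing inside the method defers resolution to call time, so test_utils and
        # test_modeling_common no longer need to import each other at module load.
        from ..test_modeling_common import ids_tensor

        input_ids = ids_tensor((2, 8), vocab_size=100)
        self.assertEqual(tuple(input_ids.shape), (2, 8))
```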

Member Author: uhmmm local imports would be needed in many places, will go with # Copied from instead

Collaborator (@ydshieh, Feb 14, 2025): another possible approach is not to use

issubclass(self.__class__, GenerationTesterMixin)

but to check the __name__ of all parent classes (recursively, but maybe there is no built-in function to do this?) and see if GenerationTesterMixin is in that set.

Up to you.

p.s. # Copied from is less maintained and we would like to get away from it (since now we rely more on modular)
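That parent-class check needs no hand-rolled recursion: Python's method resolution order already lists every ancestor. A minimal sketch (the helper name is made up here):

```python
def inherits_generation_tester_mixin(tester_class: type) -> bool:
    """Return True if the class, or any of its ancestors, is named GenerationTesterMixin."""
    return any(base.__name__ == "GenerationTesterMixin" for base in tester_class.__mro__)
```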

Member Author: It's also okay to have a pure copy of the short functions :P It's just a handful of lines, I don't think it's worth the extra work for now -- I will have to refactor these lines when we remove TF (i.e. very soon) 👀

Collaborator: OK 👍



def ids_tensor(shape, vocab_size, rng=None, name=None):
    # Creates a random int32 tensor of the shape within the vocab size
    if rng is None:
        rng = global_rng

    total_dims = 1
    for dim in shape:
        total_dims *= dim

    values = []
    for _ in range(total_dims):
        values.append(rng.randint(0, vocab_size - 1))

    return torch.tensor(data=values, dtype=torch.long, device=torch_device).view(shape).contiguous()


def floats_tensor(shape, scale=1.0, rng=None, name=None):
    """Creates a random float32 tensor"""
    if rng is None:
        rng = global_rng

    total_dims = 1
    for dim in shape:
        total_dims *= dim

    values = []
    for _ in range(total_dims):
        values.append(rng.random() * scale)

    return torch.tensor(data=values, dtype=torch.float, device=torch_device).view(shape).contiguous()
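A quick usage sketch of these helpers (the shapes and vocab size below are arbitrary examples, not values the tests use):

```python
# A fake batch of 2 sequences of length 8
dummy_input_ids = ids_tensor((2, 8), vocab_size=100)    # torch.long values in [0, 99]
dummy_features = floats_tensor((2, 8, 16), scale=0.5)   # torch.float values in [0, 0.5)
```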


@pytest.mark.generate
@require_torch
class GenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTestsMixin):
2 changes: 2 additions & 0 deletions tests/models/big_bird/test_modeling_big_bird.py
@@ -451,6 +451,8 @@ class BigBirdModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
if is_torch_available()
else ()
)
# Doesn't run generation tests. There are interface mismatches when using `generate` -- TODO @gante
all_generative_model_classes = ()
Comment on lines +454 to +455
Member: for my understanding: do we need to have an empty all_generative_model_classes if no GenerationTesterMixin is added?

Member Author:
If a model inherits GenerationMixin, one of these two must happen:

  1. GenerationTesterMixin must be in the tester
  2. [when the tests are broken, this should NEVER happen on new models] we specify all_generative_model_classes = () to make it very clear we're NOT running generation tests

option 2 is intentionally annoying (we are forced to overwrite a property), so we are very explicit about skipping tests. We don't want skips to happen unless we're very intentional about it.
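To make those two options concrete, here is a hedged sketch of what such a consistency check could look like. The method name matches the test skipped later in this diff for Idefics, but the body is an illustrative assumption, not the PR's actual implementation:

```python
def test_generation_tester_mixin_inheritance(self):
    # A tester whose models can generate must either run the generation tests
    # (inherit GenerationTesterMixin) or opt out explicitly with `all_generative_model_classes = ()`.
    has_generative_models = any(model_class.can_generate() for model_class in self.all_model_classes)
    inherits_generation_tester = any(
        base.__name__ == "GenerationTesterMixin" for base in type(self).__mro__
    )
    explicitly_opted_out = (
        "all_generative_model_classes" in vars(type(self)) and not self.all_generative_model_classes
    )
    self.assertTrue(
        not has_generative_models or inherits_generation_tester or explicitly_opted_out,
        "Some model classes under test inherit `GenerationMixin`: add `GenerationTesterMixin` to the "
        "tester, or set `all_generative_model_classes = ()` to skip generation tests intentionally.",
    )
```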

pipeline_model_mapping = (
{
"feature-extraction": BigBirdModel,
4 changes: 4 additions & 0 deletions tests/models/blip/test_modeling_blip.py
@@ -805,6 +805,8 @@ def prepare_config_and_inputs_for_common(self):
@require_vision
class BlipVQAModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (BlipForQuestionAnswering,) if is_torch_available() else ()
# Doesn't run generation tests. There are interface mismatches when using `generate` -- TODO @gante
all_generative_model_classes = ()
fx_compatible = False
test_head_masking = False
test_pruning = False
@@ -1112,6 +1114,8 @@ def test_model_from_pretrained(self):
@require_torch
class BlipTextImageModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (BlipForConditionalGeneration,) if is_torch_available() else ()
# Doesn't run generation tests. There are interface mismatches when using `generate` -- TODO @gante
all_generative_model_classes = ()
fx_compatible = False
test_head_masking = False
test_pruning = False
2 changes: 1 addition & 1 deletion tests/models/blip_2/test_modeling_blip_2.py
@@ -992,7 +992,7 @@ def prepare_config_and_inputs_for_common(self):


@require_torch
class Blip2ModelTest(ModelTesterMixin, PipelineTesterMixin, GenerationTesterMixin, unittest.TestCase):
class Blip2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (Blip2ForConditionalGeneration, Blip2Model) if is_torch_available() else ()
# Doesn't run generation tests. TODO: fix generation tests for Blip2ForConditionalGeneration
all_generative_model_classes = ()
2 changes: 2 additions & 0 deletions tests/models/clvp/test_modeling_clvp.py
@@ -408,6 +408,8 @@ def prepare_config_and_inputs_for_common(self):
@require_torch
class ClvpModelForConditionalGenerationTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (ClvpModelForConditionalGeneration,) if is_torch_available() else ()
# Doesn't run generation tests. There are interface mismatches when using `generate` -- TODO @gante
all_generative_model_classes = ()

test_head_masking = False
test_pruning = False
tests/models/conditional_detr/test_modeling_conditional_detr.py
@@ -22,7 +22,6 @@
from transformers.testing_utils import require_timm, require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property

from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
@@ -173,7 +172,7 @@ def create_and_check_conditional_detr_object_detection_head_model(self, config,


@require_torch
class ConditionalDetrModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
class ConditionalDetrModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
ConditionalDetrModel,
2 changes: 2 additions & 0 deletions tests/models/cpmant/test_modeling_cpmant.py
@@ -136,6 +136,8 @@ def prepare_config_and_inputs_for_common(self):
@require_torch
class CpmAntModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (CpmAntModel, CpmAntForCausalLM) if is_torch_available() else ()
# Doesn't run generation tests. There are interface mismatches when using `generate` -- TODO @gante
all_generative_model_classes = ()
pipeline_model_mapping = (
{"feature-extraction": CpmAntModel, "text-generation": CpmAntForCausalLM} if is_torch_available() else {}
)
12 changes: 2 additions & 10 deletions tests/models/dab_detr/test_modeling_dab_detr.py
@@ -23,7 +23,6 @@
from transformers.testing_utils import require_timm, require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property

from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
@@ -174,15 +173,8 @@ def create_and_check_dab_detr_object_detection_head_model(self, config, pixel_va


@require_torch
class DabDetrModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
DabDetrModel,
DabDetrForObjectDetection,
)
if is_torch_available()
else ()
)
class DabDetrModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (DabDetrModel, DabDetrForObjectDetection) if is_torch_available() else ()
pipeline_model_mapping = (
{
"image-feature-extraction": DabDetrModel,
tests/models/decision_transformer/test_modeling_decision_transformer.py
@@ -20,7 +20,6 @@
from transformers import DecisionTransformerConfig, is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device

from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin
@@ -125,7 +124,7 @@ def prepare_config_and_inputs_for_common(self):


@require_torch
class DecisionTransformerModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
class DecisionTransformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (DecisionTransformerModel,) if is_torch_available() else ()
pipeline_model_mapping = {"feature-extraction": DecisionTransformerModel} if is_torch_available() else {}

tests/models/deformable_detr/test_modeling_deformable_detr.py
@@ -31,7 +31,6 @@
torch_device,
)

from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
@@ -188,7 +187,7 @@ def create_and_check_deformable_detr_object_detection_head_model(self, config, p


@require_torch
class DeformableDetrModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
class DeformableDetrModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (DeformableDetrModel, DeformableDetrForObjectDetection) if is_torch_available() else ()
pipeline_model_mapping = (
{"image-feature-extraction": DeformableDetrModel, "object-detection": DeformableDetrForObjectDetection}
3 changes: 1 addition & 2 deletions tests/models/detr/test_modeling_detr.py
@@ -22,7 +22,6 @@
from transformers.testing_utils import require_timm, require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property

from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
@@ -169,7 +168,7 @@ def create_and_check_detr_object_detection_head_model(self, config, pixel_values


@require_torch
class DetrModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
class DetrModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
DetrModel,
2 changes: 2 additions & 0 deletions tests/models/electra/test_modeling_electra.py
@@ -389,6 +389,8 @@ class ElectraModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
if is_torch_available()
else ()
)
# Doesn't run generation tests. There are interface mismatches when using `generate` -- TODO @gante
all_generative_model_classes = ()
pipeline_model_mapping = (
{
"feature-extraction": ElectraModel,
2 changes: 2 additions & 0 deletions tests/models/flaubert/test_modeling_flaubert.py
@@ -377,6 +377,8 @@ class FlaubertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
if is_torch_available()
else ()
)
# Doesn't run generation tests. Outdated custom `prepare_inputs_for_generation` -- TODO @gante
all_generative_model_classes = ()
pipeline_model_mapping = (
{
"feature-extraction": FlaubertModel,
8 changes: 8 additions & 0 deletions tests/models/idefics/test_modeling_idefics.py
@@ -327,6 +327,8 @@ def test_eager_matches_sdpa_generate(self):
@require_torch
class IdeficsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (IdeficsModel, IdeficsForVisionText2Text) if is_torch_available() else ()
# Doesn't run generation tests here -- idefics has a dedicated tester for generation tests below
all_generative_model_classes = ()
pipeline_model_mapping = (
{"feature-extraction": IdeficsModel, "image-text-to-text": IdeficsForVisionText2Text}
if is_torch_available()
@@ -866,6 +868,12 @@ def test_training_gradient_checkpointing_use_reentrant_false(self):
def test_sdpa_can_dispatch_non_composite_models(self):
pass

@unittest.skip(
"Idefics has a separate test runner for generation tests with complex inheritance, causing this check to fail"
)
def test_generation_tester_mixin_inheritance(self):
pass


@require_torch
@require_vision
3 changes: 1 addition & 2 deletions tests/models/lilt/test_modeling_lilt.py
@@ -19,7 +19,6 @@
from transformers import LiltConfig, is_torch_available
from transformers.testing_utils import require_torch, slow, torch_device

from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, ids_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
@@ -218,7 +217,7 @@ def prepare_config_and_inputs_for_common(self):


@require_torch
class LiltModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
class LiltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
all_model_classes = (
(
LiltModel,
2 changes: 2 additions & 0 deletions tests/models/megatron_bert/test_modeling_megatron_bert.py
@@ -282,6 +282,8 @@ class MegatronBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Test
if is_torch_available()
else ()
)
# Doesn't run generation tests. There are interface mismatches when using `generate` -- TODO @gante
all_generative_model_classes = ()
pipeline_model_mapping = (
{
"feature-extraction": MegatronBertModel,
3 changes: 1 addition & 2 deletions tests/models/modernbert/test_modeling_modernbert.py
@@ -29,7 +29,6 @@
torch_device,
)

from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin
@@ -216,7 +215,7 @@ def prepare_config_and_inputs_for_common(self):


@require_torch
class ModernBertModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
class ModernBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
test_torchscript = False

all_model_classes = (