huggingface · amyeroberts · May 17, 2024 · May 16, 2024 · May 16, 2024
diff --git a/src/transformers/models/video_llava/modeling_video_llava.py b/src/transformers/models/video_llava/modeling_video_llava.py
@@ -124,6 +124,7 @@ class VideoLlavaPreTrainedModel(PreTrainedModel):
     supports_gradient_checkpointing = True
     _skip_keys_device_placement = "past_key_values"
     _supports_flash_attn_2 = True
+    _no_split_modules = ["VideoLlavaVisionAttention"]
 
     def _init_weights(self, module):
         # important: this ported version of VideoLlava isn't meant for training from scratch - only