Add BROS #23190

Merged
merged 120 commits from add-bros into main on Sep 14, 2023

Changes from 1 commit
3db764f
add Bros boilerplate
jinhopark8345 May 10, 2023
2dc368a
copy and pasted modeling_bros.py from official Bros repo
jinhopark8345 May 10, 2023
5603061
update copyright of bros files
jinhopark8345 May 10, 2023
dbc56b8
copy tokenization_bros.py from official repo and update import path
jinhopark8345 May 10, 2023
e2a2d9d
copy tokenization_bros_fast.py from official repo and update import path
jinhopark8345 May 10, 2023
90ce711
copy configuration_bros.py from official repo and update import path
jinhopark8345 May 10, 2023
6126da1
remove trailing period in copyright line
jinhopark8345 May 10, 2023
63139eb
copy and paste bros/__init__.py from official repo
jinhopark8345 May 10, 2023
596d1a7
save formatting
jinhopark8345 May 14, 2023
764e8df
remove unused pe_type argument - using only crel type
jinhopark8345 May 14, 2023
f35348f
resolve import issue
jinhopark8345 May 14, 2023
892dd2d
remove unused model classes
jinhopark8345 May 14, 2023
37c7d9f
remove unnecessary tests
jinhopark8345 May 18, 2023
d878de0
remove unused classes
jinhopark8345 May 18, 2023
772d20e
fix original code's bug - layer_module's argument order
jinhopark8345 May 18, 2023
6ef6ca7
clean up modeling auto
jinhopark8345 May 18, 2023
c338261
add bbox to prepare_config_and_inputs
jinhopark8345 May 18, 2023
7379457
set temporary value to hidden_size (32 is too low because of the
jinhopark8345 May 18, 2023
602e2d9
remove decoder test, update create_and_check* input arguments
jinhopark8345 May 18, 2023
79b886c
add missing variable to model tests
jinhopark8345 May 18, 2023
5f35f68
do make fixup
jinhopark8345 May 20, 2023
3eace5d
update bros.mdx
jinhopark8345 May 21, 2023
9f0e8ca
add boilerplate for no_head inference test
jinhopark8345 May 21, 2023
66ff6ce
update BROS_PRETRAINED_MODEL_ARCHIVE_LIST (add naver-clova-ocr prefix)
jinhopark8345 May 21, 2023
f3e9dab
add prepare_bros_batch_inputs function
jinhopark8345 May 21, 2023
7022d4c
update modeling_common to add bbox inputs in Bros Model Test
jinhopark8345 May 21, 2023
f9aab55
remove unnecessary model inference
jinhopark8345 May 22, 2023
41e1ad9
add test case
jinhopark8345 May 22, 2023
94cf5fc
add model_doc
jinhopark8345 May 23, 2023
d10e166
add test case for token_classification
jinhopark8345 May 24, 2023
2845c23
apply fixup
jinhopark8345 May 24, 2023
55d5d7b
update modeling code
jinhopark8345 Jul 23, 2023
e41ab5d
update BrosForTokenClassification loss calculation logic
jinhopark8345 Aug 1, 2023
4ef71fd
revert logits preprocessing logic to make sure logits have original s…
jinhopark8345 Aug 1, 2023
d735bd5
- update class name
jinhopark8345 Aug 8, 2023
5ce570e
- add BrosSpadeOutput
jinhopark8345 Aug 8, 2023
4933093
add boilerplate for no_head inference test
jinhopark8345 Aug 8, 2023
0d53a2d
add prepare_bros_batch_inputs function
jinhopark8345 May 21, 2023
7228d98
add test case
jinhopark8345 May 22, 2023
9e758d7
add test case for token_classification
jinhopark8345 May 24, 2023
7be8d1d
update modeling code
jinhopark8345 Jul 23, 2023
ca9f5e8
update BrosForTokenClassification loss calculation logic
jinhopark8345 Aug 1, 2023
a6e77d7
revert logits preprocessing logic to make sure logits have original s…
jinhopark8345 Aug 1, 2023
13639d7
Merge remote-tracking branch 'upstream/main' into add-bros
jinhopark8345 Aug 9, 2023
725e145
apply masking on the fly
jinhopark8345 Aug 9, 2023
f5113b3
add BrosSpadeForTokenLinking
jinhopark8345 Aug 10, 2023
a955d3c
update class name
jinhopark8345 Aug 13, 2023
0cb524f
separate the logits calculation logic and loss calculation logic
jinhopark8345 Aug 13, 2023
5939860
update logic for loss calculation so that logits shape doesn't change
jinhopark8345 Aug 14, 2023
179c4f9
update typo
jinhopark8345 Aug 14, 2023
24d55f9
update prepare_config_and_inputs
jinhopark8345 Aug 14, 2023
aa28567
update dummy node initialization
jinhopark8345 Aug 15, 2023
d1a120f
update last_hidden_states getting logic to consider when return_dict …
jinhopark8345 Aug 15, 2023
ed5efb3
update box first token mask param
jinhopark8345 Aug 15, 2023
2d7bcc7
bugfix: remove random attention mask generation
jinhopark8345 Aug 15, 2023
8379565
update keys to ignore on load missing
jinhopark8345 Aug 15, 2023
632fde5
run make style and quality
jinhopark8345 Aug 15, 2023
1f2a956
apply make style and quality of other codes
jinhopark8345 Aug 15, 2023
d83b042
update box_first_token_mask to bool type
jinhopark8345 Aug 16, 2023
794dbba
update index.md
jinhopark8345 Aug 16, 2023
863155f
apply make style and quality
jinhopark8345 Aug 16, 2023
d18a5e6
Merge remote-tracking branch 'upstream/main' into add-bros
jinhopark8345 Aug 16, 2023
6f7c3d3
apply make fix-copies
jinhopark8345 Aug 16, 2023
eb7ba73
pass check_repo
jinhopark8345 Aug 17, 2023
16f4830
update bros model doc
jinhopark8345 Aug 18, 2023
ecce552
docstring bugfix
jinhopark8345 Aug 18, 2023
d927015
add checkpoint for doc, tokenizer for doc
jinhopark8345 Aug 18, 2023
46ec931
Update README.md
jinhopark8345 Aug 18, 2023
4433162
Update docs/source/en/model_doc/bros.md
jinhopark8345 Aug 18, 2023
828c9b0
Update bros.md
jinhopark8345 Aug 18, 2023
41de331
Update src/transformers/__init__.py
jinhopark8345 Aug 18, 2023
fa52d90
Update docs/source/en/model_doc/bros.md
jinhopark8345 Aug 18, 2023
d219760
Apply suggestions from code review
jinhopark8345 Aug 18, 2023
3b64b10
apply suggestions from code review
jinhopark8345 Aug 19, 2023
6811e44
apply suggestions from code review
jinhopark8345 Aug 19, 2023
8922ffa
revert test_processor_markuplm.py
jinhopark8345 Aug 19, 2023
09c3f82
Update test_processor_markuplm.py
jinhopark8345 Aug 19, 2023
a4d2e91
apply suggestions from code review
jinhopark8345 Aug 19, 2023
6bce6e1
apply suggestions from code review
jinhopark8345 Aug 21, 2023
0b3e750
apply suggestions from code review
jinhopark8345 Aug 21, 2023
a10fbac
update BrosSpadeELForTokenClassification head name to entity linker
jinhopark8345 Aug 21, 2023
336a94c
add doc string for config params
jinhopark8345 Aug 21, 2023
9da2fa4
update class, var names to more explicit and apply suggestions from c…
jinhopark8345 Aug 21, 2023
e2e304f
remove unnecessary keys to ignore
jinhopark8345 Aug 21, 2023
f621427
update relation extractor to be initialized with config
jinhopark8345 Aug 21, 2023
8a7d54c
add bros processor
jinhopark8345 Aug 21, 2023
fb7a991
apply make style and quality
jinhopark8345 Aug 21, 2023
9a47510
update bros.md
jinhopark8345 Aug 21, 2023
ab706c0
remove bros tokenizer, add bros processor that wraps bert tokenizer
jinhopark8345 Aug 21, 2023
5222230
revert change
jinhopark8345 Aug 21, 2023
3ef8bd5
apply make fix-copies
jinhopark8345 Aug 21, 2023
2a5a010
update processor code, update itc -> initial token, stc -> subsequent…
jinhopark8345 Aug 21, 2023
7761029
add type hint
jinhopark8345 Aug 21, 2023
e9449d1
remove unnecessary condition branches in embedding forward
jinhopark8345 Aug 21, 2023
b001e88
Merge remote-tracking branch 'upstream/main' into add-bros
jinhopark8345 Aug 21, 2023
6a22091
fix auto tokenizer fail
jinhopark8345 Aug 21, 2023
c16e4d8
update docstring for each classes
jinhopark8345 Aug 23, 2023
66f1446
Merge remote-tracking branch 'upstream/main' into add-bros
jinhopark8345 Aug 23, 2023
3f07cb4
update bbox input dimension as standard 2 points and convert them to 4
jinhopark8345 Aug 24, 2023
20a2bee
update bros docs
jinhopark8345 Aug 24, 2023
14e5591
Merge remote-tracking branch 'upstream/main' into add-bros
jinhopark8345 Aug 30, 2023
52dcb38
apply suggestions from code review : update Bros -> BROS in bros.md
jinhopark8345 Sep 2, 2023
6cdcaf2
1. box prefix var -> bbox
jinhopark8345 Sep 2, 2023
983ac62
replace einsum with torch matmul
jinhopark8345 Sep 4, 2023
007333a
apply style and quality
jinhopark8345 Sep 4, 2023
a51a66d
remove unused argument
jinhopark8345 Sep 4, 2023
0403675
remove unused arguments
jinhopark8345 Sep 4, 2023
e15b019
update docstrings
jinhopark8345 Sep 4, 2023
2b6a8f4
apply suggestions from code review: add BrosBboxEmbeddings, replace
jinhopark8345 Sep 5, 2023
039afcb
Merge remote-tracking branch 'upstream/main' into add-bros
jinhopark8345 Sep 5, 2023
0fb70f1
Merge remote-tracking branch 'upstream/main' into add-bros
jinhopark8345 Sep 8, 2023
1a8558b
revert einsum update
jinhopark8345 Sep 10, 2023
8eb78e1
update bros processor
jinhopark8345 Sep 10, 2023
44a0fc9
apply suggestions from code review
jinhopark8345 Sep 14, 2023
19993a7
add conversion script for bros
jinhopark8345 Sep 14, 2023
8fe9f5a
Apply suggestions from code review
jinhopark8345 Sep 14, 2023
e1d0c73
Merge remote-tracking branch 'upstream/main' into add-bros
jinhopark8345 Sep 14, 2023
9e883fb
fix readme
jinhopark8345 Sep 14, 2023
8223fed
apply fix-copies
jinhopark8345 Sep 14, 2023
187c411
Merge remote-tracking branch 'upstream/main' into add-bros
jinhopark8345 Sep 14, 2023
separate the logits calculation logic and loss calculation logic
jinhopark8345 committed Aug 13, 2023
commit 0cb524f15e33f22ce7204c48a1d83f5ea8fae5fb
29 changes: 13 additions & 16 deletions src/transformers/models/bros/modeling_bros.py
@@ -1122,35 +1122,32 @@ def forward(
         itc_outputs = self.itc_layer(last_hidden_states).transpose(0, 1).contiguous()
         stc_outputs = self.stc_layer(last_hidden_states, last_hidden_states).squeeze(0)

+        itc_logits = itc_outputs.view(-1, self.num_labels)
+
+        # calculate stc_logits
+        inv_attention_mask = 1 - attention_mask
+        bsz, max_seq_length = inv_attention_mask.shape
Collaborator:

Suggested change:
-        bsz, max_seq_length = inv_attention_mask.shape
+        batch_size, max_seq_length = inv_attention_mask.shape

Contributor Author (jinhopark8345):

Apply suggestions from code review.

+        device = inv_attention_mask.device
+        invalid_token_mask = torch.cat([inv_attention_mask, torch.zeros([bsz, 1]).to(device)], axis=1).bool()
+        stc_outputs.masked_fill_(invalid_token_mask[:, None, :], -10000.0)
+        self_token_mask = torch.eye(max_seq_length, max_seq_length + 1).to(device).bool()
+        stc_outputs.masked_fill_(self_token_mask[None, :, :], -10000.0)
+        stc_mask = attention_mask.view(-1).bool()
+        stc_logits = stc_outputs.view(-1, max_seq_length + 1)

         loss = None
         if itc_labels is not None and stc_labels is not None:
             loss_fct = CrossEntropyLoss()

             # get itc loss
-            itc_logits = itc_outputs.view(-1, self.num_labels)
             itc_labels = itc_labels.view(-1)
             if itc_mask is not None:
                 itc_mask = itc_mask.view(-1)
                 itc_loss = loss_fct(itc_logits[itc_mask], itc_labels[itc_mask])
             else:
                 itc_loss = loss_fct(itc_logits, itc_labels)

             # get stc loss
-            inv_attention_mask = 1 - attention_mask
-
-            bsz, max_seq_length = inv_attention_mask.shape
-            device = inv_attention_mask.device
-
-            invalid_token_mask = torch.cat([inv_attention_mask, torch.zeros([bsz, 1]).to(device)], axis=1).bool()
-            stc_outputs.masked_fill_(invalid_token_mask[:, None, :], -10000.0)
-
-            self_token_mask = torch.eye(max_seq_length, max_seq_length + 1).to(device).bool()
-            stc_outputs.masked_fill_(self_token_mask[None, :, :], -10000.0)
-
-            stc_mask = attention_mask.view(-1).bool()
-            stc_logits = stc_outputs.view(-1, max_seq_length + 1)
             stc_labels = stc_labels.view(-1)

             stc_loss = loss_fct(stc_logits[stc_mask], stc_labels[stc_mask])

             loss = itc_loss + stc_loss
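In this PR's vocabulary, "itc" stands for initial token classification and "stc" for subsequent token classification (a later commit renames them accordingly). The point of the change is to compute and mask the logits once, before the loss branch, so the returned logits keep the same shape whether or not labels are passed. A minimal sketch of that pattern follows, with assumed shapes and an illustrative helper name rather than the exact BROS code (the real method also filters itc positions with an optional itc_mask):

import torch
from torch.nn import CrossEntropyLoss

def spade_head(itc_outputs, stc_outputs, attention_mask, num_labels,
               itc_labels=None, stc_labels=None):
    # Assumed shapes: itc_outputs (bsz, seq_len, num_labels),
    # stc_outputs (bsz, seq_len, seq_len + 1), attention_mask (bsz, seq_len).
    itc_logits = itc_outputs.reshape(-1, num_labels)

    inv_attention_mask = (1 - attention_mask).float()
    bsz, max_seq_length = inv_attention_mask.shape
    device = inv_attention_mask.device

    # Push padded positions and token self-links down to -10000.0 so they
    # effectively never win a softmax.
    invalid_token_mask = torch.cat(
        [inv_attention_mask, torch.zeros(bsz, 1, device=device)], dim=1
    ).bool()
    stc_outputs = stc_outputs.masked_fill(invalid_token_mask[:, None, :], -10000.0)
    self_token_mask = torch.eye(max_seq_length, max_seq_length + 1, device=device).bool()
    stc_outputs = stc_outputs.masked_fill(self_token_mask[None, :, :], -10000.0)
    stc_logits = stc_outputs.reshape(-1, max_seq_length + 1)

    # The loss branch only consumes the precomputed logits.
    loss = None
    if itc_labels is not None and stc_labels is not None:
        loss_fct = CrossEntropyLoss()
        itc_loss = loss_fct(itc_logits, itc_labels.reshape(-1))
        stc_mask = attention_mask.reshape(-1).bool()
        stc_loss = loss_fct(stc_logits[stc_mask], stc_labels.reshape(-1)[stc_mask])
        loss = itc_loss + stc_loss

    return loss, itc_logits, stc_logits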
43 changes: 42 additions & 1 deletion tests/models/bros/test_modeling_bros.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 """ Testing suite for the PyTorch Bros model. """

-
+import copy
 import unittest

 from transformers import BrosConfig, is_torch_available
@@ -29,6 +29,8 @@

     from transformers import (
         BrosForTokenClassification,
+        BrosSpadeEEForTokenClassification,
+        BrosSpadeELForTokenClassification,
         BrosModel,
     )
     from transformers.models.bros.modeling_bros import (
@@ -162,6 +164,12 @@ def create_and_check_for_token_classification(
         )
         self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))

+    def create_and_check_for_spade_ee_token_classification(self):
+        ...
+
+    def create_and_check_for_spade_el_token_classification(self):
+        ...
+
     def prepare_config_and_inputs_for_common(self):
         config_and_inputs = self.prepare_config_and_inputs()
         (
@@ -185,9 +193,15 @@ def prepare_config_and_inputs_for_common(self):

 @require_torch
 class BrosModelTest(ModelTesterMixin, unittest.TestCase):
+    test_pruning = False
+    test_torchscript = False
+    test_mismatched_shapes = False
+
     all_model_classes = (
         (
             BrosForTokenClassification,
+            BrosSpadeEEForTokenClassification,
+            BrosSpadeELForTokenClassification,
             BrosModel,
         )
         if is_torch_available()
@@ -199,13 +213,25 @@ def setUp(self):
         self.model_tester = BrosModelTester(self)
         self.config_tester = ConfigTester(self, config_class=BrosConfig, hidden_size=37)

+    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
+        inputs_dict = copy.deepcopy(inputs_dict)
+
+        if return_labels:
+            ...
+        ...
+
+        return inputs_dict
+
     def test_config(self):
         self.config_tester.run_common_tests()

     def test_model(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)

+    def test_multi_gpu_data_parallel_forward(self):
+        pass
+
Collaborator:

This should be skipped with an explicit reason using a unittest.skip decorator.

Contributor Author (jinhopark8345):

Updated code to not just pass.
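For reference, the change the reviewer is asking for looks roughly like the sketch below. The reason string is an assumed placeholder, not the wording that was merged:

import unittest

class BrosModelTest(unittest.TestCase):
    # Placeholder reason string; the merged PR chooses its own wording.
    @unittest.skip(reason="Bros does not support data parallelism in the common tests")
    def test_multi_gpu_data_parallel_forward(self):
        pass  # never executed; the runner reports the skip reason instead

Unlike a bare pass, the decorator makes the skipped test visible in the test report along with its reason.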

     def test_model_various_embeddings(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         for type in ["absolute", "relative_key", "relative_key_query"]:
@@ -216,12 +242,27 @@ def test_for_token_classification(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_token_classification(*config_and_inputs)

+    def test_for_spade_ee_token_classification(self):
+        ...
+
+    def test_for_spade_el_token_classification(self):
+        ...
+
+    def test_attention_outputs(self):
+        ...
+
+    def test_hidden_states_output(self):
+        ...
+
     @slow
     def test_model_from_pretrained(self):
         for model_name in BROS_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
             model = BrosModel.from_pretrained(model_name)
             self.assertIsNotNone(model)

+    def test_initialization(self):
+        ...
+

 def prepare_bros_batch_inputs():
     attention_mask = torch.tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
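The helper above is truncated in this view, but its job is to pair token inputs with one bounding box per token, which is what distinguishes BROS inputs from plain BERT inputs. As an end-to-end sketch of how such a batch reaches the model: the checkpoint id below is an assumption based on this PR's discussion (the archive list originally used a naver-clova-ocr prefix), and random boxes stand in for real OCR coordinates:

import torch
from transformers import BrosModel, BrosProcessor  # BrosProcessor wraps a BERT tokenizer

checkpoint = "jinhopark8345/bros-base-uncased"  # assumed checkpoint id
processor = BrosProcessor.from_pretrained(checkpoint)
model = BrosModel.from_pretrained(checkpoint)

encoding = processor("his name is Rocco", return_tensors="pt")
seq_len = encoding["input_ids"].shape[-1]
# One normalized (x0, y0, x1, y1) box per token; per a commit above, the model
# converts these 2-point boxes to 4 corner points internally.
bbox = torch.rand(1, seq_len, 4)

outputs = model(
    input_ids=encoding["input_ids"],
    bbox=bbox,
    attention_mask=encoding["attention_mask"],
)
print(outputs.last_hidden_state.shape)  # (1, seq_len, hidden_size)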