From 2c695cad41022285ec1b032b4cb7d6e464da2dc3 Mon Sep 17 00:00:00 2001
From: Sai-Suraj-27
Date: Fri, 16 Aug 2024 20:09:45 +0530
Subject: [PATCH 1/3] Fixed failing CodeGenTokenizationTest::test_truncation.

---
 tests/models/codegen/test_tokenization_codegen.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/models/codegen/test_tokenization_codegen.py b/tests/models/codegen/test_tokenization_codegen.py
index 4832bf1962e4..184c75216290 100644
--- a/tests/models/codegen/test_tokenization_codegen.py
+++ b/tests/models/codegen/test_tokenization_codegen.py
@@ -254,12 +254,12 @@ def test_truncation(self):
         tokenizer = CodeGenTokenizer.from_pretrained("Salesforce/codegen-350M-mono")
 
         text = "\nif len_a > len_b:\n result = a\nelse:\n result = b\n\n\n\n#"
-        expected_trucated_text = "\nif len_a > len_b: result = a\nelse: result = b"
+        expected_truncated_text = "\nif len_a > len_b:\n result = a\nelse:\n result = b"
 
         input_ids = tokenizer.encode(text)
         truncation_pattern = ["^#", re.escape("<|endoftext|>"), "^'''", '^"""', "\n\n\n"]
         decoded_text = tokenizer.decode(input_ids, truncate_before_pattern=truncation_pattern)
-        self.assertEqual(decoded_text, expected_trucated_text)
+        self.assertEqual(decoded_text, expected_truncated_text)
 
         # TODO @ArthurZ outputs of the fast tokenizer are different in this case, un-related to the PR
         # tokenizer has no padding token

From e38e14851d90b11655c9a684fd7606c818bd511c Mon Sep 17 00:00:00 2001
From: Sai-Suraj-27
Date: Fri, 16 Aug 2024 22:10:57 +0530
Subject: [PATCH 2/3] [run_slow] Codegen

From 82b8041bb686d0146b1ce26065dfcf098cfdae04 Mon Sep 17 00:00:00 2001
From: Sai-Suraj-27
Date: Sat, 17 Aug 2024 22:14:07 +0530
Subject: [PATCH 3/3] [run_slow] codegen
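
Note (outside the patch series above): the snippet below is a minimal standalone sketch of the behaviour the updated test asserts. It assumes a local `transformers` installation and access to the "Salesforce/codegen-350M-mono" checkpoint; every identifier it uses is taken from the diff in PATCH 1/3.

    import re

    from transformers import CodeGenTokenizer

    tokenizer = CodeGenTokenizer.from_pretrained("Salesforce/codegen-350M-mono")

    # Source snippet that ends with a comment marker preceded by blank lines.
    text = "\nif len_a > len_b:\n result = a\nelse:\n result = b\n\n\n\n#"

    input_ids = tokenizer.encode(text)

    # CodeGenTokenizer.decode accepts truncate_before_pattern: decoding stops
    # right before the first match of any of these regexes (a comment marker,
    # the end-of-text token, docstring quotes, or three consecutive newlines).
    truncation_pattern = ["^#", re.escape("<|endoftext|>"), "^'''", '^"""', "\n\n\n"]
    decoded_text = tokenizer.decode(input_ids, truncate_before_pattern=truncation_pattern)

    # Newlines inside the code are preserved up to the truncation point, so the
    # result matches `text` minus the trailing "\n\n\n\n#", which is exactly the
    # value the patch assigns to expected_truncated_text.
    print(decoded_text)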