From 8c274e5e6663eb0eafecb54c47f951337a9859ed Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Fri, 5 Apr 2024 17:00:29 +0200 Subject: [PATCH 1/2] gh-116608: Ignore UTF-16 BOM in importlib.resources._functional tests (GH-117569) gh-116608: Ignore UTF-16 BOM in importlib.resources._functional tests To test the `errors` argument, we read a UTF-16 file as UTF-8 with "backslashreplace" error handling. However, the utf-16 codec adds an endian-specific byte-order mark, so on big-endian machines the expectation doesn't match the test file (which was saved on a little-endian machine). Use endswith to ignore the BOM. --- importlib_resources/tests/test_functional.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/importlib_resources/tests/test_functional.py b/importlib_resources/tests/test_functional.py index 69706cf..11a4596 100644 --- a/importlib_resources/tests/test_functional.py +++ b/importlib_resources/tests/test_functional.py @@ -36,6 +36,12 @@ def _gen_resourcetxt_path_parts(self): with self.subTest(path_parts=path_parts): yield path_parts + def assertEndsWith(self, string, suffix): + """Assert that `string` ends with `suffix`. 
+ + Used to ignore an architecture-specific UTF-16 byte-order mark.""" + self.assertEqual(string[-len(suffix):], suffix) + def test_read_text(self): self.assertEqual( resources.read_text(self.anchor01, 'utf-8.file'), @@ -76,13 +82,13 @@ def test_read_text(self): ), '\x00\x01\x02\x03', ) - self.assertEqual( + self.assertEndsWith( # ignore the BOM resources.read_text( self.anchor01, 'utf-16.file', errors='backslashreplace', ), - 'Hello, UTF-16 world!\n'.encode('utf-16').decode( + 'Hello, UTF-16 world!\n'.encode('utf-16-le').decode( errors='backslashreplace', ), ) @@ -128,9 +134,9 @@ def test_open_text(self): 'utf-16.file', errors='backslashreplace', ) as f: - self.assertEqual( + self.assertEndsWith( # ignore the BOM f.read(), - 'Hello, UTF-16 world!\n'.encode('utf-16').decode( + 'Hello, UTF-16 world!\n'.encode('utf-16-le').decode( errors='backslashreplace', ), ) From b44af23d1734a881c3a065b39d5bc0187154ce7b Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Wed, 14 Aug 2024 16:54:38 -0400 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=91=B9=20Feed=20the=20hobgoblins=20(d?= =?UTF-8?q?elint).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- importlib_resources/tests/test_functional.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/importlib_resources/tests/test_functional.py b/importlib_resources/tests/test_functional.py index 11a4596..51dd52a 100644 --- a/importlib_resources/tests/test_functional.py +++ b/importlib_resources/tests/test_functional.py @@ -40,7 +40,7 @@ def assertEndsWith(self, string, suffix): """Assert that `string` ends with `suffix`. Used to ignore an architecture-specific UTF-16 byte-order mark.""" - self.assertEqual(string[-len(suffix):], suffix) + self.assertEqual(string[-len(suffix) :], suffix) def test_read_text(self): self.assertEqual(