From aab0a194014f8db7cfccbe947dfec289435564bd Mon Sep 17 00:00:00 2001 From: Jean-Baptiste DESBAS Date: Fri, 13 Oct 2023 14:51:01 +0200 Subject: [PATCH 01/11] add target encoding --- clevercsv/console/commands/standardize.py | 40 +++++++++++++++-------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/clevercsv/console/commands/standardize.py b/clevercsv/console/commands/standardize.py index 002eb7b..6272518 100644 --- a/clevercsv/console/commands/standardize.py +++ b/clevercsv/console/commands/standardize.py @@ -69,6 +69,11 @@ def register(self) -> None: ), default=[], ) + self.add_argument( + "-c", + "--convert_encoding", + type=str + ) self.add_argument( "-i", "--in-place", @@ -115,6 +120,7 @@ def handle(self) -> int: encodings = self.args.encoding num_chars = parse_int(self.args.num_chars, "num-chars") in_place = self.args.in_place + target_encoding = self.args.convert_encoding if in_place and outputs: print( @@ -154,6 +160,7 @@ def handle(self) -> int: encoding=encoding, verbose=verbose, num_chars=num_chars, + target_encoding=target_encoding ) if retval > 0 and global_retval == 0: global_retval = retval @@ -168,6 +175,7 @@ def handle_path( encoding: Optional[str] = None, num_chars: Optional[int] = None, verbose: bool = False, + target_encoding: Optional[str] = None ) -> int: encoding = encoding or get_encoding(path) dialect = detect_dialect( @@ -178,10 +186,10 @@ def handle_path( return 1 if self.args.in_place: - return self._in_place(path, dialect, encoding) + return self._in_place(path, dialect, encoding, target_encoding) elif output is None: - return self._to_stdout(path, dialect, encoding) - return self._to_file(path, output, dialect, encoding) + return self._to_stdout(path, dialect, encoding, target_encoding) + return self._to_file(path, output, dialect, encoding, target_encoding) def _write_transposed( self, @@ -189,12 +197,13 @@ def _write_transposed( stream: SupportsWrite[str], dialect: SimpleDialect, encoding: Optional[str], + 
target_encoding: Optional[str] ) -> None: with open(path, "r", newline="", encoding=encoding) as fp: read = reader(fp, dialect=dialect) rows = list(read) rows = list(map(list, zip(*rows))) - write = writer(stream, dialect="excel") + write = writer(stream, dialect="excel", encoding=target_encoding or encoding) for row in rows: write.writerow(row) @@ -204,10 +213,11 @@ def _write_direct( stream: SupportsWrite[str], dialect: SimpleDialect, encoding: Optional[str], + target_encoding: Optional[str] ) -> None: with open(path, "r", newline="", encoding=encoding) as fp: read = reader(fp, dialect=dialect) - write = writer(stream, dialect="excel") + write = writer(stream, dialect="excel", encoding=target_encoding or encoding) for row in read: write.writerow(row) @@ -217,14 +227,15 @@ def _write_to_stream( stream: SupportsWrite[str], dialect: SimpleDialect, encoding: Optional[str], + target_encoding: Optional[str] ) -> None: if self.args.transpose: - self._write_transposed(path, stream, dialect, encoding) + self._write_transposed(path, stream, dialect, encoding, target_encoding) else: - self._write_direct(path, stream, dialect, encoding) + self._write_direct(path, stream, dialect, encoding, target_encoding) def _in_place( - self, path: StrPath, dialect: SimpleDialect, encoding: Optional[str] + self, path: StrPath, dialect: SimpleDialect, encoding: Optional[str], target_encoding: Optional[str] ) -> int: """In-place mode overwrites the input file, if necessary @@ -235,8 +246,8 @@ def _in_place( """ tmpfd, tmpfname = tempfile.mkstemp(prefix="clevercsv_", suffix=".csv") - tmpid = os.fdopen(tmpfd, "w", newline="", encoding=encoding) - self._write_to_stream(path, tmpid, dialect, encoding) + tmpid = os.fdopen(tmpfd, "w", newline="", encoding=target_encoding or encoding) + self._write_to_stream(path, tmpid, dialect, encoding, target_encoding) tmpid.close() previous_sha1 = sha1sum(path) @@ -249,10 +260,10 @@ def _in_place( return 2 def _to_stdout( - self, path: StrPath, dialect: 
SimpleDialect, encoding: Optional[str] + self, path: StrPath, dialect: SimpleDialect, encoding: Optional[str], target_encoding: Optional[str] ) -> int: stream = io.StringIO(newline="") - self._write_to_stream(path, stream, dialect, encoding) + self._write_to_stream(path, stream, dialect, encoding, target_encoding) print(stream.getvalue(), end="") stream.close() return 0 @@ -263,7 +274,8 @@ def _to_file( output: StrPath, dialect: SimpleDialect, encoding: Optional[str], + target_encoding: Optional[str] ) -> int: - with open(output, "w", newline="", encoding=encoding) as fp: - self._write_to_stream(path, fp, dialect, encoding) + with open(output, "w", newline="", encoding=target_encoding or encoding) as fp: + self._write_to_stream(path, fp, dialect, encoding, target_encoding) return 0 From 3cad7327e79f24653add7364417c834f13a49100 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste DESBAS Date: Fri, 13 Oct 2023 16:35:14 +0200 Subject: [PATCH 02/11] docstring and change arg name --- clevercsv/console/commands/standardize.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/clevercsv/console/commands/standardize.py b/clevercsv/console/commands/standardize.py index 6272518..4de6cf3 100644 --- a/clevercsv/console/commands/standardize.py +++ b/clevercsv/console/commands/standardize.py @@ -70,8 +70,13 @@ def register(self) -> None: default=[], ) self.add_argument( - "-c", - "--convert_encoding", + "-E", + "--target_encoding", + help="Set the encoding of the output file(s)", + description=( + "If ommited, the output file encoding while be the same " + "as that of the original file." 
+ ), type=str ) self.add_argument( @@ -120,7 +125,7 @@ def handle(self) -> int: encodings = self.args.encoding num_chars = parse_int(self.args.num_chars, "num-chars") in_place = self.args.in_place - target_encoding = self.args.convert_encoding + target_encoding = self.args.target_encoding if in_place and outputs: print( From a43a77768bef1faff0eb4b6055f6d3c4acff4745 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste DESBAS Date: Thu, 16 Nov 2023 22:12:08 +0100 Subject: [PATCH 03/11] fix error when write to_file, factorize 'target_encoding or encoding' --- clevercsv/console/commands/standardize.py | 32 +++++++++++------------ 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/clevercsv/console/commands/standardize.py b/clevercsv/console/commands/standardize.py index 4de6cf3..dfccf51 100644 --- a/clevercsv/console/commands/standardize.py +++ b/clevercsv/console/commands/standardize.py @@ -183,6 +183,7 @@ def handle_path( target_encoding: Optional[str] = None ) -> int: encoding = encoding or get_encoding(path) + target_encoding = target_encoding or encoding dialect = detect_dialect( path, num_chars=num_chars, encoding=encoding, verbose=verbose ) @@ -193,7 +194,7 @@ def handle_path( if self.args.in_place: return self._in_place(path, dialect, encoding, target_encoding) elif output is None: - return self._to_stdout(path, dialect, encoding, target_encoding) + return self._to_stdout(path, dialect, encoding) return self._to_file(path, output, dialect, encoding, target_encoding) def _write_transposed( @@ -201,14 +202,13 @@ def _write_transposed( path: StrPath, stream: SupportsWrite[str], dialect: SimpleDialect, - encoding: Optional[str], - target_encoding: Optional[str] + encoding: Optional[str] ) -> None: with open(path, "r", newline="", encoding=encoding) as fp: read = reader(fp, dialect=dialect) rows = list(read) rows = list(map(list, zip(*rows))) - write = writer(stream, dialect="excel", encoding=target_encoding or encoding) + write = writer(stream, dialect="excel") 
for row in rows: write.writerow(row) @@ -217,12 +217,11 @@ def _write_direct( path: StrPath, stream: SupportsWrite[str], dialect: SimpleDialect, - encoding: Optional[str], - target_encoding: Optional[str] + encoding: Optional[str] ) -> None: with open(path, "r", newline="", encoding=encoding) as fp: read = reader(fp, dialect=dialect) - write = writer(stream, dialect="excel", encoding=target_encoding or encoding) + write = writer(stream, dialect="excel") for row in read: write.writerow(row) @@ -231,13 +230,12 @@ def _write_to_stream( path: StrPath, stream: SupportsWrite[str], dialect: SimpleDialect, - encoding: Optional[str], - target_encoding: Optional[str] + encoding: Optional[str] ) -> None: if self.args.transpose: - self._write_transposed(path, stream, dialect, encoding, target_encoding) + self._write_transposed(path, stream, dialect, encoding) else: - self._write_direct(path, stream, dialect, encoding, target_encoding) + self._write_direct(path, stream, dialect, encoding) def _in_place( self, path: StrPath, dialect: SimpleDialect, encoding: Optional[str], target_encoding: Optional[str] @@ -251,8 +249,8 @@ def _in_place( """ tmpfd, tmpfname = tempfile.mkstemp(prefix="clevercsv_", suffix=".csv") - tmpid = os.fdopen(tmpfd, "w", newline="", encoding=target_encoding or encoding) - self._write_to_stream(path, tmpid, dialect, encoding, target_encoding) + tmpid = os.fdopen(tmpfd, "w", newline="", encoding=target_encoding) + self._write_to_stream(path, tmpid, dialect, encoding) tmpid.close() previous_sha1 = sha1sum(path) @@ -265,10 +263,10 @@ def _in_place( return 2 def _to_stdout( - self, path: StrPath, dialect: SimpleDialect, encoding: Optional[str], target_encoding: Optional[str] + self, path: StrPath, dialect: SimpleDialect, encoding: Optional[str] ) -> int: stream = io.StringIO(newline="") - self._write_to_stream(path, stream, dialect, encoding, target_encoding) + self._write_to_stream(path, stream, dialect, encoding) print(stream.getvalue(), end="") stream.close() 
return 0 @@ -281,6 +279,6 @@ def _to_file( encoding: Optional[str], target_encoding: Optional[str] ) -> int: - with open(output, "w", newline="", encoding=target_encoding or encoding) as fp: - self._write_to_stream(path, fp, dialect, encoding, target_encoding) + with open(output, "w", newline="", encoding=target_encoding) as fp: + self._write_to_stream(path, fp, dialect, encoding) return 0 From 62e5c330e9ec34d19a82a9047388a5b736962637 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste DESBAS Date: Fri, 17 Nov 2023 21:24:38 +0100 Subject: [PATCH 04/11] target_encoding -> target-encoding --- clevercsv/console/commands/standardize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clevercsv/console/commands/standardize.py b/clevercsv/console/commands/standardize.py index dfccf51..1f77197 100644 --- a/clevercsv/console/commands/standardize.py +++ b/clevercsv/console/commands/standardize.py @@ -71,7 +71,7 @@ def register(self) -> None: ) self.add_argument( "-E", - "--target_encoding", + "--target-encoding", help="Set the encoding of the output file(s)", description=( "If ommited, the output file encoding while be the same " From b13802a84d573c1f39c0c87fbec27ec9ef92eeed Mon Sep 17 00:00:00 2001 From: Jean-Baptiste DESBAS Date: Fri, 17 Nov 2023 22:03:28 +0100 Subject: [PATCH 05/11] test target_encoding --- tests/test_unit/test_console.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/test_unit/test_console.py b/tests/test_unit/test_console.py index ef67f2d..a53aab8 100644 --- a/tests/test_unit/test_console.py +++ b/tests/test_unit/test_console.py @@ -640,3 +640,28 @@ def test_standardize_in_place_multi_noop(self) -> None: self.assertEqual(contents, exp) finally: any(map(os.unlink, tmpfnames)) + + def test_standardize_target_encoding(self) -> None: + table: TableType = [["Å", "B", "C"], ['é', 'ü', '中'], [4, 5, 6]] + dialect = SimpleDialect(delimiter=";", quotechar="", escapechar="") + encoding='utf-8' + tmpfname = 
self._build_file(table, dialect, encoding=encoding) + + tmpfd, tmpoutname = tempfile.mkstemp(prefix="ccsv_", suffix=".csv") + os.close(tmpfd) + + application = build_application() + tester = Tester(application) + tester.test_command("standardize", ["-o", tmpoutname, '-E', 'utf-8', tmpfname]) + + # Excel format (i.e. RFC4180) *requires* CRLF + crlf = "\r\n" + exp = crlf.join(["Å,B,C", "é,ü,中", "4,5,6", ""]) + with open(tmpoutname, "r", newline="") as fp: + output = fp.read() + + try: + self.assertEqual(exp, output) + finally: + os.unlink(tmpfname) + os.unlink(tmpoutname) From 473fb9b040514cf8b73a9b82492cad7263afca1e Mon Sep 17 00:00:00 2001 From: Jean-Baptiste DESBAS Date: Fri, 17 Nov 2023 22:05:20 +0100 Subject: [PATCH 06/11] test target_encoding raise UnicodeEncodeError --- tests/test_unit/test_console.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/test_unit/test_console.py b/tests/test_unit/test_console.py index a53aab8..b9f802b 100644 --- a/tests/test_unit/test_console.py +++ b/tests/test_unit/test_console.py @@ -665,3 +665,21 @@ def test_standardize_target_encoding(self) -> None: finally: os.unlink(tmpfname) os.unlink(tmpoutname) + + def test_standardize_target_encoding_raise_UnicodeEncodeError(self) -> None: + table: TableType = [["Å", "B", "C"], ['é', 'ü', '中'], [4, 5, 6]] + dialect = SimpleDialect(delimiter=";", quotechar="", escapechar="") + encoding='utf-8' + tmpfname = self._build_file(table, dialect, encoding=encoding) + + tmpfd, tmpoutname = tempfile.mkstemp(prefix="ccsv_", suffix=".csv") + os.close(tmpfd) + + application = build_application() + tester = Tester(application) + try : + with self.assertRaises(UnicodeEncodeError): + tester.test_command("standardize", ["-o", tmpoutname, '-E', 'latin-1', tmpfname]) + finally: + os.unlink(tmpfname) + os.unlink(tmpoutname) From 16daba2bb1dbe328a1bf2b8b7ac6adb5472e622e Mon Sep 17 00:00:00 2001 From: Jean-Baptiste DESBAS Date: Fri, 17 Nov 2023 22:11:31 +0100 Subject: [PATCH 07/11] 
revert unnecessary changes --- clevercsv/console/commands/standardize.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clevercsv/console/commands/standardize.py b/clevercsv/console/commands/standardize.py index 1f77197..a767acb 100644 --- a/clevercsv/console/commands/standardize.py +++ b/clevercsv/console/commands/standardize.py @@ -202,7 +202,7 @@ def _write_transposed( path: StrPath, stream: SupportsWrite[str], dialect: SimpleDialect, - encoding: Optional[str] + encoding: Optional[str], ) -> None: with open(path, "r", newline="", encoding=encoding) as fp: read = reader(fp, dialect=dialect) @@ -217,7 +217,7 @@ def _write_direct( path: StrPath, stream: SupportsWrite[str], dialect: SimpleDialect, - encoding: Optional[str] + encoding: Optional[str], ) -> None: with open(path, "r", newline="", encoding=encoding) as fp: read = reader(fp, dialect=dialect) @@ -230,7 +230,7 @@ def _write_to_stream( path: StrPath, stream: SupportsWrite[str], dialect: SimpleDialect, - encoding: Optional[str] + encoding: Optional[str], ) -> None: if self.args.transpose: self._write_transposed(path, stream, dialect, encoding) From e293cae757fd12c5ba60bfc40c5035e00a8183bf Mon Sep 17 00:00:00 2001 From: Jean-Baptiste DESBAS Date: Fri, 17 Nov 2023 22:19:33 +0100 Subject: [PATCH 08/11] test target_encoding2 --- tests/test_unit/test_console.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/test_unit/test_console.py b/tests/test_unit/test_console.py index b9f802b..d735737 100644 --- a/tests/test_unit/test_console.py +++ b/tests/test_unit/test_console.py @@ -666,6 +666,31 @@ def test_standardize_target_encoding(self) -> None: os.unlink(tmpfname) os.unlink(tmpoutname) + def test_standardize_target_encoding2(self) -> None: + table: TableType = [["A", "B", "C"], ['é', 'è', 'à'], [4, 5, 6]] + dialect = SimpleDialect(delimiter=";", quotechar="", escapechar="") + encoding='latin-1' + tmpfname = self._build_file(table, dialect, 
encoding=encoding) + + tmpfd, tmpoutname = tempfile.mkstemp(prefix="ccsv_", suffix=".csv") + os.close(tmpfd) + + application = build_application() + tester = Tester(application) + tester.test_command("standardize", ["-o", tmpoutname, '-e', 'latin-1', '-E', 'utf-8', tmpfname]) + + # Excel format (i.e. RFC4180) *requires* CRLF + crlf = "\r\n" + exp = crlf.join(["A,B,C", "é,è,à", "4,5,6", ""]) + with open(tmpoutname, "r", newline="") as fp: + output = fp.read() + + try: + self.assertEqual(exp, output) + finally: + os.unlink(tmpfname) + os.unlink(tmpoutname) + def test_standardize_target_encoding_raise_UnicodeEncodeError(self) -> None: table: TableType = [["Å", "B", "C"], ['é', 'ü', '中'], [4, 5, 6]] dialect = SimpleDialect(delimiter=";", quotechar="", escapechar="") From ac5554408b56cc7fe7016d92eaef59521604f379 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste DESBAS Date: Wed, 29 Nov 2023 21:01:21 +0100 Subject: [PATCH 09/11] add detected encoding assertion --- tests/test_unit/test_console.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/test_unit/test_console.py b/tests/test_unit/test_console.py index d735737..fa50af4 100644 --- a/tests/test_unit/test_console.py +++ b/tests/test_unit/test_console.py @@ -21,6 +21,7 @@ from clevercsv._types import _DialectLike from clevercsv.console import build_application from clevercsv.dialect import SimpleDialect +from clevercsv.encoding import get_encoding from clevercsv.write import writer TableType = List[List[Any]] @@ -671,7 +672,7 @@ def test_standardize_target_encoding2(self) -> None: dialect = SimpleDialect(delimiter=";", quotechar="", escapechar="") encoding='latin-1' tmpfname = self._build_file(table, dialect, encoding=encoding) - + self.assertEqual("ISO-8859-1", get_encoding(tmpfname, try_cchardet=False)) tmpfd, tmpoutname = tempfile.mkstemp(prefix="ccsv_", suffix=".csv") os.close(tmpfd) @@ -682,14 +683,19 @@ def test_standardize_target_encoding2(self) -> None: # Excel format (i.e. 
RFC4180) *requires* CRLF crlf = "\r\n" exp = crlf.join(["A,B,C", "é,è,à", "4,5,6", ""]) + + self.assertEqual("utf-8", get_encoding(tmpoutname, try_cchardet=False)) with open(tmpoutname, "r", newline="") as fp: output = fp.read() try: self.assertEqual(exp, output) + finally: os.unlink(tmpfname) os.unlink(tmpoutname) + + def test_standardize_target_encoding_raise_UnicodeEncodeError(self) -> None: table: TableType = [["Å", "B", "C"], ['é', 'ü', '中'], [4, 5, 6]] From 92cb49ebe51e7e96f46aba8cb1d355795a7a4a24 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste DESBAS Date: Wed, 29 Nov 2023 21:05:27 +0100 Subject: [PATCH 10/11] fix formatting errors with black --- clevercsv/console/commands/standardize.py | 16 +++++---- tests/test_unit/test_console.py | 40 ++++++++++++++--------- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/clevercsv/console/commands/standardize.py b/clevercsv/console/commands/standardize.py index a767acb..f25c9e4 100644 --- a/clevercsv/console/commands/standardize.py +++ b/clevercsv/console/commands/standardize.py @@ -75,9 +75,9 @@ def register(self) -> None: help="Set the encoding of the output file(s)", description=( "If ommited, the output file encoding while be the same " - "as that of the original file." + "as that of the original file." 
), - type=str + type=str, ) self.add_argument( "-i", @@ -165,7 +165,7 @@ def handle(self) -> int: encoding=encoding, verbose=verbose, num_chars=num_chars, - target_encoding=target_encoding + target_encoding=target_encoding, ) if retval > 0 and global_retval == 0: global_retval = retval @@ -180,7 +180,7 @@ def handle_path( encoding: Optional[str] = None, num_chars: Optional[int] = None, verbose: bool = False, - target_encoding: Optional[str] = None + target_encoding: Optional[str] = None, ) -> int: encoding = encoding or get_encoding(path) target_encoding = target_encoding or encoding @@ -238,7 +238,11 @@ def _write_to_stream( self._write_direct(path, stream, dialect, encoding) def _in_place( - self, path: StrPath, dialect: SimpleDialect, encoding: Optional[str], target_encoding: Optional[str] + self, + path: StrPath, + dialect: SimpleDialect, + encoding: Optional[str], + target_encoding: Optional[str], ) -> int: """In-place mode overwrites the input file, if necessary @@ -277,7 +281,7 @@ def _to_file( output: StrPath, dialect: SimpleDialect, encoding: Optional[str], - target_encoding: Optional[str] + target_encoding: Optional[str], ) -> int: with open(output, "w", newline="", encoding=target_encoding) as fp: self._write_to_stream(path, fp, dialect, encoding) diff --git a/tests/test_unit/test_console.py b/tests/test_unit/test_console.py index fa50af4..3151694 100644 --- a/tests/test_unit/test_console.py +++ b/tests/test_unit/test_console.py @@ -643,9 +643,9 @@ def test_standardize_in_place_multi_noop(self) -> None: any(map(os.unlink, tmpfnames)) def test_standardize_target_encoding(self) -> None: - table: TableType = [["Å", "B", "C"], ['é', 'ü', '中'], [4, 5, 6]] + table: TableType = [["Å", "B", "C"], ["é", "ü", "中"], [4, 5, 6]] dialect = SimpleDialect(delimiter=";", quotechar="", escapechar="") - encoding='utf-8' + encoding = "utf-8" tmpfname = self._build_file(table, dialect, encoding=encoding) tmpfd, tmpoutname = tempfile.mkstemp(prefix="ccsv_", suffix=".csv") @@ 
-653,7 +653,9 @@ def test_standardize_target_encoding(self) -> None: application = build_application() tester = Tester(application) - tester.test_command("standardize", ["-o", tmpoutname, '-E', 'utf-8', tmpfname]) + tester.test_command( + "standardize", ["-o", tmpoutname, "-E", "utf-8", tmpfname] + ) # Excel format (i.e. RFC4180) *requires* CRLF crlf = "\r\n" @@ -668,22 +670,27 @@ def test_standardize_target_encoding(self) -> None: os.unlink(tmpoutname) def test_standardize_target_encoding2(self) -> None: - table: TableType = [["A", "B", "C"], ['é', 'è', 'à'], [4, 5, 6]] + table: TableType = [["A", "B", "C"], ["é", "è", "à"], [4, 5, 6]] dialect = SimpleDialect(delimiter=";", quotechar="", escapechar="") - encoding='latin-1' + encoding = "latin-1" tmpfname = self._build_file(table, dialect, encoding=encoding) - self.assertEqual("ISO-8859-1", get_encoding(tmpfname, try_cchardet=False)) + self.assertEqual( + "ISO-8859-1", get_encoding(tmpfname, try_cchardet=False) + ) tmpfd, tmpoutname = tempfile.mkstemp(prefix="ccsv_", suffix=".csv") os.close(tmpfd) application = build_application() tester = Tester(application) - tester.test_command("standardize", ["-o", tmpoutname, '-e', 'latin-1', '-E', 'utf-8', tmpfname]) + tester.test_command( + "standardize", + ["-o", tmpoutname, "-e", "latin-1", "-E", "utf-8", tmpfname], + ) # Excel format (i.e. 
RFC4180) *requires* CRLF crlf = "\r\n" exp = crlf.join(["A,B,C", "é,è,à", "4,5,6", ""]) - + self.assertEqual("utf-8", get_encoding(tmpoutname, try_cchardet=False)) with open(tmpoutname, "r", newline="") as fp: output = fp.read() @@ -694,13 +701,13 @@ def test_standardize_target_encoding2(self) -> None: finally: os.unlink(tmpfname) os.unlink(tmpoutname) - - - def test_standardize_target_encoding_raise_UnicodeEncodeError(self) -> None: - table: TableType = [["Å", "B", "C"], ['é', 'ü', '中'], [4, 5, 6]] + def test_standardize_target_encoding_raise_UnicodeEncodeError( + self, + ) -> None: + table: TableType = [["Å", "B", "C"], ["é", "ü", "中"], [4, 5, 6]] dialect = SimpleDialect(delimiter=";", quotechar="", escapechar="") - encoding='utf-8' + encoding = "utf-8" tmpfname = self._build_file(table, dialect, encoding=encoding) tmpfd, tmpoutname = tempfile.mkstemp(prefix="ccsv_", suffix=".csv") @@ -708,9 +715,12 @@ def test_standardize_target_encoding_raise_UnicodeEncodeError(self) -> None: application = build_application() tester = Tester(application) - try : + try: with self.assertRaises(UnicodeEncodeError): - tester.test_command("standardize", ["-o", tmpoutname, '-E', 'latin-1', tmpfname]) + tester.test_command( + "standardize", + ["-o", tmpoutname, "-E", "latin-1", tmpfname], + ) finally: os.unlink(tmpfname) os.unlink(tmpoutname) From a6f548e0934aa2816f1e3cad7a65ab5ccc5198e7 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste DESBAS Date: Sun, 3 Dec 2023 16:19:54 +0100 Subject: [PATCH 11/11] add open encoding --- tests/test_unit/test_console.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_unit/test_console.py b/tests/test_unit/test_console.py index 3151694..44dd098 100644 --- a/tests/test_unit/test_console.py +++ b/tests/test_unit/test_console.py @@ -660,7 +660,7 @@ def test_standardize_target_encoding(self) -> None: # Excel format (i.e. 
RFC4180) *requires* CRLF crlf = "\r\n" exp = crlf.join(["Å,B,C", "é,ü,中", "4,5,6", ""]) - with open(tmpoutname, "r", newline="") as fp: + with open(tmpoutname, "r", newline="", encoding="utf-8") as fp: output = fp.read() try: @@ -692,7 +692,7 @@ def test_standardize_target_encoding2(self) -> None: exp = crlf.join(["A,B,C", "é,è,à", "4,5,6", ""]) self.assertEqual("utf-8", get_encoding(tmpoutname, try_cchardet=False)) - with open(tmpoutname, "r", newline="") as fp: + with open(tmpoutname, "r", newline="", encoding="utf-8") as fp: output = fp.read() try: