Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

standardize : allow users to specify output encoding #118

Merged
merged 11 commits into from
Dec 3, 2023
45 changes: 31 additions & 14 deletions clevercsv/console/commands/standardize.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,16 @@ def register(self) -> None:
),
default=[],
)
self.add_argument(
"-E",
"--target_encoding",
jbdesbas marked this conversation as resolved.
Show resolved Hide resolved
help="Set the encoding of the output file(s)",
description=(
"If omitted, the output file encoding will be the same "
"as that of the original file."
),
type=str
)
self.add_argument(
"-i",
"--in-place",
Expand Down Expand Up @@ -115,6 +125,7 @@ def handle(self) -> int:
encodings = self.args.encoding
num_chars = parse_int(self.args.num_chars, "num-chars")
in_place = self.args.in_place
target_encoding = self.args.target_encoding

if in_place and outputs:
print(
Expand Down Expand Up @@ -154,6 +165,7 @@ def handle(self) -> int:
encoding=encoding,
verbose=verbose,
num_chars=num_chars,
target_encoding=target_encoding
)
if retval > 0 and global_retval == 0:
global_retval = retval
Expand All @@ -168,6 +180,7 @@ def handle_path(
encoding: Optional[str] = None,
num_chars: Optional[int] = None,
verbose: bool = False,
target_encoding: Optional[str] = None
) -> int:
encoding = encoding or get_encoding(path)
dialect = detect_dialect(
Expand All @@ -178,23 +191,24 @@ def handle_path(
return 1

if self.args.in_place:
return self._in_place(path, dialect, encoding)
return self._in_place(path, dialect, encoding, target_encoding)
elif output is None:
return self._to_stdout(path, dialect, encoding)
return self._to_file(path, output, dialect, encoding)
return self._to_stdout(path, dialect, encoding, target_encoding)
return self._to_file(path, output, dialect, encoding, target_encoding)

def _write_transposed(
self,
path: StrPath,
stream: SupportsWrite[str],
dialect: SimpleDialect,
encoding: Optional[str],
target_encoding: Optional[str]
) -> None:
with open(path, "r", newline="", encoding=encoding) as fp:
read = reader(fp, dialect=dialect)
rows = list(read)
rows = list(map(list, zip(*rows)))
write = writer(stream, dialect="excel")
write = writer(stream, dialect="excel", encoding=target_encoding or encoding)
jbdesbas marked this conversation as resolved.
Show resolved Hide resolved
for row in rows:
write.writerow(row)

Expand All @@ -204,10 +218,11 @@ def _write_direct(
stream: SupportsWrite[str],
dialect: SimpleDialect,
encoding: Optional[str],
target_encoding: Optional[str]
) -> None:
with open(path, "r", newline="", encoding=encoding) as fp:
read = reader(fp, dialect=dialect)
write = writer(stream, dialect="excel")
write = writer(stream, dialect="excel", encoding=target_encoding or encoding)
for row in read:
write.writerow(row)

Expand All @@ -217,14 +232,15 @@ def _write_to_stream(
stream: SupportsWrite[str],
dialect: SimpleDialect,
encoding: Optional[str],
target_encoding: Optional[str]
) -> None:
if self.args.transpose:
self._write_transposed(path, stream, dialect, encoding)
self._write_transposed(path, stream, dialect, encoding, target_encoding)
else:
self._write_direct(path, stream, dialect, encoding)
self._write_direct(path, stream, dialect, encoding, target_encoding)

def _in_place(
self, path: StrPath, dialect: SimpleDialect, encoding: Optional[str]
self, path: StrPath, dialect: SimpleDialect, encoding: Optional[str], target_encoding: Optional[str]
) -> int:
"""In-place mode overwrites the input file, if necessary

Expand All @@ -235,8 +251,8 @@ def _in_place(

"""
tmpfd, tmpfname = tempfile.mkstemp(prefix="clevercsv_", suffix=".csv")
tmpid = os.fdopen(tmpfd, "w", newline="", encoding=encoding)
self._write_to_stream(path, tmpid, dialect, encoding)
tmpid = os.fdopen(tmpfd, "w", newline="", encoding=target_encoding or encoding)
self._write_to_stream(path, tmpid, dialect, encoding, target_encoding)
tmpid.close()

previous_sha1 = sha1sum(path)
Expand All @@ -249,10 +265,10 @@ def _in_place(
return 2

def _to_stdout(
self, path: StrPath, dialect: SimpleDialect, encoding: Optional[str]
self, path: StrPath, dialect: SimpleDialect, encoding: Optional[str], target_encoding: Optional[str]
) -> int:
stream = io.StringIO(newline="")
jbdesbas marked this conversation as resolved.
Show resolved Hide resolved
self._write_to_stream(path, stream, dialect, encoding)
self._write_to_stream(path, stream, dialect, encoding, target_encoding)
print(stream.getvalue(), end="")
stream.close()
return 0
Expand All @@ -263,7 +279,8 @@ def _to_file(
output: StrPath,
dialect: SimpleDialect,
encoding: Optional[str],
target_encoding: Optional[str]
) -> int:
with open(output, "w", newline="", encoding=encoding) as fp:
self._write_to_stream(path, fp, dialect, encoding)
with open(output, "w", newline="", encoding=target_encoding or encoding) as fp:
self._write_to_stream(path, fp, dialect, encoding, target_encoding)
return 0