Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

standardize : allow users to specify output encoding #118

Merged
merged 11 commits into from
Dec 3, 2023
31 changes: 23 additions & 8 deletions clevercsv/console/commands/standardize.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,16 @@ def register(self) -> None:
),
default=[],
)
self.add_argument(
"-E",
"--target_encoding",
jbdesbas marked this conversation as resolved.
Show resolved Hide resolved
help="Set the encoding of the output file(s)",
description=(
"If omitted, the output file encoding will be the same "
"as that of the original file."
),
type=str
)
self.add_argument(
"-i",
"--in-place",
Expand Down Expand Up @@ -115,6 +125,7 @@ def handle(self) -> int:
encodings = self.args.encoding
num_chars = parse_int(self.args.num_chars, "num-chars")
in_place = self.args.in_place
target_encoding = self.args.target_encoding

if in_place and outputs:
print(
Expand Down Expand Up @@ -154,6 +165,7 @@ def handle(self) -> int:
encoding=encoding,
verbose=verbose,
num_chars=num_chars,
target_encoding=target_encoding
)
if retval > 0 and global_retval == 0:
global_retval = retval
Expand All @@ -168,8 +180,10 @@ def handle_path(
encoding: Optional[str] = None,
num_chars: Optional[int] = None,
verbose: bool = False,
target_encoding: Optional[str] = None
) -> int:
encoding = encoding or get_encoding(path)
target_encoding = target_encoding or encoding
dialect = detect_dialect(
path, num_chars=num_chars, encoding=encoding, verbose=verbose
)
Expand All @@ -178,17 +192,17 @@ def handle_path(
return 1

if self.args.in_place:
return self._in_place(path, dialect, encoding)
return self._in_place(path, dialect, encoding, target_encoding)
elif output is None:
return self._to_stdout(path, dialect, encoding)
return self._to_file(path, output, dialect, encoding)
return self._to_file(path, output, dialect, encoding, target_encoding)

def _write_transposed(
self,
path: StrPath,
stream: SupportsWrite[str],
dialect: SimpleDialect,
encoding: Optional[str],
encoding: Optional[str]
) -> None:
with open(path, "r", newline="", encoding=encoding) as fp:
read = reader(fp, dialect=dialect)
Expand All @@ -203,7 +217,7 @@ def _write_direct(
path: StrPath,
stream: SupportsWrite[str],
dialect: SimpleDialect,
encoding: Optional[str],
encoding: Optional[str]
) -> None:
with open(path, "r", newline="", encoding=encoding) as fp:
read = reader(fp, dialect=dialect)
Expand All @@ -216,15 +230,15 @@ def _write_to_stream(
path: StrPath,
stream: SupportsWrite[str],
dialect: SimpleDialect,
encoding: Optional[str],
encoding: Optional[str]
) -> None:
if self.args.transpose:
self._write_transposed(path, stream, dialect, encoding)
else:
self._write_direct(path, stream, dialect, encoding)

def _in_place(
self, path: StrPath, dialect: SimpleDialect, encoding: Optional[str]
self, path: StrPath, dialect: SimpleDialect, encoding: Optional[str], target_encoding: Optional[str]
) -> int:
"""In-place mode overwrites the input file, if necessary

Expand All @@ -235,7 +249,7 @@ def _in_place(

"""
tmpfd, tmpfname = tempfile.mkstemp(prefix="clevercsv_", suffix=".csv")
tmpid = os.fdopen(tmpfd, "w", newline="", encoding=encoding)
tmpid = os.fdopen(tmpfd, "w", newline="", encoding=target_encoding)
self._write_to_stream(path, tmpid, dialect, encoding)
tmpid.close()

Expand Down Expand Up @@ -263,7 +277,8 @@ def _to_file(
output: StrPath,
dialect: SimpleDialect,
encoding: Optional[str],
target_encoding: Optional[str]
) -> int:
with open(output, "w", newline="", encoding=encoding) as fp:
with open(output, "w", newline="", encoding=target_encoding) as fp:
self._write_to_stream(path, fp, dialect, encoding)
return 0