From 6e9c9ef88fb58e2f8d2c8c3c67e8123f4c49e89a Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Sun, 14 Jan 2024 22:32:15 +0100 Subject: [PATCH] Options accepting a file should accept lists of files The rationale is that these options readily accept multiple files from the command line, because they can be specified multiple times. However, duplicate option keys are invalid in an INI config file. Our alternative here is to accept multiple comma-separated values for each option key, in addition to multiple occurrences of the same option key. --- codespell_lib/_codespell.py | 63 +++++++++++++++++-------------- codespell_lib/tests/test_basic.py | 21 +++++++++++ pyproject.toml | 2 +- 3 files changed, 56 insertions(+), 30 deletions(-) diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py index 1904f7c852..62a51b75b3 100644 --- a/codespell_lib/_codespell.py +++ b/codespell_lib/_codespell.py @@ -390,10 +390,9 @@ def parse_options( "-D", "--dictionary", action="append", - help="custom dictionary file that contains spelling " - "corrections. If this flag is not specified or " - 'equals "-" then the default dictionary is used. ' - "This option can be specified multiple times.", + help="comma-separated list of custom dictionary files that " + "contain spelling corrections. If this flag is not specified " + 'or equals "-" then the default dictionary is used.', ) builtin_opts = "\n- ".join( [""] + [f"{d[0]!r} {d[1]}" for d in _builtin_dictionaries] @@ -423,26 +422,26 @@ def parse_options( "-I", "--ignore-words", action="append", - metavar="FILE", - help="file that contains words that will be ignored " - "by codespell. File must contain 1 word per line." - " Words are case sensitive based on how they are " - "written in the dictionary file", + metavar="FILES", + help="comma-separated list of files that contain " + "words to be ignored by codespell. Files must contain " + "1 word per line. 
Words are case sensitive based on " + "how they are written in the dictionary file.", ) parser.add_argument( "-L", "--ignore-words-list", action="append", metavar="WORDS", - help="comma separated list of words to be ignored " + help="comma-separated list of words to be ignored " "by codespell. Words are case sensitive based on " - "how they are written in the dictionary file", + "how they are written in the dictionary file.", ) parser.add_argument( "--uri-ignore-words-list", action="append", metavar="WORDS", - help="comma separated list of words to be ignored " + help="comma-separated list of words to be ignored " "by codespell in URIs and emails only. Words are " "case sensitive based on how they are written in " 'the dictionary file. If set to "*", all ' @@ -494,11 +493,13 @@ def parse_options( parser.add_argument( "-x", "--exclude-file", + action="append", type=str, - metavar="FILE", - help="ignore whole lines that match those " - "in the file FILE. The lines in FILE " - "should match the to-be-excluded lines exactly", + metavar="FILES", + help="ignore whole lines that match those in " + "the comma-separated list of files EXCLUDE. 
" + "The lines in these files should match the " + "to-be-excluded lines exactly", ) parser.add_argument( @@ -1166,20 +1167,22 @@ def main(*args: str) -> int: else: ignore_word_regex = None - ignore_words_files = options.ignore_words or [] ignore_words, ignore_words_cased = parse_ignore_words_option( options.ignore_words_list ) - - for ignore_words_file in ignore_words_files: - if not os.path.isfile(ignore_words_file): - print( - f"ERROR: cannot find ignore-words file: {ignore_words_file}", - file=sys.stderr, - ) - parser.print_help() - return EX_USAGE - build_ignore_words(ignore_words_file, ignore_words, ignore_words_cased) + if options.ignore_words: + ignore_words_files = flatten_clean_comma_separated_arguments( + options.ignore_words + ) + for ignore_words_file in ignore_words_files: + if not os.path.isfile(ignore_words_file): + print( + f"ERROR: cannot find ignore-words file: {ignore_words_file}", + file=sys.stderr, + ) + parser.print_help() + return EX_USAGE + build_ignore_words(ignore_words_file, ignore_words, ignore_words_cased) uri_regex = options.uri_regex or uri_regex_def try: @@ -1196,7 +1199,7 @@ def main(*args: str) -> int: itertools.chain(*parse_ignore_words_option(options.uri_ignore_words_list)) ) - dictionaries = options.dictionary or ["-"] + dictionaries = flatten_clean_comma_separated_arguments(options.dictionary or ["-"]) use_dictionaries = [] for dictionary in dictionaries: @@ -1258,7 +1261,9 @@ def main(*args: str) -> int: exclude_lines: Set[str] = set() if options.exclude_file: - build_exclude_hashes(options.exclude_file, exclude_lines) + exclude_files = flatten_clean_comma_separated_arguments(options.exclude_file) + for exclude_file in exclude_files: + build_exclude_hashes(exclude_file, exclude_lines) file_opener = FileOpener(options.hard_encoding_detection, options.quiet_level) diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py index 89a9b549d2..51ee4b8390 100644 --- a/codespell_lib/tests/test_basic.py +++ 
b/codespell_lib/tests/test_basic.py @@ -342,6 +342,20 @@ def test_ignore_dictionary( fname = tmp_path / "ignore.txt" fname.write_text("abandonned\nabilty\r\nackward") assert cs.main("-I", fname, bad_name) == 1 + # missing file in ignore list + fname_missing = tmp_path / "missing.txt" + result = cs.main("-I", fname_missing, bad_name, std=True) + assert isinstance(result, tuple) + code, _, stderr = result + assert code == EX_USAGE + assert "ERROR:" in stderr + # comma-separated list of files + fname_dummy1 = tmp_path / "dummy1.txt" + fname_dummy1.touch() + fname_dummy2 = tmp_path / "dummy2.txt" + fname_dummy2.touch() + assert cs.main("-I", fname_dummy1, "-I", fname, "-I", fname_dummy2, bad_name) == 1 + assert cs.main("-I", f"{fname_dummy1},{fname},{fname_dummy2}", bad_name) == 1 def test_ignore_words_with_cases( @@ -495,6 +509,13 @@ def test_exclude_file( ) assert cs.main(bad_name) == 18 assert cs.main("-x", fname, bad_name) == 1 + # comma-separated list of files + fname_dummy1 = tmp_path / "dummy1.txt" + fname_dummy1.touch() + fname_dummy2 = tmp_path / "dummy2.txt" + fname_dummy2.touch() + assert cs.main("-x", fname_dummy1, "-x", fname, "-x", fname_dummy2, bad_name) == 1 + assert cs.main("-x", f"{fname_dummy1},{fname},{fname_dummy2}", bad_name) == 1 def test_encoding( diff --git a/pyproject.toml b/pyproject.toml index 70aa216f06..32a566b813 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -169,6 +169,6 @@ max-complexity = 45 [tool.ruff.lint.pylint] allow-magic-value-types = ["bytes", "int", "str",] max-args = 13 -max-branches = 49 +max-branches = 51 max-returns = 11 max-statements = 119