From de9a5dbb97c999f5938072ae9c3652cf67a5a296 Mon Sep 17 00:00:00 2001 From: Tomoya Fujita Date: Sun, 2 Mar 2025 23:21:05 +0000 Subject: [PATCH] support `--ignore-words-case-sensitive` option. Signed-off-by: Tomoya Fujita --- README.rst | 5 +++- codespell_lib/_codespell.py | 46 +++++++++++++++++++++++++------ codespell_lib/tests/test_basic.py | 37 +++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 9 deletions(-) diff --git a/README.rst b/README.rst index 8e256dc923..a02c3d27f1 100644 --- a/README.rst +++ b/README.rst @@ -119,7 +119,10 @@ You can select the optional dictionaries with the ``--builtin`` option. Ignoring words -------------- -When ignoring false positives, note that spelling errors are *case-insensitive* but words to ignore are *case-sensitive*. For example, the dictionary entry ``wrod`` will also match the typo ``Wrod``, but to ignore it you must pass ``wrod`` (to match the case of the dictionary entry). +When ignoring false positives, note that spelling errors are *case-insensitive*. +By default, only lowercase words to ignore are *case-insensitive*. For example, the dictionary entry ``wrod`` will also match the typo ``Wrod``; to ignore it regardless of case, you can pass ``wrod``. +Non-lowercase words to ignore are *case-sensitive*. For example, the dictionary entry ``wrod`` will also match the typo ``Wrod``; to ignore only that exact spelling, you must pass ``Wrod``. +If you want all words to ignore, including lowercase ones, to be handled in *case-sensitive* mode, you can use the optional ``--ignore-words-case-sensitive`` flag. The words to ignore can be passed in two ways: diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py index dee6a63ee8..e429fc2a62 100644 --- a/codespell_lib/_codespell.py +++ b/codespell_lib/_codespell.py @@ -476,6 +476,13 @@ def parse_options( 'the dictionary file. 
If set to "*", all ' "misspelling in URIs and emails will be ignored.", ) + parser.add_argument( + "--ignore-words-case-sensitive", + action="store_true", + default=False, + help="all ignore words in the ignore-words arguments in a case-sensitive way. " + "By default, lowercase words to ignore are handled in a case-insensitive way.", + ) parser.add_argument( "-r", "--regex", @@ -697,18 +704,25 @@ def parse_options( def process_ignore_words( - words: Iterable[str], ignore_words: Set[str], ignore_words_cased: Set[str] + words: Iterable[str], + ignore_words: Set[str], + ignore_words_cased: Set[str], + ignore_words_case_sensitive: bool = False, ) -> None: for word in words: word = word.strip() - if word == word.lower(): + if ignore_words_case_sensitive: + # all ignore words are handled in a case-sensitive way + ignore_words_cased.add(word) + elif word == word.lower(): + # lowercase words to ignore are handled in a case-insensitive way ignore_words.add(word) else: ignore_words_cased.add(word) def parse_ignore_words_option( - ignore_words_option: List[str], + ignore_words_option: List[str], ignore_words_case_sensitive: bool = False ) -> Tuple[Set[str], Set[str]]: ignore_words: Set[str] = set() ignore_words_cased: Set[str] = set() @@ -718,6 +732,7 @@ def parse_ignore_words_option( (word.strip() for word in comma_separated_words.split(",")), ignore_words, ignore_words_cased, + ignore_words_case_sensitive, ) return (ignore_words, ignore_words_cased) @@ -728,11 +743,17 @@ def build_exclude_hashes(filename: str, exclude_lines: Set[str]) -> None: def build_ignore_words( - filename: str, ignore_words: Set[str], ignore_words_cased: Set[str] + filename: str, + ignore_words: Set[str], + ignore_words_cased: Set[str], + ignore_word_case_sensitive: bool = False, ) -> None: with open(filename, encoding="utf-8") as f: process_ignore_words( - (line.strip() for line in f), ignore_words, ignore_words_cased + (line.strip() for line in f), + ignore_words, + ignore_words_cased, + 
ignore_word_case_sensitive, ) @@ -1173,7 +1194,7 @@ def main(*args: str) -> int: ignore_multiline_regex = None ignore_words, ignore_words_cased = parse_ignore_words_option( - options.ignore_words_list + options.ignore_words_list, options.ignore_words_case_sensitive ) if options.ignore_words: ignore_words_files = flatten_clean_comma_separated_arguments( @@ -1185,7 +1206,12 @@ def main(*args: str) -> int: parser, f"ERROR: cannot find ignore-words file: {ignore_words_file}", ) - build_ignore_words(ignore_words_file, ignore_words, ignore_words_cased) + build_ignore_words( + ignore_words_file, + ignore_words, + ignore_words_cased, + options.ignore_words_case_sensitive, + ) uri_regex = options.uri_regex or uri_regex_def try: @@ -1197,7 +1223,11 @@ def main(*args: str) -> int: ) uri_ignore_words = set( - itertools.chain(*parse_ignore_words_option(options.uri_ignore_words_list)) + itertools.chain( + *parse_ignore_words_option( + options.uri_ignore_words_list, options.ignore_words_case_sensitive + ) + ) ) dictionaries = flatten_clean_comma_separated_arguments(options.dictionary or ["-"]) diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py index a6c05fc089..c6e9494fb5 100644 --- a/codespell_lib/tests/test_basic.py +++ b/codespell_lib/tests/test_basic.py @@ -389,6 +389,7 @@ def test_ignore_words_with_cases( assert cs.main("-LMIS,Mis", bad_name) == 1 assert cs.main("-I", fname, "-f", bad_name) == 1 assert cs.main("-LMIS,Mis", "-f", bad_name) == 1 + # Only lowercase words are ignored works in a case-insensitive manner fname.write_text("mis") assert cs.main("-I", fname, bad_name) == 0 assert cs.main("-Lmis", bad_name) == 0 @@ -396,6 +397,42 @@ def test_ignore_words_with_cases( assert cs.main("-Lmis", "-f", bad_name) == 0 +def test_ignore_words_with_case_sensitive( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + """Test --ignore-words-case-sensitive for -I and -L options.""" + bad_name = tmp_path / "MIS.txt" + 
bad_name.write_text( + "1 MIS (Management Information System) 1\n2 Les Mis (1980 musical) 2\n3 mis 3\n" + ) + assert cs.main(bad_name) == 3 + assert cs.main(bad_name, "-f") == 4 + fname = tmp_path / "ignore.txt" + + fname.write_text("miS") + assert cs.main("--ignore-words-case-sensitive", "-I", fname, bad_name) == 3 + assert cs.main("--ignore-words-case-sensitive", "-LmiS", bad_name) == 3 + assert cs.main("--ignore-words-case-sensitive", "-I", fname, "-f", bad_name) == 4 + assert cs.main("--ignore-words-case-sensitive", "-LmiS", "-f", bad_name) == 4 + # lowercase words are ignored also works in a case-sensitive manner + fname.write_text("mis") + assert cs.main("--ignore-words-case-sensitive", "-I", fname, bad_name) == 2 + assert cs.main("--ignore-words-case-sensitive", "-Lmis", bad_name) == 2 + assert cs.main("--ignore-words-case-sensitive", "-I", fname, "-f", bad_name) == 3 + assert cs.main("--ignore-words-case-sensitive", "-Lmis", "-f", bad_name) == 3 + fname.write_text("MIS") + assert cs.main("--ignore-words-case-sensitive", "-I", fname, bad_name) == 2 + assert cs.main("--ignore-words-case-sensitive", "-LMIS", bad_name) == 2 + assert cs.main("--ignore-words-case-sensitive", "-I", fname, "-f", bad_name) == 2 + assert cs.main("--ignore-words-case-sensitive", "-LMIS", "-f", bad_name) == 2 + fname.write_text("MIS\nMis") + assert cs.main("--ignore-words-case-sensitive", "-I", fname, bad_name) == 1 + assert cs.main("--ignore-words-case-sensitive", "-LMIS,Mis", bad_name) == 1 + assert cs.main("--ignore-words-case-sensitive", "-I", fname, "-f", bad_name) == 1 + assert cs.main("--ignore-words-case-sensitive", "-LMIS,Mis", "-f", bad_name) == 1 + + def test_ignore_word_list( tmp_path: Path, capsys: pytest.CaptureFixture[str],