From de9a5dbb97c999f5938072ae9c3652cf67a5a296 Mon Sep 17 00:00:00 2001
From: Tomoya Fujita <Tomoya.Fujita@sony.com>
Date: Sun, 2 Mar 2025 23:21:05 +0000
Subject: [PATCH] support `--ignore-words-case-sensitive` option.

Signed-off-by: Tomoya Fujita <Tomoya.Fujita@sony.com>
---
 README.rst                        |  5 +++-
 codespell_lib/_codespell.py       | 46 +++++++++++++++++++++++++------
 codespell_lib/tests/test_basic.py | 37 +++++++++++++++++++++++++
 3 files changed, 79 insertions(+), 9 deletions(-)

diff --git a/README.rst b/README.rst
index 8e256dc923..a02c3d27f1 100644
--- a/README.rst
+++ b/README.rst
@@ -119,7 +119,10 @@ You can select the optional dictionaries with the ``--builtin`` option.
 Ignoring words
 --------------
 
-When ignoring false positives, note that spelling errors are *case-insensitive* but words to ignore are *case-sensitive*. For example, the dictionary entry ``wrod`` will also match the typo ``Wrod``, but to ignore it you must pass ``wrod`` (to match the case of the dictionary entry).
+When ignoring false positives, note that spelling errors are *case-insensitive*.
+By default, only lowercase words to ignore are *case-insensitive*. For example, the dictionary entry ``wrod`` will also match the typo ``Wrod``; to ignore both, you can pass ``wrod``.
+Non-lowercase words to ignore are *case-sensitive*. For example, the dictionary entry ``wrod`` will also match the typo ``Wrod``. To ignore only that exact spelling, pass ``Wrod``; other casings such as ``wrod`` are still reported.
+If you want all words to ignore, including lowercase ones, to be handled in a *case-sensitive* way, use the ``--ignore-words-case-sensitive`` flag.
 
 The words to ignore can be passed in two ways:
 
diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
index dee6a63ee8..e429fc2a62 100644
--- a/codespell_lib/_codespell.py
+++ b/codespell_lib/_codespell.py
@@ -476,6 +476,13 @@ def parse_options(
         'the dictionary file. If set to "*", all '
         "misspelling in URIs and emails will be ignored.",
     )
+    parser.add_argument(
+        "--ignore-words-case-sensitive",
+        action="store_true",
+        default=False,
+        help="all ignore words in the ignore-words arguments in a case-sensitive way. "
+        "By default, lowercase words to ignore are handled in a case-insensitive way.",
+    )
     parser.add_argument(
         "-r",
         "--regex",
@@ -697,18 +704,25 @@ def parse_options(
 
 
 def process_ignore_words(
-    words: Iterable[str], ignore_words: Set[str], ignore_words_cased: Set[str]
+    words: Iterable[str],
+    ignore_words: Set[str],
+    ignore_words_cased: Set[str],
+    ignore_words_case_sensitive: bool = False,
 ) -> None:
     for word in words:
         word = word.strip()
-        if word == word.lower():
+        if ignore_words_case_sensitive:
+            # all ignore words are handled in a case-sensitive way
+            ignore_words_cased.add(word)
+        elif word == word.lower():
+            # lowercase words to ignore are handled in a case-insensitive way
             ignore_words.add(word)
         else:
             ignore_words_cased.add(word)
 
 
 def parse_ignore_words_option(
-    ignore_words_option: List[str],
+    ignore_words_option: List[str], ignore_words_case_sensitive: bool = False
 ) -> Tuple[Set[str], Set[str]]:
     ignore_words: Set[str] = set()
     ignore_words_cased: Set[str] = set()
@@ -718,6 +732,7 @@ def parse_ignore_words_option(
                 (word.strip() for word in comma_separated_words.split(",")),
                 ignore_words,
                 ignore_words_cased,
+                ignore_words_case_sensitive,
             )
     return (ignore_words, ignore_words_cased)
 
@@ -728,11 +743,17 @@ def build_exclude_hashes(filename: str, exclude_lines: Set[str]) -> None:
 
 
 def build_ignore_words(
-    filename: str, ignore_words: Set[str], ignore_words_cased: Set[str]
+    filename: str,
+    ignore_words: Set[str],
+    ignore_words_cased: Set[str],
+    ignore_word_case_sensitive: bool = False,
 ) -> None:
     with open(filename, encoding="utf-8") as f:
         process_ignore_words(
-            (line.strip() for line in f), ignore_words, ignore_words_cased
+            (line.strip() for line in f),
+            ignore_words,
+            ignore_words_cased,
+            ignore_word_case_sensitive,
         )
 
 
@@ -1173,7 +1194,7 @@ def main(*args: str) -> int:
         ignore_multiline_regex = None
 
     ignore_words, ignore_words_cased = parse_ignore_words_option(
-        options.ignore_words_list
+        options.ignore_words_list, options.ignore_words_case_sensitive
     )
     if options.ignore_words:
         ignore_words_files = flatten_clean_comma_separated_arguments(
@@ -1185,7 +1206,12 @@ def main(*args: str) -> int:
                     parser,
                     f"ERROR: cannot find ignore-words file: {ignore_words_file}",
                 )
-            build_ignore_words(ignore_words_file, ignore_words, ignore_words_cased)
+            build_ignore_words(
+                ignore_words_file,
+                ignore_words,
+                ignore_words_cased,
+                options.ignore_words_case_sensitive,
+            )
 
     uri_regex = options.uri_regex or uri_regex_def
     try:
@@ -1197,7 +1223,11 @@ def main(*args: str) -> int:
         )
 
     uri_ignore_words = set(
-        itertools.chain(*parse_ignore_words_option(options.uri_ignore_words_list))
+        itertools.chain(
+            *parse_ignore_words_option(
+                options.uri_ignore_words_list, options.ignore_words_case_sensitive
+            )
+        )
     )
 
     dictionaries = flatten_clean_comma_separated_arguments(options.dictionary or ["-"])
diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py
index a6c05fc089..c6e9494fb5 100644
--- a/codespell_lib/tests/test_basic.py
+++ b/codespell_lib/tests/test_basic.py
@@ -389,6 +389,7 @@ def test_ignore_words_with_cases(
     assert cs.main("-LMIS,Mis", bad_name) == 1
     assert cs.main("-I", fname, "-f", bad_name) == 1
     assert cs.main("-LMIS,Mis", "-f", bad_name) == 1
+    # Only lowercase ignore words are matched in a case-insensitive manner
     fname.write_text("mis")
     assert cs.main("-I", fname, bad_name) == 0
     assert cs.main("-Lmis", bad_name) == 0
@@ -396,6 +397,42 @@ def test_ignore_words_with_cases(
     assert cs.main("-Lmis", "-f", bad_name) == 0
 
 
+def test_ignore_words_with_case_sensitive(
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """Test --ignore-words-case-sensitive for -I and -L options."""
+    bad_name = tmp_path / "MIS.txt"
+    bad_name.write_text(
+        "1 MIS (Management Information System) 1\n2 Les Mis (1980 musical) 2\n3 mis 3\n"
+    )
+    assert cs.main(bad_name) == 3
+    assert cs.main(bad_name, "-f") == 4
+    fname = tmp_path / "ignore.txt"
+
+    fname.write_text("miS")
+    assert cs.main("--ignore-words-case-sensitive", "-I", fname, bad_name) == 3
+    assert cs.main("--ignore-words-case-sensitive", "-LmiS", bad_name) == 3
+    assert cs.main("--ignore-words-case-sensitive", "-I", fname, "-f", bad_name) == 4
+    assert cs.main("--ignore-words-case-sensitive", "-LmiS", "-f", bad_name) == 4
+    # With the flag, lowercase ignore words are also matched case-sensitively
+    fname.write_text("mis")
+    assert cs.main("--ignore-words-case-sensitive", "-I", fname, bad_name) == 2
+    assert cs.main("--ignore-words-case-sensitive", "-Lmis", bad_name) == 2
+    assert cs.main("--ignore-words-case-sensitive", "-I", fname, "-f", bad_name) == 3
+    assert cs.main("--ignore-words-case-sensitive", "-Lmis", "-f", bad_name) == 3
+    fname.write_text("MIS")
+    assert cs.main("--ignore-words-case-sensitive", "-I", fname, bad_name) == 2
+    assert cs.main("--ignore-words-case-sensitive", "-LMIS", bad_name) == 2
+    assert cs.main("--ignore-words-case-sensitive", "-I", fname, "-f", bad_name) == 2
+    assert cs.main("--ignore-words-case-sensitive", "-LMIS", "-f", bad_name) == 2
+    fname.write_text("MIS\nMis")
+    assert cs.main("--ignore-words-case-sensitive", "-I", fname, bad_name) == 1
+    assert cs.main("--ignore-words-case-sensitive", "-LMIS,Mis", bad_name) == 1
+    assert cs.main("--ignore-words-case-sensitive", "-I", fname, "-f", bad_name) == 1
+    assert cs.main("--ignore-words-case-sensitive", "-LMIS,Mis", "-f", bad_name) == 1
+
+
 def test_ignore_word_list(
     tmp_path: Path,
     capsys: pytest.CaptureFixture[str],