From 98e2b9cccf57e7e2c68f5c9d005e109b859e5db9 Mon Sep 17 00:00:00 2001 From: AlightSoulmate <2314297572@qq.com> Date: Wed, 15 Apr 2026 00:16:59 +0800 Subject: [PATCH 01/14] Allow comment in dictionary --- codespell_lib/_spellchecker.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/codespell_lib/_spellchecker.py b/codespell_lib/_spellchecker.py index 7b511e6d3e..5e59242544 100644 --- a/codespell_lib/_spellchecker.py +++ b/codespell_lib/_spellchecker.py @@ -54,6 +54,9 @@ def build_dict( with open(filename, encoding="utf-8") as f: translate_tables = [(x, str.maketrans(x, y)) for x, y in alt_chars] for line in f: + line = line.strip() + if not line or line.startswith("#") or "->" not in line: + continue [key, data] = line.split("->") # TODO: For now, convert both to lower. # Someday we can maybe add support for fixing caps. From 39ed8e9b930150e5da9fd5b864d5de29ad6bfebf Mon Sep 17 00:00:00 2001 From: AlightSoulmate <2314297572@qq.com> Date: Wed, 15 Apr 2026 22:30:17 +0800 Subject: [PATCH 02/14] Allow inline comments and add a test --- codespell_lib/_spellchecker.py | 18 ++++++++++++++--- codespell_lib/tests/test_basic.py | 32 +++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/codespell_lib/_spellchecker.py b/codespell_lib/_spellchecker.py index 5e59242544..325f4ac48f 100644 --- a/codespell_lib/_spellchecker.py +++ b/codespell_lib/_spellchecker.py @@ -15,6 +15,7 @@ Copyright (C) 2010-2011 Lucas De Marchi Copyright (C) 2011 ProFUSION embedded systems """ +import sys # Pass all misspellings through this translation table to generate # alternative misspellings and fixes. @@ -54,10 +55,21 @@ def build_dict( with open(filename, encoding="utf-8") as f: translate_tables = [(x, str.maketrans(x, y)) for x, y in alt_chars] for line in f: - line = line.strip() - if not line or line.startswith("#") or "->" not in line: + left, pound, _ = line.partition("#") + if pound and left and left[-1] not in (' ', '\t'): + print( + f"WARNING: {filename}: missing spaces before #: {line.rstrip()!r}", + file=sys.stderr, + ) + continue + + line = left.strip() + if not line: + continue + try: + [key, data] = line.split("->") + except ValueError: continue - [key, data] = line.split("->") # TODO: For now, convert both to lower. # Someday we can maybe add support for fixing caps. key = key.lower() diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py index 5120e1e8a1..3ba2328fa8 100644 --- a/codespell_lib/tests/test_basic.py +++ b/codespell_lib/tests/test_basic.py @@ -1524,3 +1524,35 @@ def test_args_from_file( print("Testing with direct call to cs_.main()") r = cs_.main(*args[1:]) print(f"{r=}") + + +def test_dict_comments( + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + """Test dictionary comments and blank lines.""" + fname = tmp_path / "bad.txt" + fname.write_text("abandonned\noccured\n") + + dictionary = tmp_path / "test.txt" + dictionary.write_text( + "#comment\n" + "# comment\n" + " #comment\n" + "\n" + "\r\n" + "abandonned->abandoned # inline comment\n" + "occured->occurred# invalid inline comment\n" + "abil#ity->ability # hash in illegal position\n", + encoding="utf-8", + ) + + # Allow valid inline comments. + # Skip entries where '#' is not preceded by whitespace. + result = cs.main("-D", dictionary, fname, std=True) + assert isinstance(result, tuple) + code, stdout, stderr = result + assert code == 1 + assert "abandonned ==> abandoned" in stdout + assert "occured ==> occurred" not in stdout + assert "missing spaces before #" in stderr \ No newline at end of file From e952d6f09b6507df8c66d492d3bf09775da10ecf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 15 Apr 2026 14:34:54 +0000 Subject: [PATCH 03/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- codespell_lib/_spellchecker.py | 3 ++- codespell_lib/tests/test_basic.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/codespell_lib/_spellchecker.py b/codespell_lib/_spellchecker.py index 325f4ac48f..e961a12493 100644 --- a/codespell_lib/_spellchecker.py +++ b/codespell_lib/_spellchecker.py @@ -15,6 +15,7 @@ Copyright (C) 2010-2011 Lucas De Marchi Copyright (C) 2011 ProFUSION embedded systems """ + import sys # Pass all misspellings through this translation table to generate @@ -56,7 +57,7 @@ def build_dict( translate_tables = [(x, str.maketrans(x, y)) for x, y in alt_chars] for line in f: left, pound, _ = line.partition("#") - if pound and left and left[-1] not in (' ', '\t'): + if pound and left and left[-1] not in (" ", "\t"): print( f"WARNING: {filename}: missing spaces before #: {line.rstrip()!r}", file=sys.stderr, diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py index 3ba2328fa8..479aa7b5c3 100644 --- a/codespell_lib/tests/test_basic.py +++ b/codespell_lib/tests/test_basic.py @@ -1555,4 +1555,4 @@ def test_dict_comments( assert code == 1 assert "abandonned ==> abandoned" in stdout assert "occured ==> occurred" not in stdout - assert "missing spaces before #" in stderr \ No newline at end of file + assert "missing spaces before #" in stderr From 40336cee2fa6819cfaff7d4c5e96b15e745c2dc9 Mon Sep 17 00:00:00 2001 From: Cyrus Yu <2314297572@qq.com> Date: Wed, 15 Apr 2026 23:25:30 +0800 Subject: [PATCH 04/14] Update codespell_lib/_spellchecker.py Co-authored-by: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> --- codespell_lib/_spellchecker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codespell_lib/_spellchecker.py b/codespell_lib/_spellchecker.py index e961a12493..bb2a081623 100644 --- a/codespell_lib/_spellchecker.py +++ b/codespell_lib/_spellchecker.py @@ -68,7 +68,7 @@ def build_dict( if not line: continue try: - [key, data] = line.split("->") + key, data = line.split("->") except ValueError: continue # TODO: For now, convert both to lower. From 672b70474d61be6cba20eef195db20ece6613da3 Mon Sep 17 00:00:00 2001 From: Cyrus Yu <2314297572@qq.com> Date: Wed, 15 Apr 2026 23:30:10 +0800 Subject: [PATCH 05/14] fix unexpected import --- codespell_lib/_spellchecker.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/codespell_lib/_spellchecker.py b/codespell_lib/_spellchecker.py index bb2a081623..ee54be7d59 100644 --- a/codespell_lib/_spellchecker.py +++ b/codespell_lib/_spellchecker.py @@ -16,8 +16,6 @@ Copyright (C) 2011 ProFUSION embedded systems """ -import sys - # Pass all misspellings through this translation table to generate # alternative misspellings and fixes. alt_chars = (("'", "’"),) # noqa: RUF001 @@ -58,10 +56,7 @@ def build_dict( for line in f: left, pound, _ = line.partition("#") if pound and left and left[-1] not in (" ", "\t"): - print( - f"WARNING: {filename}: missing spaces before #: {line.rstrip()!r}", - file=sys.stderr, - ) + print(f"WARNING: {filename}: missing spaces before #: {line.rstrip()!r}") continue line = left.strip() From 54c6c772304929b05425c47850f518640f43eee3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 15 Apr 2026 15:30:25 +0000 Subject: [PATCH 06/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- codespell_lib/_spellchecker.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/codespell_lib/_spellchecker.py b/codespell_lib/_spellchecker.py index ee54be7d59..ff70a8f6e3 100644 --- a/codespell_lib/_spellchecker.py +++ b/codespell_lib/_spellchecker.py @@ -56,7 +56,9 @@ def build_dict( for line in f: left, pound, _ = line.partition("#") if pound and left and left[-1] not in (" ", "\t"): - print(f"WARNING: {filename}: missing spaces before #: {line.rstrip()!r}") + print( + f"WARNING: {filename}: missing spaces before #: {line.rstrip()!r}" + ) continue line = left.strip() From 8516c18cffd879295c1ac269e981a6ae0e470808 Mon Sep 17 00:00:00 2001 From: Cyrus Yu <2314297572@qq.com> Date: Wed, 15 Apr 2026 23:36:13 +0800 Subject: [PATCH 07/14] fix warning statement --- codespell_lib/_spellchecker.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/codespell_lib/_spellchecker.py b/codespell_lib/_spellchecker.py index ff70a8f6e3..ee54be7d59 100644 --- a/codespell_lib/_spellchecker.py +++ b/codespell_lib/_spellchecker.py @@ -56,9 +56,7 @@ def build_dict( for line in f: left, pound, _ = line.partition("#") if pound and left and left[-1] not in (" ", "\t"): - print( - f"WARNING: {filename}: missing spaces before #: {line.rstrip()!r}" - ) + print(f"WARNING: {filename}: missing spaces before #: {line.rstrip()!r}") continue line = left.strip() From b2f67590f1131b87197de5d7c2d10ed5a887eff8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 15 Apr 2026 15:36:29 +0000 Subject: [PATCH 08/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- codespell_lib/_spellchecker.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/codespell_lib/_spellchecker.py b/codespell_lib/_spellchecker.py index ee54be7d59..ff70a8f6e3 100644 --- a/codespell_lib/_spellchecker.py +++ b/codespell_lib/_spellchecker.py @@ -56,7 +56,9 @@ def build_dict( for line in f: left, pound, _ = line.partition("#") if pound and left and left[-1] not in (" ", "\t"): - print(f"WARNING: {filename}: missing spaces before #: {line.rstrip()!r}") + print( + f"WARNING: {filename}: missing spaces before #: {line.rstrip()!r}" + ) continue line = left.strip() From c76c7dc9cfa259393b0e9e4da0cd81c11f0898cf Mon Sep 17 00:00:00 2001 From: AlightSoulmate <2314297572@qq.com> Date: Wed, 15 Apr 2026 23:48:34 +0800 Subject: [PATCH 09/14] Fix unexpected sys import and typos --- codespell_lib/_spellchecker.py | 6 +----- codespell_lib/tests/test_basic.py | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/codespell_lib/_spellchecker.py b/codespell_lib/_spellchecker.py index 325f4ac48f..32b6b4e654 100644 --- a/codespell_lib/_spellchecker.py +++ b/codespell_lib/_spellchecker.py @@ -15,7 +15,6 @@ Copyright (C) 2010-2011 Lucas De Marchi Copyright (C) 2011 ProFUSION embedded systems """ -import sys # Pass all misspellings through this translation table to generate # alternative misspellings and fixes. @@ -57,10 +56,7 @@ def build_dict( for line in f: left, pound, _ = line.partition("#") if pound and left and left[-1] not in (' ', '\t'): - print( - f"WARNING: {filename}: missing spaces before #: {line.rstrip()!r}", - file=sys.stderr, - ) + print(f"WARNING: {filename}: missing spaces before #: {line.rstrip()!r}") continue line = left.strip() diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py index 3ba2328fa8..f6625bcfbe 100644 --- a/codespell_lib/tests/test_basic.py +++ b/codespell_lib/tests/test_basic.py @@ -1551,8 +1551,8 @@ def test_dict_comments( # Skip entries where '#' is not preceded by whitespace. result = cs.main("-D", dictionary, fname, std=True) assert isinstance(result, tuple) - code, stdout, stderr = result + code, stdout, _ = result assert code == 1 assert "abandonned ==> abandoned" in stdout assert "occured ==> occurred" not in stdout - assert "missing spaces before #" in stderr \ No newline at end of file + assert "missing spaces before #" in stdout \ No newline at end of file From d665b808d5423d84c74f2650e82d5ba380ef5393 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 15 Apr 2026 15:51:34 +0000 Subject: [PATCH 10/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- codespell_lib/_spellchecker.py | 6 ++++-- codespell_lib/tests/test_basic.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/codespell_lib/_spellchecker.py b/codespell_lib/_spellchecker.py index 998b106eed..ff70a8f6e3 100644 --- a/codespell_lib/_spellchecker.py +++ b/codespell_lib/_spellchecker.py @@ -55,8 +55,10 @@ def build_dict( translate_tables = [(x, str.maketrans(x, y)) for x, y in alt_chars] for line in f: left, pound, _ = line.partition("#") - if pound and left and left[-1] not in (' ', '\t'): - print(f"WARNING: {filename}: missing spaces before #: {line.rstrip()!r}") + if pound and left and left[-1] not in (" ", "\t"): + print( + f"WARNING: {filename}: missing spaces before #: {line.rstrip()!r}" + ) continue line = left.strip() diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py index f6625bcfbe..dfa6673f3f 100644 --- a/codespell_lib/tests/test_basic.py +++ b/codespell_lib/tests/test_basic.py @@ -1555,4 +1555,4 @@ def test_dict_comments( assert code == 1 assert "abandonned ==> abandoned" in stdout assert "occured ==> occurred" not in stdout - assert "missing spaces before #" in stdout \ No newline at end of file + assert "missing spaces before #" in stdout From b6c69c27d68e2396efee69448ac255caaa66e2b8 Mon Sep 17 00:00:00 2001 From: AlightSoulmate <2314297572@qq.com> Date: Wed, 15 Apr 2026 23:59:31 +0800 Subject: [PATCH 11/14] Give up the warning --- codespell_lib/_spellchecker.py | 1 - codespell_lib/tests/test_basic.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/codespell_lib/_spellchecker.py b/codespell_lib/_spellchecker.py index 998b106eed..f85e3133fe 100644 --- a/codespell_lib/_spellchecker.py +++ b/codespell_lib/_spellchecker.py @@ -56,7 +56,6 @@ def build_dict( for line in f: left, pound, _ = line.partition("#") if pound and left and left[-1] not in (' ', '\t'): - print(f"WARNING: {filename}: missing spaces before #: {line.rstrip()!r}") continue line = left.strip() diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py index f6625bcfbe..2b36cb9615 100644 --- a/codespell_lib/tests/test_basic.py +++ b/codespell_lib/tests/test_basic.py @@ -1554,5 +1554,4 @@ def test_dict_comments( code, stdout, _ = result assert code == 1 assert "abandonned ==> abandoned" in stdout - assert "occured ==> occurred" not in stdout - assert "missing spaces before #" in stdout \ No newline at end of file + assert "occured ==> occurred" not in stdout \ No newline at end of file From facfc4b3a44e8475461f37bacb5b4c8d2ba2ec83 Mon Sep 17 00:00:00 2001 From: AlightSoulmate <2314297572@qq.com> Date: Thu, 16 Apr 2026 00:01:53 +0800 Subject: [PATCH 12/14] Give up the warning --- codespell_lib/_spellchecker.py | 3 --- codespell_lib/tests/test_basic.py | 1 - 2 files changed, 4 deletions(-) diff --git a/codespell_lib/_spellchecker.py b/codespell_lib/_spellchecker.py index ff70a8f6e3..40f81c54cf 100644 --- a/codespell_lib/_spellchecker.py +++ b/codespell_lib/_spellchecker.py @@ -56,9 +56,6 @@ def build_dict( for line in f: left, pound, _ = line.partition("#") if pound and left and left[-1] not in (" ", "\t"): - print( - f"WARNING: {filename}: missing spaces before #: {line.rstrip()!r}" - ) continue line = left.strip() diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py index dfa6673f3f..1b0c2b0407 100644 --- a/codespell_lib/tests/test_basic.py +++ b/codespell_lib/tests/test_basic.py @@ -1555,4 +1555,3 @@ def test_dict_comments( assert code == 1 assert "abandonned ==> abandoned" in stdout assert "occured ==> occurred" not in stdout - assert "missing spaces before #" in stdout From e90ae9383a13fb86b4d3baf23bc4046873741897 Mon Sep 17 00:00:00 2001 From: AlightSoulmate <2314297572@qq.com> Date: Thu, 16 Apr 2026 00:20:57 +0800 Subject: [PATCH 13/14] Updated README.rst --- README.rst | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/README.rst b/README.rst index a9562e8f2b..84967bec5c 100644 --- a/README.rst +++ b/README.rst @@ -293,6 +293,34 @@ applied directly, but should instead be manually inspected. E.g.: clas->class, clash, disabled because of name clash in c++ +Comments in dictionaries +---------------------------- + +Dictionary files may contain comments. + +1. Pure comment:: + + # comment + #comment + +2. Inline comment must be preceded by whitespace:: + + abondon->abandon #comment + abondon->abandon # comment + + The ``#`` character is treated as the start of the comment only if it is + preceded by whitespace. + +3. Invalid comment examples:: + + abondon->abandon#comment + thenumberone->the#one + the#one->thenumberone + + In such cases, the whole line is considered malformed and will be ignored. + +4. Blank lines are also ignored. + Development setup ----------------- From d07c12407dd1b2380c76269a973795d202295661 Mon Sep 17 00:00:00 2001 From: AlightSoulmate <2314297572@qq.com> Date: Thu, 16 Apr 2026 12:38:30 +0800 Subject: [PATCH 14/14] Add test cases --- codespell_lib/tests/test_basic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py index 1b0c2b0407..7a23c3eb06 100644 --- a/codespell_lib/tests/test_basic.py +++ b/codespell_lib/tests/test_basic.py @@ -1543,7 +1543,9 @@ def test_dict_comments( "\r\n" "abandonned->abandoned # inline comment\n" "occured->occurred# invalid inline comment\n" - "abil#ity->ability # hash in illegal position\n", + "abil#ity->ability # hash in illegal position\n" + "ability->#ability # hash in illegal position\n" + "abilityability # no arrow\n", encoding="utf-8", )