diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py index 298177eb8003a7..245d941a8d13ce 100644 --- a/Lib/encodings/__init__.py +++ b/Lib/encodings/__init__.py @@ -58,11 +58,10 @@ def normalize_encoding(encoding): chars = [] punct = False for c in encoding: - if c.isalnum() or c == '.': + if c.isascii() and (c.isalnum() or c == '.'): if punct and chars: chars.append('_') - if c.isascii(): - chars.append(c) + chars.append(c) punct = False else: punct = True diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index d8666f7290e72e..fa777480664449 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -3888,6 +3888,12 @@ def search_function(encoding): self.assertEqual(FOUND, codecs.lookup('AAA---8')) self.assertEqual(FOUND, codecs.lookup('AAA 8')) self.assertEqual(FOUND, codecs.lookup('aaa\xe9\u20ac-8')) + self.assertEqual(FOUND, codecs.lookup('aaa\xe98')) + self.assertEqual(FOUND, codecs.lookup('aaa\u20ac8')) + self.assertEqual(FOUND, codecs.lookup('aaa-\xe9-8')) + self.assertEqual(FOUND, codecs.lookup('aaa-\u20ac-8')) + self.assertEqual(FOUND, codecs.lookup('aaa-8-\xe9')) + self.assertEqual(FOUND, codecs.lookup('aaa-8-\u20ac')) self.assertEqual(NOT_FOUND, codecs.lookup('AAA.8')) self.assertEqual(NOT_FOUND, codecs.lookup('AAA...8')) self.assertEqual(NOT_FOUND, codecs.lookup('BBB-8')) @@ -3899,6 +3905,12 @@ def test_encodings_normalize_encoding(self): normalize = encodings.normalize_encoding self.assertEqual(normalize('utf_8'), 'utf_8') self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8') + self.assertEqual(normalize('utf\xe98'), 'utf_8') + self.assertEqual(normalize('utf\u20ac8'), 'utf_8') + self.assertEqual(normalize('utf-\xe9-8'), 'utf_8') + self.assertEqual(normalize('utf-\u20ac-8'), 'utf_8') + self.assertEqual(normalize('utf-8-\xe9'), 'utf_8') + self.assertEqual(normalize('utf-8-\u20ac'), 'utf_8') self.assertEqual(normalize('utf 8'), 'utf_8') # encodings.normalize_encoding() doesn't convert # characters to 
lower case. diff --git a/Misc/NEWS.d/next/Library/2025-07-17-11-59-10.gh-issue-136736.kzQ_dY.rst b/Misc/NEWS.d/next/Library/2025-07-17-11-59-10.gh-issue-136736.kzQ_dY.rst new file mode 100644 index 00000000000000..5596170549e797 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-17-11-59-10.gh-issue-136736.kzQ_dY.rst @@ -0,0 +1,2 @@ +Fix handling of alphanumeric non-ASCII characters in +:func:`encodings.normalize_encoding`.