From 5beb8704a68e57388ec8926d67283b821d1e8e10 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Wed, 16 Jul 2025 09:46:42 +0100 Subject: [PATCH 1/2] Commit --- Lib/email/charset.py | 33 +++---------------- Lib/encodings/aliases.py | 9 +++++ ...5-07-16-09-45-58.gh-issue-53144.mrKwMW.rst | 2 ++ 3 files changed, 16 insertions(+), 28 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-07-16-09-45-58.gh-issue-53144.mrKwMW.rst diff --git a/Lib/email/charset.py b/Lib/email/charset.py index 5036c3f58a5633..42f304481d9c08 100644 --- a/Lib/email/charset.py +++ b/Lib/email/charset.py @@ -16,6 +16,7 @@ from email import errors from email.encoders import encode_7or8bit +from encodings.aliases import aliases as ALIASES # Flags for types of header encodings @@ -61,35 +62,11 @@ 'utf-8': (SHORTEST, BASE64, 'utf-8'), } -# Aliases for other commonly-used names for character sets. Map -# them to the real ones used in email. -ALIASES = { - 'latin_1': 'iso-8859-1', +# The email module uses the slower latin-1 encoding +ALIASES.update({ 'latin-1': 'iso-8859-1', - 'latin_2': 'iso-8859-2', - 'latin-2': 'iso-8859-2', - 'latin_3': 'iso-8859-3', - 'latin-3': 'iso-8859-3', - 'latin_4': 'iso-8859-4', - 'latin-4': 'iso-8859-4', - 'latin_5': 'iso-8859-9', - 'latin-5': 'iso-8859-9', - 'latin_6': 'iso-8859-10', - 'latin-6': 'iso-8859-10', - 'latin_7': 'iso-8859-13', - 'latin-7': 'iso-8859-13', - 'latin_8': 'iso-8859-14', - 'latin-8': 'iso-8859-14', - 'latin_9': 'iso-8859-15', - 'latin-9': 'iso-8859-15', - 'latin_10':'iso-8859-16', - 'latin-10':'iso-8859-16', - 'cp949': 'ks_c_5601-1987', - 'euc_jp': 'euc-jp', - 'euc_kr': 'euc-kr', - 'ascii': 'us-ascii', - } - + 'latin_1': 'iso-8859-1', +}) # Map charsets to their Unicode codec strings. CODEC_MAP = { diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py index 4ecb6b6e297a13..474d74ea3dc191 100644 --- a/Lib/encodings/aliases.py +++ b/Lib/encodings/aliases.py @@ -318,6 +318,7 @@ 'iso_ir_157' : 'iso8859_10', 'l6' : 'iso8859_10', 'latin6' : 'iso8859_10', + 'latin_6' : 'iso8859_10', # iso8859_11 codec 'thai' : 'iso8859_11', @@ -328,6 +329,7 @@ 'iso_8859_13' : 'iso8859_13', 'l7' : 'iso8859_13', 'latin7' : 'iso8859_13', + 'latin_7' : 'iso8859_13', # iso8859_14 codec 'iso_8859_14' : 'iso8859_14', @@ -336,11 +338,13 @@ 'iso_ir_199' : 'iso8859_14', 'l8' : 'iso8859_14', 'latin8' : 'iso8859_14', + 'latin_8' : 'iso8859_14', # iso8859_15 codec 'iso_8859_15' : 'iso8859_15', 'l9' : 'iso8859_15', 'latin9' : 'iso8859_15', + 'latin_9' : 'iso8859_15', # iso8859_16 codec 'iso_8859_16' : 'iso8859_16', @@ -348,6 +352,7 @@ 'iso_ir_226' : 'iso8859_16', 'l10' : 'iso8859_16', 'latin10' : 'iso8859_16', + 'latin_10' : 'iso8859_16', # iso8859_2 codec 'csisolatin2' : 'iso8859_2', @@ -356,6 +361,7 @@ 'iso_ir_101' : 'iso8859_2', 'l2' : 'iso8859_2', 'latin2' : 'iso8859_2', + 'latin_2' : 'iso8859_2', # iso8859_3 codec 'csisolatin3' : 'iso8859_3', @@ -364,6 +370,7 @@ 'iso_ir_109' : 'iso8859_3', 'l3' : 'iso8859_3', 'latin3' : 'iso8859_3', + 'latin_3' : 'iso8859_3', # iso8859_4 codec 'csisolatin4' : 'iso8859_4', @@ -372,6 +379,7 @@ 'iso_ir_110' : 'iso8859_4', 'l4' : 'iso8859_4', 'latin4' : 'iso8859_4', + 'latin_4' : 'iso8859_4', # iso8859_5 codec 'csisolatincyrillic' : 'iso8859_5', @@ -415,6 +423,7 @@ 'iso_ir_148' : 'iso8859_9', 'l5' : 'iso8859_9', 'latin5' : 'iso8859_9', + 'latin_5' : 'iso8859_9', # johab codec 'cp1361' : 'johab', diff --git a/Misc/NEWS.d/next/Library/2025-07-16-09-45-58.gh-issue-53144.mrKwMW.rst b/Misc/NEWS.d/next/Library/2025-07-16-09-45-58.gh-issue-53144.mrKwMW.rst new file mode 100644 index 00000000000000..f97291a0c435bc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-16-09-45-58.gh-issue-53144.mrKwMW.rst @@ -0,0 +1,2 @@ +:mod:`email`: Use :mod:`!encodings.aliases` for alias lookup +:mod:`!encodings.aliases`: Add ``latin_*`` aliases. From 21be51f68d46223b1fe4ae2bc0e687c0f33544dd Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Sun, 27 Jul 2025 21:10:43 +0200 Subject: [PATCH 2/2] Reuse --- Lib/email/charset.py | 33 ++++++++++++++++--- ...5-07-16-09-45-58.gh-issue-53144.mrKwMW.rst | 3 +- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/Lib/email/charset.py b/Lib/email/charset.py index 42f304481d9c08..5036c3f58a5633 100644 --- a/Lib/email/charset.py +++ b/Lib/email/charset.py @@ -16,7 +16,6 @@ from email import errors from email.encoders import encode_7or8bit -from encodings.aliases import aliases as ALIASES # Flags for types of header encodings @@ -62,11 +61,35 @@ 'utf-8': (SHORTEST, BASE64, 'utf-8'), } -# The email module uses the slower latin-1 encoding -ALIASES.update({ - 'latin-1': 'iso-8859-1', +# Aliases for other commonly-used names for character sets. Map +# them to the real ones used in email. +ALIASES = { 'latin_1': 'iso-8859-1', -}) + 'latin-1': 'iso-8859-1', + 'latin_2': 'iso-8859-2', + 'latin-2': 'iso-8859-2', + 'latin_3': 'iso-8859-3', + 'latin-3': 'iso-8859-3', + 'latin_4': 'iso-8859-4', + 'latin-4': 'iso-8859-4', + 'latin_5': 'iso-8859-9', + 'latin-5': 'iso-8859-9', + 'latin_6': 'iso-8859-10', + 'latin-6': 'iso-8859-10', + 'latin_7': 'iso-8859-13', + 'latin-7': 'iso-8859-13', + 'latin_8': 'iso-8859-14', + 'latin-8': 'iso-8859-14', + 'latin_9': 'iso-8859-15', + 'latin-9': 'iso-8859-15', + 'latin_10':'iso-8859-16', + 'latin-10':'iso-8859-16', + 'cp949': 'ks_c_5601-1987', + 'euc_jp': 'euc-jp', + 'euc_kr': 'euc-kr', + 'ascii': 'us-ascii', + } + # Map charsets to their Unicode codec strings. CODEC_MAP = { diff --git a/Misc/NEWS.d/next/Library/2025-07-16-09-45-58.gh-issue-53144.mrKwMW.rst b/Misc/NEWS.d/next/Library/2025-07-16-09-45-58.gh-issue-53144.mrKwMW.rst index f97291a0c435bc..f6c12916a6d30b 100644 --- a/Misc/NEWS.d/next/Library/2025-07-16-09-45-58.gh-issue-53144.mrKwMW.rst +++ b/Misc/NEWS.d/next/Library/2025-07-16-09-45-58.gh-issue-53144.mrKwMW.rst @@ -1,2 +1 @@ -:mod:`email`: Use :mod:`!encodings.aliases` for alias lookup -:mod:`!encodings.aliases`: Add ``latin_*`` aliases. +:mod:`!encodings.aliases`: Add ``latin_N`` aliases