diff --git a/Lib/email/charset.py b/Lib/email/charset.py index 5036c3f58a5633..42f304481d9c08 100644 --- a/Lib/email/charset.py +++ b/Lib/email/charset.py @@ -16,6 +16,7 @@ from email import errors from email.encoders import encode_7or8bit +from encodings.aliases import aliases as ALIASES # Flags for types of header encodings @@ -61,35 +62,11 @@ 'utf-8': (SHORTEST, BASE64, 'utf-8'), } -# Aliases for other commonly-used names for character sets. Map -# them to the real ones used in email. -ALIASES = { - 'latin_1': 'iso-8859-1', +# email uses the slower iso-8859-1 for latin-1; this mutates encodings.aliases +ALIASES.update({ 'latin-1': 'iso-8859-1', - 'latin_2': 'iso-8859-2', - 'latin-2': 'iso-8859-2', - 'latin_3': 'iso-8859-3', - 'latin-3': 'iso-8859-3', - 'latin_4': 'iso-8859-4', - 'latin-4': 'iso-8859-4', - 'latin_5': 'iso-8859-9', - 'latin-5': 'iso-8859-9', - 'latin_6': 'iso-8859-10', - 'latin-6': 'iso-8859-10', - 'latin_7': 'iso-8859-13', - 'latin-7': 'iso-8859-13', - 'latin_8': 'iso-8859-14', - 'latin-8': 'iso-8859-14', - 'latin_9': 'iso-8859-15', - 'latin-9': 'iso-8859-15', - 'latin_10':'iso-8859-16', - 'latin-10':'iso-8859-16', - 'cp949': 'ks_c_5601-1987', - 'euc_jp': 'euc-jp', - 'euc_kr': 'euc-kr', - 'ascii': 'us-ascii', - } - + 'latin_1': 'iso-8859-1', +}) # Map charsets to their Unicode codec strings. 
CODEC_MAP = { diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py index 4ecb6b6e297a13..474d74ea3dc191 100644 --- a/Lib/encodings/aliases.py +++ b/Lib/encodings/aliases.py @@ -318,6 +318,7 @@ 'iso_ir_157' : 'iso8859_10', 'l6' : 'iso8859_10', 'latin6' : 'iso8859_10', + 'latin_6' : 'iso8859_10', # iso8859_11 codec 'thai' : 'iso8859_11', @@ -328,6 +329,7 @@ 'iso_8859_13' : 'iso8859_13', 'l7' : 'iso8859_13', 'latin7' : 'iso8859_13', + 'latin_7' : 'iso8859_13', # iso8859_14 codec 'iso_8859_14' : 'iso8859_14', @@ -336,11 +338,13 @@ 'iso_ir_199' : 'iso8859_14', 'l8' : 'iso8859_14', 'latin8' : 'iso8859_14', + 'latin_8' : 'iso8859_14', # iso8859_15 codec 'iso_8859_15' : 'iso8859_15', 'l9' : 'iso8859_15', 'latin9' : 'iso8859_15', + 'latin_9' : 'iso8859_15', # iso8859_16 codec 'iso_8859_16' : 'iso8859_16', @@ -348,6 +352,7 @@ 'iso_ir_226' : 'iso8859_16', 'l10' : 'iso8859_16', 'latin10' : 'iso8859_16', + 'latin_10' : 'iso8859_16', # iso8859_2 codec 'csisolatin2' : 'iso8859_2', @@ -356,6 +361,7 @@ 'iso_ir_101' : 'iso8859_2', 'l2' : 'iso8859_2', 'latin2' : 'iso8859_2', + 'latin_2' : 'iso8859_2', # iso8859_3 codec 'csisolatin3' : 'iso8859_3', @@ -364,6 +370,7 @@ 'iso_ir_109' : 'iso8859_3', 'l3' : 'iso8859_3', 'latin3' : 'iso8859_3', + 'latin_3' : 'iso8859_3', # iso8859_4 codec 'csisolatin4' : 'iso8859_4', @@ -372,6 +379,7 @@ 'iso_ir_110' : 'iso8859_4', 'l4' : 'iso8859_4', 'latin4' : 'iso8859_4', + 'latin_4' : 'iso8859_4', # iso8859_5 codec 'csisolatincyrillic' : 'iso8859_5', @@ -415,6 +423,7 @@ 'iso_ir_148' : 'iso8859_9', 'l5' : 'iso8859_9', 'latin5' : 'iso8859_9', + 'latin_5' : 'iso8859_9', # johab codec 'cp1361' : 'johab', diff --git a/Misc/NEWS.d/next/Library/2025-07-16-09-45-58.gh-issue-53144.mrKwMW.rst b/Misc/NEWS.d/next/Library/2025-07-16-09-45-58.gh-issue-53144.mrKwMW.rst new file mode 100644 index 00000000000000..f97291a0c435bc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-16-09-45-58.gh-issue-53144.mrKwMW.rst @@ -0,0 +1,2 @@ +:mod:`email`: Use 
:mod:`!encodings.aliases` for charset alias lookup. +:mod:`!encodings.aliases`: Add ``latin_*`` aliases.