Skip to content

Commit afec5e1

Browse files
committed
apply p7zip-project#232 to p7zip17 also
1 parent a45b883 commit afec5e1

File tree

1 file changed

+137
-68
lines changed

1 file changed

+137
-68
lines changed

CPP/7zip/Archive/Zip/ZipItem.cpp

Lines changed: 137 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -356,83 +356,152 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo
356356
}
357357

358358
#if (!defined _WIN32) && (!defined __CYGWIN__) && (!defined __APPLE__)
359+
359360
// Convert legacy OEM or ANSI char set to UTF-8 if needed
360361
// Use system locale to select code page
361362

362-
Byte hostOS = GetHostOS();
363-
if (!isUtf8 && ((hostOS == NFileHeader::NHostOS::kFAT) || (hostOS == NFileHeader::NHostOS::kNTFS))) {
364-
365-
const char *oemcp;
366-
oemcp = getenv("OEMCP");
367-
if (!oemcp) {
368-
oemcp = "CP437\0"; // CP name is 6 chars max
369-
370-
const char *lc_to_cp_table[] = {
371-
"af_ZA", "CP850", "ar_SA", "CP720", "ar_LB", "CP720", "ar_EG", "CP720",
372-
"ar_DZ", "CP720", "ar_BH", "CP720", "ar_IQ", "CP720", "ar_JO", "CP720",
373-
"ar_KW", "CP720", "ar_LY", "CP720", "ar_MA", "CP720", "ar_OM", "CP720",
374-
"ar_QA", "CP720", "ar_SY", "CP720", "ar_TN", "CP720", "ar_AE", "CP720",
375-
"ar_YE", "CP720","ast_ES", "CP850", "az_AZ", "CP866", "az_AZ", "CP857",
376-
"be_BY", "CP866", "bg_BG", "CP866", "br_FR", "CP850", "ca_ES", "CP850",
377-
"zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP850", "cs_CZ", "CP852",
378-
"cy_GB", "CP850", "da_DK", "CP850", "de_AT", "CP850", "de_LI", "CP850",
379-
"de_LU", "CP850", "de_CH", "CP850", "de_DE", "CP850", "el_GR", "CP737",
380-
"en_AU", "CP850", "en_CA", "CP850", "en_GB", "CP850", "en_IE", "CP850",
381-
"en_JM", "CP850", "en_BZ", "CP850", "en_PH", "CP437", "en_ZA", "CP437",
382-
"en_TT", "CP850", "en_US", "CP437", "en_ZW", "CP437", "en_NZ", "CP850",
383-
"es_PA", "CP850", "es_BO", "CP850", "es_CR", "CP850", "es_DO", "CP850",
384-
"es_SV", "CP850", "es_EC", "CP850", "es_GT", "CP850", "es_HN", "CP850",
385-
"es_NI", "CP850", "es_CL", "CP850", "es_MX", "CP850", "es_ES", "CP850",
386-
"es_CO", "CP850", "es_ES", "CP850", "es_PE", "CP850", "es_AR", "CP850",
387-
"es_PR", "CP850", "es_VE", "CP850", "es_UY", "CP850", "es_PY", "CP850",
388-
"et_EE", "CP775", "eu_ES", "CP850", "fa_IR", "CP720", "fi_FI", "CP850",
389-
"fo_FO", "CP850", "fr_FR", "CP850", "fr_BE", "CP850", "fr_CA", "CP850",
390-
"fr_LU", "CP850", "fr_MC", "CP850", "fr_CH", "CP850", "ga_IE", "CP437",
391-
"gd_GB", "CP850", "gv_IM", "CP850", "gl_ES", "CP850", "he_IL", "CP862",
392-
"hr_HR", "CP852", "hu_HU", "CP852", "id_ID", "CP850", "is_IS", "CP850",
393-
"it_IT", "CP850", "it_CH", "CP850", "iv_IV", "CP437", "ja_JP", "CP932",
394-
"kk_KZ", "CP866", "ko_KR", "CP949", "ky_KG", "CP866", "lt_LT", "CP775",
395-
"lv_LV", "CP775", "mk_MK", "CP866", "mn_MN", "CP866", "ms_BN", "CP850",
396-
"ms_MY", "CP850", "nl_BE", "CP850", "nl_NL", "CP850", "nl_SR", "CP850",
397-
"nn_NO", "CP850", "nb_NO", "CP850", "pl_PL", "CP852", "pt_BR", "CP850",
398-
"pt_PT", "CP850", "rm_CH", "CP850", "ro_RO", "CP852", "ru_RU", "CP866",
399-
"sk_SK", "CP852", "sl_SI", "CP852", "sq_AL", "CP852", "sr_RS", "CP855",
400-
"sr_RS", "CP852", "sv_SE", "CP850", "sv_FI", "CP850", "sw_KE", "CP437",
401-
"th_TH", "CP874", "tr_TR", "CP857", "tt_RU", "CP866", "uk_UA", "CP866",
402-
"ur_PK", "CP720", "uz_UZ", "CP866", "uz_UZ", "CP857", "vi_VN", "CP1258",
403-
"wa_BE", "CP850", "zh_HK", "CP950", "zh_SG", "CP936"};
404-
int table_len = sizeof(lc_to_cp_table) / sizeof(char *);
405-
int lc_len, i;
406-
407-
char *lc = setlocale(LC_CTYPE, "");
408-
409-
if (lc && lc[0]) {
410-
// Compare up to the dot, if it exists, e.g. en_US.UTF-8
411-
for (lc_len = 0; lc[lc_len] != '.' && lc[lc_len] != '\0'; ++lc_len)
412-
;
413-
for (i = 0; i < table_len; i += 2)
414-
if (strncmp(lc, lc_to_cp_table[i], lc_len) == 0)
415-
oemcp = lc_to_cp_table[i + 1];
416-
}
363+
// locale -> code page translation tables generated from Wine source code
364+
365+
const char *lcToOemTable[] = {
366+
"af_ZA", "CP850", "ar_SA", "CP720", "ar_LB", "CP720", "ar_EG", "CP720",
367+
"ar_DZ", "CP720", "ar_BH", "CP720", "ar_IQ", "CP720", "ar_JO", "CP720",
368+
"ar_KW", "CP720", "ar_LY", "CP720", "ar_MA", "CP720", "ar_OM", "CP720",
369+
"ar_QA", "CP720", "ar_SY", "CP720", "ar_TN", "CP720", "ar_AE", "CP720",
370+
"ar_YE", "CP720", "ast_ES", "CP850", "az_AZ", "CP866", "az_AZ", "CP857",
371+
"be_BY", "CP866", "bg_BG", "CP866", "br_FR", "CP850", "ca_ES", "CP850",
372+
"zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP850", "cs_CZ", "CP852",
373+
"cy_GB", "CP850", "da_DK", "CP850", "de_AT", "CP850", "de_LI", "CP850",
374+
"de_LU", "CP850", "de_CH", "CP850", "de_DE", "CP850", "el_GR", "CP737",
375+
"en_AU", "CP850", "en_CA", "CP850", "en_GB", "CP850", "en_IE", "CP850",
376+
"en_JM", "CP850", "en_BZ", "CP850", "en_PH", "CP437", "en_ZA", "CP437",
377+
"en_TT", "CP850", "en_US", "CP437", "en_ZW", "CP437", "en_NZ", "CP850",
378+
"es_PA", "CP850", "es_BO", "CP850", "es_CR", "CP850", "es_DO", "CP850",
379+
"es_SV", "CP850", "es_EC", "CP850", "es_GT", "CP850", "es_HN", "CP850",
380+
"es_NI", "CP850", "es_CL", "CP850", "es_MX", "CP850", "es_ES", "CP850",
381+
"es_CO", "CP850", "es_ES", "CP850", "es_PE", "CP850", "es_AR", "CP850",
382+
"es_PR", "CP850", "es_VE", "CP850", "es_UY", "CP850", "es_PY", "CP850",
383+
"et_EE", "CP775", "eu_ES", "CP850", "fa_IR", "CP720", "fi_FI", "CP850",
384+
"fo_FO", "CP850", "fr_FR", "CP850", "fr_BE", "CP850", "fr_CA", "CP850",
385+
"fr_LU", "CP850", "fr_MC", "CP850", "fr_CH", "CP850", "ga_IE", "CP437",
386+
"gd_GB", "CP850", "gv_IM", "CP850", "gl_ES", "CP850", "he_IL", "CP862",
387+
"hr_HR", "CP852", "hu_HU", "CP852", "id_ID", "CP850", "is_IS", "CP850",
388+
"it_IT", "CP850", "it_CH", "CP850", "iv_IV", "CP437", "ja_JP", "CP932",
389+
"kk_KZ", "CP866", "ko_KR", "CP949", "ky_KG", "CP866", "lt_LT", "CP775",
390+
"lv_LV", "CP775", "mk_MK", "CP866", "mn_MN", "CP866", "ms_BN", "CP850",
391+
"ms_MY", "CP850", "nl_BE", "CP850", "nl_NL", "CP850", "nl_SR", "CP850",
392+
"nn_NO", "CP850", "nb_NO", "CP850", "pl_PL", "CP852", "pt_BR", "CP850",
393+
"pt_PT", "CP850", "rm_CH", "CP850", "ro_RO", "CP852", "ru_RU", "CP866",
394+
"sk_SK", "CP852", "sl_SI", "CP852", "sq_AL", "CP852", "sr_RS", "CP855",
395+
"sr_RS", "CP852", "sv_SE", "CP850", "sv_FI", "CP850", "sw_KE", "CP437",
396+
"th_TH", "CP874", "tr_TR", "CP857", "tt_RU", "CP866", "uk_UA", "CP866",
397+
"ur_PK", "CP720", "uz_UZ", "CP866", "uz_UZ", "CP857", "vi_VN", "CP1258",
398+
"wa_BE", "CP850", "zh_HK", "CP950", "zh_SG", "CP936"};
399+
400+
const char *lcToAnsiTable[] = {
401+
"af_ZA", "CP1252", "ar_SA", "CP1256", "ar_LB", "CP1256", "ar_EG", "CP1256",
402+
"ar_DZ", "CP1256", "ar_BH", "CP1256", "ar_IQ", "CP1256", "ar_JO", "CP1256",
403+
"ar_KW", "CP1256", "ar_LY", "CP1256", "ar_MA", "CP1256", "ar_OM", "CP1256",
404+
"ar_QA", "CP1256", "ar_SY", "CP1256", "ar_TN", "CP1256", "ar_AE", "CP1256",
405+
"ar_YE", "CP1256","ast_ES", "CP1252", "az_AZ", "CP1251", "az_AZ", "CP1254",
406+
"be_BY", "CP1251", "bg_BG", "CP1251", "br_FR", "CP1252", "ca_ES", "CP1252",
407+
"zh_CN", "CP936", "zh_TW", "CP950", "kw_GB", "CP1252", "cs_CZ", "CP1250",
408+
"cy_GB", "CP1252", "da_DK", "CP1252", "de_AT", "CP1252", "de_LI", "CP1252",
409+
"de_LU", "CP1252", "de_CH", "CP1252", "de_DE", "CP1252", "el_GR", "CP1253",
410+
"en_AU", "CP1252", "en_CA", "CP1252", "en_GB", "CP1252", "en_IE", "CP1252",
411+
"en_JM", "CP1252", "en_BZ", "CP1252", "en_PH", "CP1252", "en_ZA", "CP1252",
412+
"en_TT", "CP1252", "en_US", "CP1252", "en_ZW", "CP1252", "en_NZ", "CP1252",
413+
"es_PA", "CP1252", "es_BO", "CP1252", "es_CR", "CP1252", "es_DO", "CP1252",
414+
"es_SV", "CP1252", "es_EC", "CP1252", "es_GT", "CP1252", "es_HN", "CP1252",
415+
"es_NI", "CP1252", "es_CL", "CP1252", "es_MX", "CP1252", "es_ES", "CP1252",
416+
"es_CO", "CP1252", "es_ES", "CP1252", "es_PE", "CP1252", "es_AR", "CP1252",
417+
"es_PR", "CP1252", "es_VE", "CP1252", "es_UY", "CP1252", "es_PY", "CP1252",
418+
"et_EE", "CP1257", "eu_ES", "CP1252", "fa_IR", "CP1256", "fi_FI", "CP1252",
419+
"fo_FO", "CP1252", "fr_FR", "CP1252", "fr_BE", "CP1252", "fr_CA", "CP1252",
420+
"fr_LU", "CP1252", "fr_MC", "CP1252", "fr_CH", "CP1252", "ga_IE", "CP1252",
421+
"gd_GB", "CP1252", "gv_IM", "CP1252", "gl_ES", "CP1252", "he_IL", "CP1255",
422+
"hr_HR", "CP1250", "hu_HU", "CP1250", "id_ID", "CP1252", "is_IS", "CP1252",
423+
"it_IT", "CP1252", "it_CH", "CP1252", "iv_IV", "CP1252", "ja_JP", "CP932",
424+
"kk_KZ", "CP1251", "ko_KR", "CP949", "ky_KG", "CP1251", "lt_LT", "CP1257",
425+
"lv_LV", "CP1257", "mk_MK", "CP1251", "mn_MN", "CP1251", "ms_BN", "CP1252",
426+
"ms_MY", "CP1252", "nl_BE", "CP1252", "nl_NL", "CP1252", "nl_SR", "CP1252",
427+
"nn_NO", "CP1252", "nb_NO", "CP1252", "pl_PL", "CP1250", "pt_BR", "CP1252",
428+
"pt_PT", "CP1252", "rm_CH", "CP1252", "ro_RO", "CP1250", "ru_RU", "CP1251",
429+
"sk_SK", "CP1250", "sl_SI", "CP1250", "sq_AL", "CP1250", "sr_RS", "CP1251",
430+
"sr_RS", "CP1250", "sv_SE", "CP1252", "sv_FI", "CP1252", "sw_KE", "CP1252",
431+
"th_TH", "CP874", "tr_TR", "CP1254", "tt_RU", "CP1251", "uk_UA", "CP1251",
432+
"ur_PK", "CP1256", "uz_UZ", "CP1251", "uz_UZ", "CP1254", "vi_VN", "CP1258",
433+
"wa_BE", "CP1252", "zh_HK", "CP950", "zh_SG", "CP936"};
434+
435+
bool isAnsi = false;
436+
bool isOem = false;
437+
438+
if (!isUtf8 &&
439+
MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS &&
440+
MadeByVersion.Version >= 20) {
441+
isAnsi = true;
442+
} else if (!isUtf8 &&
443+
(MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS ||
444+
MadeByVersion.HostOS == NFileHeader::NHostOS::kFAT)) {
445+
isOem = true;
446+
}
447+
448+
if (isOem || isAnsi) {
449+
450+
const char *legacyCp = nullptr;
451+
int tableLen = sizeof(isOem ? lcToOemTable : lcToAnsiTable) / sizeof(char *);
452+
int lcLen = 0, i;
453+
454+
// Detect required code page name from current locale
455+
char *lc = setlocale(LC_CTYPE, "");
456+
457+
if (lc && lc[0]) {
458+
// Compare up to the first dot or colon, if present, e.g. en_US.UTF-8
459+
for (lcLen = 0; lc[lcLen] != '.' && lc[lcLen] != ':' && lc[lcLen] != '\0'; ++lcLen);
460+
461+
for (i = 0; i < tableLen; i += 2)
462+
if (strncmp(lc, (isOem ? lcToOemTable[i] : lcToAnsiTable[i]), lcLen) == 0) {
463+
legacyCp = isOem ? lcToOemTable[i + 1] : lcToAnsiTable[i + 1];
464+
break; // Stop searching once a match is found
465+
}
417466
}
418467

419-
iconv_t cd;
420-
if ((cd = iconv_open("UTF-8", oemcp)) != (iconv_t)-1) {
468+
if (legacyCp) {
469+
iconv_t cd;
470+
if ((cd = iconv_open("UTF-8", legacyCp)) != (iconv_t)-1) {
421471

422-
AString s_utf8;
423-
const char* src = s.Ptr();
424-
size_t slen = s.Len();
425-
size_t dlen = slen * 4;
426-
const char* dest = s_utf8.GetBuf_SetEnd(dlen + 1); // (source length * 4) + null termination
472+
AString sUtf8;
427473

428-
size_t done = iconv(cd, (char**)&src, &slen, (char**)&dest, &dlen);
429-
bzero((size_t*)dest + done, 1);
474+
size_t slen = s.Len();
475+
char* src = const_cast<char*>(s.Ptr());
430476

431-
iconv_close(cd);
477+
size_t dlen = slen * 4 + 1; // (source length * 4) + null termination
478+
char* dst = sUtf8.GetBuf_SetEnd(dlen);
479+
const char* dstStart = dst;
432480

433-
if (ConvertUTF8ToUnicode(s_utf8, res) || ignore_Utf8_Errors)
434-
return;
435-
}
481+
memset(dst, 0, dlen);
482+
483+
size_t done = iconv(cd, &src, &slen, &dst, &dlen);
484+
485+
if (done == (size_t)-1) {
486+
iconv_close(cd);
487+
488+
// iconv failed. Falling back to default behavior
489+
MultiByteToUnicodeString2(res, s, useSpecifiedCodePage ? codePage : GetCodePage());
490+
return;
491+
}
492+
493+
// Null-terminate the result
494+
*dst = '\0';
495+
496+
iconv_close(cd);
497+
498+
AString sUtf8CorrectLength;
499+
unsigned dstCorrectLength = dst - dstStart;
500+
sUtf8CorrectLength.SetFrom(sUtf8, dstCorrectLength);
501+
if (ConvertUTF8ToUnicode(sUtf8CorrectLength, res) /*|| ignore_Utf8_Errors*/)
502+
return;
503+
}
504+
}
436505
}
437506
#endif
438507

0 commit comments

Comments
 (0)