@@ -356,83 +356,152 @@ void CItem::GetUnicodeString(UString &res, const AString &s, bool isComment, boo
356356 }
357357
358358 #if (!defined _WIN32) && (!defined __CYGWIN__) && (!defined __APPLE__)
359+
359360 // Convert OEM char set to UTF-8 if needed
360361 // Use system locale to select code page
361362
362- Byte hostOS = GetHostOS ();
363- if (!isUtf8 && ((hostOS == NFileHeader::NHostOS::kFAT ) || (hostOS == NFileHeader::NHostOS::kNTFS ))) {
364-
365- const char *oemcp;
366- oemcp = getenv (" OEMCP" );
367- if (!oemcp) {
368- oemcp = " CP437\0 " ; // CP name is 6 chars max
369-
370- const char *lc_to_cp_table[] = {
371- " af_ZA" , " CP850" , " ar_SA" , " CP720" , " ar_LB" , " CP720" , " ar_EG" , " CP720" ,
372- " ar_DZ" , " CP720" , " ar_BH" , " CP720" , " ar_IQ" , " CP720" , " ar_JO" , " CP720" ,
373- " ar_KW" , " CP720" , " ar_LY" , " CP720" , " ar_MA" , " CP720" , " ar_OM" , " CP720" ,
374- " ar_QA" , " CP720" , " ar_SY" , " CP720" , " ar_TN" , " CP720" , " ar_AE" , " CP720" ,
375- " ar_YE" , " CP720" ," ast_ES" , " CP850" , " az_AZ" , " CP866" , " az_AZ" , " CP857" ,
376- " be_BY" , " CP866" , " bg_BG" , " CP866" , " br_FR" , " CP850" , " ca_ES" , " CP850" ,
377- " zh_CN" , " CP936" , " zh_TW" , " CP950" , " kw_GB" , " CP850" , " cs_CZ" , " CP852" ,
378- " cy_GB" , " CP850" , " da_DK" , " CP850" , " de_AT" , " CP850" , " de_LI" , " CP850" ,
379- " de_LU" , " CP850" , " de_CH" , " CP850" , " de_DE" , " CP850" , " el_GR" , " CP737" ,
380- " en_AU" , " CP850" , " en_CA" , " CP850" , " en_GB" , " CP850" , " en_IE" , " CP850" ,
381- " en_JM" , " CP850" , " en_BZ" , " CP850" , " en_PH" , " CP437" , " en_ZA" , " CP437" ,
382- " en_TT" , " CP850" , " en_US" , " CP437" , " en_ZW" , " CP437" , " en_NZ" , " CP850" ,
383- " es_PA" , " CP850" , " es_BO" , " CP850" , " es_CR" , " CP850" , " es_DO" , " CP850" ,
384- " es_SV" , " CP850" , " es_EC" , " CP850" , " es_GT" , " CP850" , " es_HN" , " CP850" ,
385- " es_NI" , " CP850" , " es_CL" , " CP850" , " es_MX" , " CP850" , " es_ES" , " CP850" ,
386- " es_CO" , " CP850" , " es_ES" , " CP850" , " es_PE" , " CP850" , " es_AR" , " CP850" ,
387- " es_PR" , " CP850" , " es_VE" , " CP850" , " es_UY" , " CP850" , " es_PY" , " CP850" ,
388- " et_EE" , " CP775" , " eu_ES" , " CP850" , " fa_IR" , " CP720" , " fi_FI" , " CP850" ,
389- " fo_FO" , " CP850" , " fr_FR" , " CP850" , " fr_BE" , " CP850" , " fr_CA" , " CP850" ,
390- " fr_LU" , " CP850" , " fr_MC" , " CP850" , " fr_CH" , " CP850" , " ga_IE" , " CP437" ,
391- " gd_GB" , " CP850" , " gv_IM" , " CP850" , " gl_ES" , " CP850" , " he_IL" , " CP862" ,
392- " hr_HR" , " CP852" , " hu_HU" , " CP852" , " id_ID" , " CP850" , " is_IS" , " CP850" ,
393- " it_IT" , " CP850" , " it_CH" , " CP850" , " iv_IV" , " CP437" , " ja_JP" , " CP932" ,
394- " kk_KZ" , " CP866" , " ko_KR" , " CP949" , " ky_KG" , " CP866" , " lt_LT" , " CP775" ,
395- " lv_LV" , " CP775" , " mk_MK" , " CP866" , " mn_MN" , " CP866" , " ms_BN" , " CP850" ,
396- " ms_MY" , " CP850" , " nl_BE" , " CP850" , " nl_NL" , " CP850" , " nl_SR" , " CP850" ,
397- " nn_NO" , " CP850" , " nb_NO" , " CP850" , " pl_PL" , " CP852" , " pt_BR" , " CP850" ,
398- " pt_PT" , " CP850" , " rm_CH" , " CP850" , " ro_RO" , " CP852" , " ru_RU" , " CP866" ,
399- " sk_SK" , " CP852" , " sl_SI" , " CP852" , " sq_AL" , " CP852" , " sr_RS" , " CP855" ,
400- " sr_RS" , " CP852" , " sv_SE" , " CP850" , " sv_FI" , " CP850" , " sw_KE" , " CP437" ,
401- " th_TH" , " CP874" , " tr_TR" , " CP857" , " tt_RU" , " CP866" , " uk_UA" , " CP866" ,
402- " ur_PK" , " CP720" , " uz_UZ" , " CP866" , " uz_UZ" , " CP857" , " vi_VN" , " CP1258" ,
403- " wa_BE" , " CP850" , " zh_HK" , " CP950" , " zh_SG" , " CP936" };
404- int table_len = sizeof (lc_to_cp_table) / sizeof (char *);
405- int lc_len, i;
406-
407- char *lc = setlocale (LC_CTYPE, " " );
408-
409- if (lc && lc[0 ]) {
410- // Compare up to the dot, if it exists, e.g. en_US.UTF-8
411- for (lc_len = 0 ; lc[lc_len] != ' .' && lc[lc_len] != ' \0 ' ; ++lc_len)
412- ;
413- for (i = 0 ; i < table_len; i += 2 )
414- if (strncmp (lc, lc_to_cp_table[i], lc_len) == 0 )
415- oemcp = lc_to_cp_table[i + 1 ];
416- }
363+ // locale -> code page translation tables generated from Wine source code
364+
365+ const char *lcToOemTable[] = {
366+ " af_ZA" , " CP850" , " ar_SA" , " CP720" , " ar_LB" , " CP720" , " ar_EG" , " CP720" ,
367+ " ar_DZ" , " CP720" , " ar_BH" , " CP720" , " ar_IQ" , " CP720" , " ar_JO" , " CP720" ,
368+ " ar_KW" , " CP720" , " ar_LY" , " CP720" , " ar_MA" , " CP720" , " ar_OM" , " CP720" ,
369+ " ar_QA" , " CP720" , " ar_SY" , " CP720" , " ar_TN" , " CP720" , " ar_AE" , " CP720" ,
370+ " ar_YE" , " CP720" , " ast_ES" , " CP850" , " az_AZ" , " CP866" , " az_AZ" , " CP857" ,
371+ " be_BY" , " CP866" , " bg_BG" , " CP866" , " br_FR" , " CP850" , " ca_ES" , " CP850" ,
372+ " zh_CN" , " CP936" , " zh_TW" , " CP950" , " kw_GB" , " CP850" , " cs_CZ" , " CP852" ,
373+ " cy_GB" , " CP850" , " da_DK" , " CP850" , " de_AT" , " CP850" , " de_LI" , " CP850" ,
374+ " de_LU" , " CP850" , " de_CH" , " CP850" , " de_DE" , " CP850" , " el_GR" , " CP737" ,
375+ " en_AU" , " CP850" , " en_CA" , " CP850" , " en_GB" , " CP850" , " en_IE" , " CP850" ,
376+ " en_JM" , " CP850" , " en_BZ" , " CP850" , " en_PH" , " CP437" , " en_ZA" , " CP437" ,
377+ " en_TT" , " CP850" , " en_US" , " CP437" , " en_ZW" , " CP437" , " en_NZ" , " CP850" ,
378+ " es_PA" , " CP850" , " es_BO" , " CP850" , " es_CR" , " CP850" , " es_DO" , " CP850" ,
379+ " es_SV" , " CP850" , " es_EC" , " CP850" , " es_GT" , " CP850" , " es_HN" , " CP850" ,
380+ " es_NI" , " CP850" , " es_CL" , " CP850" , " es_MX" , " CP850" , " es_ES" , " CP850" ,
381+ " es_CO" , " CP850" , " es_ES" , " CP850" , " es_PE" , " CP850" , " es_AR" , " CP850" ,
382+ " es_PR" , " CP850" , " es_VE" , " CP850" , " es_UY" , " CP850" , " es_PY" , " CP850" ,
383+ " et_EE" , " CP775" , " eu_ES" , " CP850" , " fa_IR" , " CP720" , " fi_FI" , " CP850" ,
384+ " fo_FO" , " CP850" , " fr_FR" , " CP850" , " fr_BE" , " CP850" , " fr_CA" , " CP850" ,
385+ " fr_LU" , " CP850" , " fr_MC" , " CP850" , " fr_CH" , " CP850" , " ga_IE" , " CP437" ,
386+ " gd_GB" , " CP850" , " gv_IM" , " CP850" , " gl_ES" , " CP850" , " he_IL" , " CP862" ,
387+ " hr_HR" , " CP852" , " hu_HU" , " CP852" , " id_ID" , " CP850" , " is_IS" , " CP850" ,
388+ " it_IT" , " CP850" , " it_CH" , " CP850" , " iv_IV" , " CP437" , " ja_JP" , " CP932" ,
389+ " kk_KZ" , " CP866" , " ko_KR" , " CP949" , " ky_KG" , " CP866" , " lt_LT" , " CP775" ,
390+ " lv_LV" , " CP775" , " mk_MK" , " CP866" , " mn_MN" , " CP866" , " ms_BN" , " CP850" ,
391+ " ms_MY" , " CP850" , " nl_BE" , " CP850" , " nl_NL" , " CP850" , " nl_SR" , " CP850" ,
392+ " nn_NO" , " CP850" , " nb_NO" , " CP850" , " pl_PL" , " CP852" , " pt_BR" , " CP850" ,
393+ " pt_PT" , " CP850" , " rm_CH" , " CP850" , " ro_RO" , " CP852" , " ru_RU" , " CP866" ,
394+ " sk_SK" , " CP852" , " sl_SI" , " CP852" , " sq_AL" , " CP852" , " sr_RS" , " CP855" ,
395+ " sr_RS" , " CP852" , " sv_SE" , " CP850" , " sv_FI" , " CP850" , " sw_KE" , " CP437" ,
396+ " th_TH" , " CP874" , " tr_TR" , " CP857" , " tt_RU" , " CP866" , " uk_UA" , " CP866" ,
397+ " ur_PK" , " CP720" , " uz_UZ" , " CP866" , " uz_UZ" , " CP857" , " vi_VN" , " CP1258" ,
398+ " wa_BE" , " CP850" , " zh_HK" , " CP950" , " zh_SG" , " CP936" };
399+
400+ const char *lcToAnsiTable[] = {
401+ " af_ZA" , " CP1252" , " ar_SA" , " CP1256" , " ar_LB" , " CP1256" , " ar_EG" , " CP1256" ,
402+ " ar_DZ" , " CP1256" , " ar_BH" , " CP1256" , " ar_IQ" , " CP1256" , " ar_JO" , " CP1256" ,
403+ " ar_KW" , " CP1256" , " ar_LY" , " CP1256" , " ar_MA" , " CP1256" , " ar_OM" , " CP1256" ,
404+ " ar_QA" , " CP1256" , " ar_SY" , " CP1256" , " ar_TN" , " CP1256" , " ar_AE" , " CP1256" ,
405+ " ar_YE" , " CP1256" ," ast_ES" , " CP1252" , " az_AZ" , " CP1251" , " az_AZ" , " CP1254" ,
406+ " be_BY" , " CP1251" , " bg_BG" , " CP1251" , " br_FR" , " CP1252" , " ca_ES" , " CP1252" ,
407+ " zh_CN" , " CP936" , " zh_TW" , " CP950" , " kw_GB" , " CP1252" , " cs_CZ" , " CP1250" ,
408+ " cy_GB" , " CP1252" , " da_DK" , " CP1252" , " de_AT" , " CP1252" , " de_LI" , " CP1252" ,
409+ " de_LU" , " CP1252" , " de_CH" , " CP1252" , " de_DE" , " CP1252" , " el_GR" , " CP1253" ,
410+ " en_AU" , " CP1252" , " en_CA" , " CP1252" , " en_GB" , " CP1252" , " en_IE" , " CP1252" ,
411+ " en_JM" , " CP1252" , " en_BZ" , " CP1252" , " en_PH" , " CP1252" , " en_ZA" , " CP1252" ,
412+ " en_TT" , " CP1252" , " en_US" , " CP1252" , " en_ZW" , " CP1252" , " en_NZ" , " CP1252" ,
413+ " es_PA" , " CP1252" , " es_BO" , " CP1252" , " es_CR" , " CP1252" , " es_DO" , " CP1252" ,
414+ " es_SV" , " CP1252" , " es_EC" , " CP1252" , " es_GT" , " CP1252" , " es_HN" , " CP1252" ,
415+ " es_NI" , " CP1252" , " es_CL" , " CP1252" , " es_MX" , " CP1252" , " es_ES" , " CP1252" ,
416+ " es_CO" , " CP1252" , " es_ES" , " CP1252" , " es_PE" , " CP1252" , " es_AR" , " CP1252" ,
417+ " es_PR" , " CP1252" , " es_VE" , " CP1252" , " es_UY" , " CP1252" , " es_PY" , " CP1252" ,
418+ " et_EE" , " CP1257" , " eu_ES" , " CP1252" , " fa_IR" , " CP1256" , " fi_FI" , " CP1252" ,
419+ " fo_FO" , " CP1252" , " fr_FR" , " CP1252" , " fr_BE" , " CP1252" , " fr_CA" , " CP1252" ,
420+ " fr_LU" , " CP1252" , " fr_MC" , " CP1252" , " fr_CH" , " CP1252" , " ga_IE" , " CP1252" ,
421+ " gd_GB" , " CP1252" , " gv_IM" , " CP1252" , " gl_ES" , " CP1252" , " he_IL" , " CP1255" ,
422+ " hr_HR" , " CP1250" , " hu_HU" , " CP1250" , " id_ID" , " CP1252" , " is_IS" , " CP1252" ,
423+ " it_IT" , " CP1252" , " it_CH" , " CP1252" , " iv_IV" , " CP1252" , " ja_JP" , " CP932" ,
424+ " kk_KZ" , " CP1251" , " ko_KR" , " CP949" , " ky_KG" , " CP1251" , " lt_LT" , " CP1257" ,
425+ " lv_LV" , " CP1257" , " mk_MK" , " CP1251" , " mn_MN" , " CP1251" , " ms_BN" , " CP1252" ,
426+ " ms_MY" , " CP1252" , " nl_BE" , " CP1252" , " nl_NL" , " CP1252" , " nl_SR" , " CP1252" ,
427+ " nn_NO" , " CP1252" , " nb_NO" , " CP1252" , " pl_PL" , " CP1250" , " pt_BR" , " CP1252" ,
428+ " pt_PT" , " CP1252" , " rm_CH" , " CP1252" , " ro_RO" , " CP1250" , " ru_RU" , " CP1251" ,
429+ " sk_SK" , " CP1250" , " sl_SI" , " CP1250" , " sq_AL" , " CP1250" , " sr_RS" , " CP1251" ,
430+ " sr_RS" , " CP1250" , " sv_SE" , " CP1252" , " sv_FI" , " CP1252" , " sw_KE" , " CP1252" ,
431+ " th_TH" , " CP874" , " tr_TR" , " CP1254" , " tt_RU" , " CP1251" , " uk_UA" , " CP1251" ,
432+ " ur_PK" , " CP1256" , " uz_UZ" , " CP1251" , " uz_UZ" , " CP1254" , " vi_VN" , " CP1258" ,
433+ " wa_BE" , " CP1252" , " zh_HK" , " CP950" , " zh_SG" , " CP936" };
434+
435+ bool isAnsi = false ;
436+ bool isOem = false ;
437+
438+ if (!isUtf8 &&
439+ MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS &&
440+ MadeByVersion.Version >= 20 ) {
441+ isAnsi = true ;
442+ } else if (!isUtf8 &&
443+ (MadeByVersion.HostOS == NFileHeader::NHostOS::kNTFS ||
444+ MadeByVersion.HostOS == NFileHeader::NHostOS::kFAT )) {
445+ isOem = true ;
446+ }
447+
448+ if (isOem || isAnsi) {
449+
450+ const char *legacyCp = nullptr ;
451+ int tableLen = sizeof (isOem ? lcToOemTable : lcToAnsiTable) / sizeof (char *);
452+ int lcLen = 0 , i;
453+
454+ // Detect required code page name from current locale
455+ char *lc = setlocale (LC_CTYPE, " " );
456+
457+ if (lc && lc[0 ]) {
458+ // Compare only the part before the first '.' or ':' if present, e.g. the "en_US" of en_US.UTF-8
459+ for (lcLen = 0 ; lc[lcLen] != ' .' && lc[lcLen] != ' :' && lc[lcLen] != ' \0 ' ; ++lcLen);
460+
461+ for (i = 0 ; i < tableLen; i += 2 )
462+ if (strncmp (lc, (isOem ? lcToOemTable[i] : lcToAnsiTable[i]), lcLen) == 0 ) {
463+ legacyCp = isOem ? lcToOemTable[i + 1 ] : lcToAnsiTable[i + 1 ];
464+ break ; // Stop searching once a match is found
465+ }
417466 }
418467
419- iconv_t cd;
420- if ((cd = iconv_open (" UTF-8" , oemcp)) != (iconv_t )-1 ) {
468+ if (legacyCp) {
469+ iconv_t cd;
470+ if ((cd = iconv_open (" UTF-8" , legacyCp)) != (iconv_t )-1 ) {
421471
422- AString s_utf8;
423- const char * src = s.Ptr ();
424- size_t slen = s.Len ();
425- size_t dlen = slen * 4 ;
426- const char * dest = s_utf8.GetBuf_SetEnd (dlen + 1 ); // (source length * 4) + null termination
472+ AString sUtf8 ;
427473
428- size_t done = iconv (cd, ( char **)&src, & slen, ( char **)&dest, &dlen );
429- bzero (( size_t *)dest + done, 1 );
474+ size_t slen = s. Len ( );
475+ char * src = const_cast < char *>(s. Ptr () );
430476
431- iconv_close (cd);
477+ size_t dlen = slen * 4 + 1 ; // (source length * 4) + null termination
478+ char * dst = sUtf8 .GetBuf_SetEnd (dlen);
479+ const char * dstStart = dst;
432480
433- if (ConvertUTF8ToUnicode (s_utf8, res) || ignore_Utf8_Errors)
434- return ;
435- }
481+ memset (dst, 0 , dlen);
482+
483+ size_t done = iconv (cd, &src, &slen, &dst, &dlen);
484+
485+ if (done == (size_t )-1 ) {
486+ iconv_close (cd);
487+
488+ // iconv failed. Falling back to default behavior
489+ MultiByteToUnicodeString2 (res, s, useSpecifiedCodePage ? codePage : GetCodePage ());
490+ return ;
491+ }
492+
493+ // Null-terminate the result
494+ *dst = ' \0 ' ;
495+
496+ iconv_close (cd);
497+
498+ AString sUtf8CorrectLength ;
499+ unsigned dstCorrectLength = dst - dstStart;
500+ sUtf8CorrectLength .SetFrom (sUtf8 , dstCorrectLength);
501+ if (ConvertUTF8ToUnicode (sUtf8CorrectLength , res) /* || ignore_Utf8_Errors*/ )
502+ return ;
503+ }
504+ }
436505 }
437506 #endif
438507
0 commit comments