@@ -8,81 +8,6 @@ namespace Jint.Native.Intl
88 /// </summary>
99 internal static class IcuHelpers
1010 {
11-
/// <summary>
/// Converts an ICU locale ID into a language tag.
/// Equivalent to WebKit's languageTagForLocaleID(localeID, isImmortal=false):
/// calls ICU uloc_toLanguageTag(localeId, strict=false), then applies the same
/// unicode extension cleanup WebKit does (drop "-u-…-true" values).
/// </summary>
/// <param name="localeId">ICU locale identifier. Null or empty input yields an empty string.</param>
/// <returns>
/// The tag produced by ICU after unicode extension cleanup, or <see cref="string.Empty"/>
/// when <paramref name="localeId"/> is null or empty.
/// </returns>
/// <remarks>
/// Failure is reported through Throw.ArgumentException when ICU returns an error status
/// or a non-positive length.
/// </remarks>
public static string LanguageTagForLocaleId(string localeId)
{
    if (string.IsNullOrEmpty(localeId))
    {
        return string.Empty;
    }

    // First pass with a buffer that is large enough for typical tags.
    var status = ICU.UErrorCode.U_ZERO_ERROR;
    byte[] buf = new byte[256];
    int len = ICU.uloc_toLanguageTag(localeId, buf, buf.Length, strict: false, ref status);

    // ICU returns the full required length even when the output did not fit.
    // When the tag fills the buffer exactly there is no room for the NUL terminator
    // and ICU reports U_STRING_NOT_TERMINATED_WARNING, so retry with one spare byte
    // (hence ">=" and "len + 1") to get a clean status on the second pass.
    if (len >= buf.Length)
    {
        buf = new byte[len + 1];
        status = ICU.UErrorCode.U_ZERO_ERROR;
        len = ICU.uloc_toLanguageTag(localeId, buf, buf.Length, strict: false, ref status);
    }

    // In ICU only codes greater than U_ZERO_ERROR are failures; negative codes are
    // informational warnings and must not abort the conversion.
    // NOTE(review): assumes the managed UErrorCode mirrors ICU's numeric values — confirm.
    if (status > ICU.UErrorCode.U_ZERO_ERROR || len <= 0)
    {
        Throw.ArgumentException($"ICU uloc_toLanguageTag failed for '{localeId}' (status={status}).");
    }

    // ICU writes UTF-8 bytes; decode exactly the returned length.
    string tag = System.Text.Encoding.UTF8.GetString(buf, 0, len);

    // Do the same extension cleanup WebKit applies.
    return CanonicalizeUnicodeExtensionsAfterIcu(tag);
}
45-
46- // Unicode extension keys whose boolean "true" value is elided in canonical form.
47- // For these keys, "-u-<key>-yes" and "-u-<key>-true" both canonicalize to just "-u-<key>".
48- // "ca" is listed as well so that a bare `-u-ca` does not get a synthesized `-yes` value; lookups ignore case (OrdinalIgnoreCase).
49- private static readonly HashSet < string > s_trueDroppableKeys = new ( StringComparer . OrdinalIgnoreCase )
50- {
51- "kb" , "kc" , "kh" , "kk" , "kn" , "ca"
52- } ;
53-
54-
55- // Canonicalizes subdivision aliases (used for rg/sd values). The value is lowercased before matching; values with no known alias are returned unchanged.
56- private static string CanonicalizeSubdivision ( string value )
57- {
58- switch ( value . ToLowerInvariant ( ) )
59- {
60- case "no23" : return "no50" ;
61- case "cn11" : return "cnbj" ;
62- case "cz10a" : return "cz110" ;
63- case "fra" : return "frges" ;
64- case "frg" : return "frges" ;
65- case "lud" : return "lucl" ; // test262 expects the first entry of the replacement list
66- default : return value ;
67- }
68- }
69-
70- // Canonicalizes time zone type aliases (used for tz values). The value is lowercased before matching; values with no known alias are returned unchanged.
71- private static string CanonicalizeTimeZoneType ( string value )
72- {
73- switch ( value . ToLowerInvariant ( ) )
74- {
75- case "cnckg" : return "cnsha" ; // deprecated -> preferred
76- case "eire" : return "iedub" ; // alias -> canonical
77- case "est" : return "papty" ; // alias -> canonical
78- case "gmt0" : return "gmt" ; // alias -> canonical
79- case "uct" : return "utc" ; // alias -> canonical
80- case "zulu" : return "utc" ; // alias -> canonical
81- case "utcw05" : return "papty" ; // short offset alias seen in test262
82- default : return value ;
83- }
84- }
85-
8611 /// <summary>
8712 /// Mirrors WebKit's canonicalizeUnicodeExtensionsAfterICULocaleCanonicalization():
8813 /// - Finds the "-u-" extension and its end (before the next singleton).
@@ -282,6 +207,45 @@ public static string CanonicalizeUnicodeLocaleIdOrThrow(Realm realm, string tag)
282207 return canonical ;
283208 }
284209
// Unicode extension keys whose boolean "true" value is elided in canonical form:
// "-u-<key>-yes" and "-u-<key>-true" both canonicalize to just "-u-<key>".
// "ca" is included so that a bare `-u-ca` does not get a synthesized `-yes` value.
// Membership checks ignore case.
private static readonly HashSet<string> s_trueDroppableKeys = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
{
    "kb",
    "kc",
    "kh",
    "kk",
    "kn",
    "ca",
};
217+
/// <summary>
/// Replaces a known subdivision alias (used for rg/sd values) with its canonical code.
/// </summary>
/// <param name="value">Subdivision code to canonicalize; it is lowercased before matching.</param>
/// <returns>
/// The replacement for a known alias; otherwise <paramref name="value"/> exactly as passed in.
/// </returns>
private static string CanonicalizeSubdivision(string value) => value.ToLowerInvariant() switch
{
    "no23" => "no50",
    "cn11" => "cnbj",
    "cz10a" => "cz110",
    "fra" or "frg" => "frges",
    "lud" => "lucl", // test262 expects the first entry of the replacement list
    _ => value,
};
232+
/// <summary>
/// Replaces a known time zone type alias (used for tz values) with its canonical identifier.
/// </summary>
/// <param name="value">Time zone type to canonicalize; it is lowercased before matching.</param>
/// <returns>
/// The canonical identifier for a known alias; otherwise <paramref name="value"/> exactly as passed in.
/// </returns>
private static string CanonicalizeTimeZoneType(string value) => value.ToLowerInvariant() switch
{
    "cnckg" => "cnsha",             // deprecated -> preferred
    "eire" => "iedub",              // alias -> canonical
    "est" or "utcw05" => "papty",   // alias and short offset alias seen in test262
    "gmt0" => "gmt",                // alias -> canonical
    "uct" or "zulu" => "utc",       // aliases -> canonical
    _ => value,
};
248+
285249 private static string FixKnownLanguageAliases ( string canonicalTag )
286250 {
287251 if ( string . IsNullOrEmpty ( canonicalTag ) )
0 commit comments