diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 01f13d2c9..9c96926fb 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-17.0.0.txt -# Date: 2025-07-30, 23:54:38 GMT +# Date: 2025-08-06, 15:35:10 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2072,6 +2072,7 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1ACF..1ADD ; 17.0 # [15] COMBINING DOUBLE CARON..COMBINING DOT-AND-RING BELOW 1AE0..1AEB ; 17.0 # [12] COMBINING LEFT TACK ABOVE..COMBINING DOUBLE RIGHTWARDS ARROW ABOVE 20C1 ; 17.0 # SAUDI RIYAL SIGN +20C3 ; 17.0 # UAE DIRHAM SIGN 2B96 ; 17.0 # EQUALS SIGN WITH INFINITY ABOVE A7CE..A7CF ; 17.0 # [2] LATIN CAPITAL LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER PHARYNGEAL VOICED FRICATIVE A7D2 ; 17.0 # LATIN CAPITAL LETTER DOUBLE THORN @@ -2114,6 +2115,6 @@ FDC8..FDCE ; 17.0 # [7] ARABIC LIGATURE RAHIMAHU ALLAAH TAAALAA..ARABIC LIG 2CEA2..2CEAD ; 17.0 # [12] CJK UNIFIED IDEOGRAPH-2CEA2..CJK UNIFIED IDEOGRAPH-2CEAD 323B0..33479 ; 17.0 # [4298] CJK UNIFIED IDEOGRAPH-323B0..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 4803 +# Total code points: 4804 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index f327784bf..d1ffc582e 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-17.0.0.txt -# Date: 2025-07-30, 23:55:08 GMT +# Date: 2025-08-06, 15:35:27 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -11787,6 +11787,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 208E ; Grapheme_Base # Pe SUBSCRIPT RIGHT PARENTHESIS 2090..209C ; Grapheme_Base # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 20A0..20C1 ; Grapheme_Base # Sc [34] EURO-CURRENCY SIGN..SAUDI RIYAL SIGN +20C3 ; Grapheme_Base # Sc UAE DIRHAM SIGN 2100..2101 ; Grapheme_Base # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT 2102 ; Grapheme_Base # L& DOUBLE-STRUCK CAPITAL C 2103..2106 ; Grapheme_Base # So [4] DEGREE CELSIUS..CADA UNA @@ -12985,7 +12986,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..33479 ; Grapheme_Base # Lo [8490] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-33479 -# Total code points: 157494 +# Total code points: 157495 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 3e46eb65d..a61ff935f 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-17.0.0.txt -# Date: 2025-07-24, 00:12:54 GMT +# Date: 2025-08-06, 15:35:31 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -978,6 +978,7 @@ 20AA..20AB ; N # Sc [2] NEW SHEQEL SIGN..DONG SIGN 20AC ; A # Sc EURO SIGN 20AD..20C1 ; N # Sc [21] KIP SIGN..SAUDI RIYAL SIGN +20C3 ; N # Sc UAE DIRHAM SIGN 20D0..20DC ; N # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20DD..20E0 ; N # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH 20E1 ; N # Mn COMBINING LEFT RIGHT ARROW ABOVE diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 47580e936..7308e55d9 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -971,7 +971,9 @@ 20BF ; PR # Sc BITCOIN SIGN 20C0 ; PO # Sc SOM SIGN 20C1 ; PR # Sc SAUDI RIYAL SIGN -20C2..20CF ; PR # Cn [14] .. +20C2 ; PR # Cn +20C3 ; PR # Sc UAE DIRHAM SIGN +20C4..20CF ; PR # Cn [12] .. 20D0..20DC ; CM # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20DD..20E0 ; CM # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH 20E1 ; CM # Mn COMBINING LEFT RIGHT ARROW ABOVE diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 5574fdd6a..ed5180993 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-17.0.0.txt -# Date: 2025-07-24, 13:28:55 GMT +# Date: 2025-08-06, 15:35:58 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -155,6 +155,7 @@ 208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS 20A0..20C1 ; Common # Sc [34] EURO-CURRENCY SIGN..SAUDI RIYAL SIGN +20C3 ; Common # Sc UAE DIRHAM SIGN 2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT 2102 ; Common # L& DOUBLE-STRUCK CAPITAL C 2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA @@ -638,7 +639,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 9123 +# Total code points: 9124 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index fca68e3e1..301f2c91d 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -7576,6 +7576,7 @@ 20BF;BITCOIN SIGN;Sc;0;ET;;;;;N;;;;; 20C0;SOM SIGN;Sc;0;ET;;;;;N;;;;; 20C1;SAUDI RIYAL SIGN;Sc;0;ET;;;;;N;;;;; +20C3;UAE DIRHAM SIGN;Sc;0;ET;;;;;N;;;;; 20D0;COMBINING LEFT HARPOON ABOVE;Mn;230;NSM;;;;;N;NON-SPACING LEFT HARPOON ABOVE;;;; 20D1;COMBINING RIGHT HARPOON ABOVE;Mn;230;NSM;;;;;N;NON-SPACING RIGHT HARPOON ABOVE;;;; 20D2;COMBINING LONG VERTICAL LINE OVERLAY;Mn;1;NSM;;;;;N;NON-SPACING LONG VERTICAL BAR OVERLAY;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index c601f1812..ac0641602 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-17.0.0.txt -# Date: 2025-07-24, 00:13:33 GMT +# Date: 2025-08-06, 15:36:00 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -904,6 +904,7 @@ 208E ; R # Pe SUBSCRIPT RIGHT PARENTHESIS 2090..209C ; R # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 20A0..20C1 ; R # Sc [34] EURO-CURRENCY SIGN..SAUDI RIYAL SIGN +20C3 ; R # Sc UAE DIRHAM SIGN 20D0..20DC ; R # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20DD..20E0 ; U # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH 20E1 ; R # Mn COMBINING LEFT RIGHT ARROW ABOVE diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 7ce483d29..2d4185973 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-17.0.0.txt -# Date: 2025-07-24, 00:12:44 GMT +# Date: 2025-08-06, 15:35:25 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1405,6 +1405,7 @@ FF0D ; ES # Pd FULLWIDTH HYPHEN-MINUS 17DB ; ET # Sc KHMER CURRENCY SYMBOL RIEL 2030..2034 ; ET # Po [5] PER MILLE SIGN..TRIPLE PRIME 20A0..20C1 ; ET # Sc [34] EURO-CURRENCY SIGN..SAUDI RIYAL SIGN +20C3 ; ET # Sc UAE DIRHAM SIGN 212E ; ET # So ESTIMATED SYMBOL 2213 ; ET # Sm MINUS-OR-PLUS SIGN A838 ; ET # Sc NORTH INDIC RUPEE MARK @@ -1420,7 +1421,7 @@ FFE5..FFE6 ; ET # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN 11FDD..11FE0 ; ET # Sc [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN 1E2FF ; ET # Sc WANCHO NGUN SIGN -# The above property value applies to 14 code points not listed here. +# The above property value applies to 13 code points not listed here. # Total code points: 92 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 0c3bb54ed..23b0004ef 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-17.0.0.txt -# Date: 2025-07-24, 00:12:46 GMT +# Date: 2025-08-06, 15:35:26 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -753,6 +753,7 @@ 208E ; 0 # Pe SUBSCRIPT RIGHT PARENTHESIS 2090..209C ; 0 # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 20A0..20C1 ; 0 # Sc [34] EURO-CURRENCY SIGN..SAUDI RIYAL SIGN +20C3 ; 0 # Sc UAE DIRHAM SIGN 20DD..20E0 ; 0 # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH 20E2..20E4 ; 0 # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE 2100..2101 ; 0 # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT @@ -2089,7 +2090,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 816778 code points not listed here. +# The above property value applies to 816777 code points not listed here. # Total code points: 1113144 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index fd58a8128..ad797cb2e 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-17.0.0.txt -# Date: 2025-07-24, 13:28:21 GMT +# Date: 2025-08-06, 15:35:28 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -842,6 +842,7 @@ 20A0..20A8 ; N # Sc [9] EURO-CURRENCY SIGN..RUPEE SIGN 20AA..20AB ; N # Sc [2] NEW SHEQEL SIGN..DONG SIGN 20AD..20C1 ; N # Sc [21] KIP SIGN..SAUDI RIYAL SIGN +20C3 ; N # Sc UAE DIRHAM SIGN 20D0..20DC ; N # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE 20DD..20E0 ; N # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH 20E1 ; N # Mn COMBINING LEFT RIGHT ARROW ABOVE @@ -2136,7 +2137,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 760612 code points not listed here. +# The above property value applies to 760611 code points not listed here. # Total code points: 792263 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 41996d634..5cf75bee0 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-17.0.0.txt -# Date: 2025-07-24, 00:12:50 GMT +# Date: 2025-08-06, 15:35:28 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -257,7 +257,8 @@ 2072..2073 ; Cn # [2] .. 208F ; Cn # 209D..209F ; Cn # [3] .. -20C2..20CF ; Cn # [14] .. +20C2 ; Cn # +20C4..20CF ; Cn # [12] .. 20F1..20FF ; Cn # [15] .. 218C..218F ; Cn # [4] .. 242A..243F ; Cn # [22] .. @@ -751,7 +752,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 814730 +# Total code points: 814729 # ================================================ @@ -4080,6 +4081,7 @@ FFE9..FFEC ; Sm # [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW 0E3F ; Sc # THAI CURRENCY SYMBOL BAHT 17DB ; Sc # KHMER CURRENCY SYMBOL RIEL 20A0..20C1 ; Sc # [34] EURO-CURRENCY SIGN..SAUDI RIYAL SIGN +20C3 ; Sc # UAE DIRHAM SIGN A838 ; Sc # NORTH INDIC RUPEE MARK FDFC ; Sc # RIAL SIGN FE69 ; Sc # SMALL DOLLAR SIGN @@ -4090,7 +4092,7 @@ FFE5..FFE6 ; Sc # [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN 1E2FF ; Sc # WANCHO NGUN SIGN 1ECB0 ; Sc # INDIC SIYAQ RUPEE MARK -# Total code points: 64 +# Total code points: 65 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index dea8b479e..67b9d47aa 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-17.0.0.txt -# Date: 2025-07-29, 13:52:13 GMT +# Date: 2025-08-06, 15:35:29 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -439,6 +439,7 @@ FF1F ; EX # Po FULLWIDTH QUESTION MARK 20BC..20BD ; PR # Sc [2] MANAT SIGN..RUBLE SIGN 20BF ; PR # Sc BITCOIN SIGN 20C1 ; PR # Sc SAUDI RIYAL SIGN +20C3 ; PR # Sc UAE DIRHAM SIGN 2116 ; PR # So NUMERO SIGN 2212..2213 ; PR # Sm [2] MINUS SIGN..MINUS-OR-PLUS SIGN FE69 ; PR # Sc SMALL DOLLAR SIGN @@ -447,7 +448,7 @@ FFE1 ; PR # Sc FULLWIDTH POUND SIGN FFE5..FFE6 ; PR # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN 1E2FF ; PR # Sc WANCHO NGUN SIGN -# The above property value applies to 14 code points not listed here. +# The above property value applies to 13 code points not listed here. # Total code points: 67 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index deab5f65e..7ce61bb28 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-17.0.0.txt -# Date: 2025-07-30, 23:55:12 GMT +# Date: 2025-08-06, 15:35:29 GMT # © 2025 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -7551,6 +7551,7 @@ 20BF ; BITCOIN SIGN 20C0 ; SOM SIGN 20C1 ; SAUDI RIYAL SIGN +20C3 ; UAE DIRHAM SIGN 20D0 ; COMBINING LEFT HARPOON ABOVE 20D1 ; COMBINING RIGHT HARPOON ABOVE 20D2 ; COMBINING LONG VERTICAL LINE OVERLAY @@ -45823,6 +45824,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 159801 +# Total code points: 159802 # EOF diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/232.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/232.txt new file mode 100644 index 000000000..c1c9d7f3b --- /dev/null +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons/232.txt @@ -0,0 +1,16 @@ +# Symbol: UAE DIRHAM SIGN (20C3) +# https://github.com/unicode-org/utc-release-management/issues/232 + +# Names always differ. +# Age always differs since these tests are comparing additions to pre-existing characters. +Ignoring Name Age: + +# Ignore the security and IDNA properties, as these are not yet included for provisionally assigned characters. +Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status Idn_Mapping Idn_2008: + +Propertywise [\x{20C1} \N{SAUDI RIYAL SIGN} + \x{20C3} \N{UAE DIRHAM SIGN}] AreAlike + +end Ignoring; + +end Ignoring; \ No newline at end of file