Skip to content

Commit e5b3407

Browse files
authored
Confusables data for Devanagari UE and UUE (#1196)
* Confusables data for Devanagari UE and UUE - confusables-source.txt
* Confusables data for Devanagari UE and UUE - Generated data
* Also add identifier data for characters already added for 18.0.
1 parent 1732c6e commit e5b3407

File tree

8 files changed

+138
-109
lines changed

unicodetools/data/security/dev/IdentifierType.txt

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
# IdentifierType.txt
2-
# Date: 2025-09-12, 03:24:49 GMT
2+
# Date: 2025-10-09, 03:26:38 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -4492,6 +4492,7 @@ A8F8..A8FA ; Obsolete Not_XID # 5.2 [3] DEVANAGARI SIGN PUSH
44924492
20BF ; Not_XID # 10.0 BITCOIN SIGN
44934493
20C0 ; Not_XID # 14.0 SOM SIGN
44944494
20C1 ; Not_XID # 17.0 SAUDI RIYAL SIGN
4495+
20C3 ; Not_XID # 18.0 UAE DIRHAM SIGN
44954496
2104 ; Not_XID # 1.1 CENTRE LINE SYMBOL
44964497
2108 ; Not_XID # 1.1 SCRUPLE
44974498
2114 ; Not_XID # 1.1 L B BAR SYMBOL
@@ -4814,8 +4815,10 @@ FFFD ; Not_XID # 1.1 REPLACEMENT CHARACTE
48144815
1F780..1F7D4 ; Not_XID # 7.0 [85] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..HEAVY TWELVE POINTED PINWHEEL STAR
48154816
1F7D5..1F7D8 ; Not_XID # 11.0 [4] CIRCLED TRIANGLE..NEGATIVE CIRCLED SQUARE
48164817
1F7D9 ; Not_XID # 15.0 NINE POINTED WHITE STAR
4818+
1F7DB ; Not_XID # 18.0 BULLET IN DOUBLE CIRCLE
48174819
1F7E0..1F7EB ; Not_XID # 12.0 [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
48184820
1F7F0 ; Not_XID # 14.0 HEAVY EQUALS SIGN
4821+
1F7F1..1F7FF ; Not_XID # 18.0 [15] CIRCLE WITH DOUBLE VERTICAL AND HORIZONTAL LINE..RHOMBUS
48194822
1F800..1F80B ; Not_XID # 7.0 [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
48204823
1F810..1F847 ; Not_XID # 7.0 [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW
48214824
1F850..1F859 ; Not_XID # 7.0 [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
@@ -4916,7 +4919,7 @@ FFFD ; Not_XID # 1.1 REPLACEMENT CHARACTE
49164919
1FBCB..1FBEF ; Not_XID # 16.0 [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE
49174920
1FBFA ; Not_XID # 17.0 ALARM BELL SYMBOL
49184921

4919-
# Total code points: 6487
4922+
# Total code points: 6504
49204923

49214924
# Identifier_Type: Not_NFKC
49224925

unicodetools/data/security/dev/confusables.txt

Lines changed: 78 additions & 77 deletions
Large diffs are not rendered by default.

unicodetools/data/security/dev/confusablesSummary.txt

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
# confusablesSummary.txt
2-
# Date: 2025-09-12, 03:24:49 GMT
2+
# Date: 2025-10-09, 03:26:38 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -5778,10 +5778,20 @@
57785778
(‎ ̭ ‎) 032D COMBINING CIRCUMFLEX ACCENT BELOW
57795779
← (‎ ᳙ ‎) 1CD9 VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA SCHROEDER
57805780

5781-
# ̮
5781+
# 𑭢 ̮ ॖ ੁ
57825782
(‎ ̮ ‎) 032E COMBINING BREVE BELOW
5783+
← (‎ 𑭢 ‎) 11B62 SHARADA VOWEL SIGN UE # →ॖ→
5784+
← (‎ ॖ ‎) 0956 DEVANAGARI VOWEL SIGN UE
5785+
← (‎ ੁ ‎) 0A41 GURMUKHI VOWEL SIGN U # →ॖ→
57835786
← (‎ ᳘ ‎) 1CD8 VEDIC TONE CANDRA BELOW
57845787

5788+
# 𑭢𑭢 ̮̮ 𑭣 ॗ ੂ
5789+
(‎ ̮̮ ‎) 032E 032E COMBINING BREVE BELOW, COMBINING BREVE BELOW
5790+
← (‎ 𑭢𑭢 ‎) 11B62 11B62 SHARADA VOWEL SIGN UE, SHARADA VOWEL SIGN UE
5791+
← (‎ 𑭣 ‎) 11B63 SHARADA VOWEL SIGN UUE # →ॗ→
5792+
← (‎ ॗ ‎) 0957 DEVANAGARI VOWEL SIGN UUE
5793+
← (‎ ੂ ‎) 0A42 GURMUKHI VOWEL SIGN UU # →ॗ→
5794+
57855795
# ̳ ͇
57865796
(‎ ̳ ‎) 0333 COMBINING DOUBLE LOW LINE
57875797
← (‎ ͇ ‎) 0347 COMBINING EQUALS SIGN BELOW
@@ -8688,16 +8698,6 @@
86888698
← (‎ ੍ ‎) 0A4D GURMUKHI SIGN VIRAMA
86898699
← (‎ ્ ‎) 0ACD GUJARATI SIGN VIRAMA
86908700

8691-
# 𑭢 ॖ ੁ
8692-
(‎ ॖ ‎) 0956 DEVANAGARI VOWEL SIGN UE
8693-
← (‎ 𑭢 ‎) 11B62 SHARADA VOWEL SIGN UE
8694-
← (‎ ੁ ‎) 0A41 GURMUKHI VOWEL SIGN U
8695-
8696-
# 𑭣 ॗ ੂ
8697-
(‎ ॗ ‎) 0957 DEVANAGARI VOWEL SIGN UUE
8698-
← (‎ 𑭣 ‎) 11B63 SHARADA VOWEL SIGN UUE
8699-
← (‎ ੂ ‎) 0A42 GURMUKHI VOWEL SIGN UU
8700-
87018701
# । ꠰
87028702
(‎ । ‎) 0964 DEVANAGARI DANDA
87038703
← (‎ ꠰ ‎) A830 NORTH INDIC FRACTION ONE QUARTER
@@ -8885,9 +8885,9 @@
88858885
← (‎ ੳ𑭢 ‎) 0A73 11B62 GURMUKHI URA, SHARADA VOWEL SIGN UE # →ੳੁ→
88868886
← (‎ ੳੁ ‎) 0A73 0A41 GURMUKHI URA, GURMUKHI VOWEL SIGN U
88878887

8888-
# ੳ𑭣 ੳੂ ਊ
8888+
# ੳ𑭢𑭢 ੳੂ ਊ
88898889
(‎ ਊ ‎) 0A0A GURMUKHI LETTER UU
8890-
← (‎ ੳ𑭣 ‎) 0A73 11B63 GURMUKHI URA, SHARADA VOWEL SIGN UUE # →ੳੂ→
8890+
← (‎ ੳ𑭢𑭢 ‎) 0A73 11B62 11B62 GURMUKHI URA, SHARADA VOWEL SIGN UE, SHARADA VOWEL SIGN UE # →ੳੂ→
88918891
← (‎ ੳੂ ‎) 0A73 0A42 GURMUKHI URA, GURMUKHI VOWEL SIGN UU
88928892

88938893
# અા આ
@@ -17834,5 +17834,5 @@
1783417834
(‎ 𪘀 ‎) 2A600 CJK UNIFIED IDEOGRAPH-2A600
1783517835
← (‎ 𪘀 ‎) 2FA1D CJK COMPATIBILITY IDEOGRAPH-2FA1D
1783617836

17837-
# total : 7579
17837+
# total : 7582
1783817838

unicodetools/data/security/dev/data/confusablesSummaryIdentifier.txt

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
# confusablesSummaryIdentifier.txt
2-
# Date: 2025-09-12, 03:24:49 GMT
2+
# Date: 2025-10-09, 03:26:38 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -556,6 +556,14 @@
556556
← (‎ ઼ ‎) 0ABC GUJARATI SIGN NUKTA
557557
← (‎ ଼ ‎) 0B3C ORIYA SIGN NUKTA
558558

559+
# ॖ ੁ
560+
(‎ ॖ ‎) 0956 DEVANAGARI VOWEL SIGN UE
561+
← (‎ ੁ ‎) 0A41 GURMUKHI VOWEL SIGN U
562+
563+
# ॗ ੂ
564+
(‎ ॗ ‎) 0957 DEVANAGARI VOWEL SIGN UUE
565+
← (‎ ੂ ‎) 0A42 GURMUKHI VOWEL SIGN UU
566+
559567
# Γ Г
560568
(‎ Γ ‎) 0393 GREEK CAPITAL LETTER GAMMA
561569
← (‎ Г ‎) 0413 CYRILLIC CAPITAL LETTER GHE
@@ -925,14 +933,6 @@
925933
← (‎ ੍ ‎) 0A4D GURMUKHI SIGN VIRAMA
926934
← (‎ ્ ‎) 0ACD GUJARATI SIGN VIRAMA
927935

928-
# ॖ ੁ
929-
(‎ ॖ ‎) 0956 DEVANAGARI VOWEL SIGN UE
930-
← (‎ ੁ ‎) 0A41 GURMUKHI VOWEL SIGN U
931-
932-
# ॗ ੂ
933-
(‎ ॗ ‎) 0957 DEVANAGARI VOWEL SIGN UUE
934-
← (‎ ੂ ‎) 0A42 GURMUKHI VOWEL SIGN UU
935-
936936
# २ ર ૨
937937
(‎ २ ‎) 0968 DEVANAGARI DIGIT TWO
938938
← (‎ ર ‎) 0AB0 GUJARATI LETTER RA # →૨→

unicodetools/data/security/dev/data/draft-restrictions.txt

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -59697,6 +59697,7 @@ E0100..E01EF ; Allowed ; Recommended # [240] (U+E0100..U+E01EF) VARIATION SELE
5969759697
2074..208E ; ~Unicode Identifier # [27] (⁴..₎) SUPERSCRIPT FOUR..SUBSCRIPT RIGHT PARENTHESIS
5969859698
2090..209C ; ~Unicode Identifier # [13] (ₐ..ₜ) LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T
5969959699
20A0..20C1 ; ~Unicode Identifier # [34] (₠..⃁) EURO-CURRENCY SIGN..SAUDI RIYAL SIGN
59700+
20C3 ; ~Unicode Identifier # (⃃) UAE DIRHAM SIGN
5970059701
20DD..20E0 ; ~Unicode Identifier # [4] (⃝..⃠) COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
5970159702
20E2..20E4 ; ~Unicode Identifier # [3] (⃢..⃤) COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
5970259703
2100..2117 ; ~Unicode Identifier # [24] (℀..℗) ACCOUNT OF..SOUND RECORDING COPYRIGHT
@@ -59998,9 +59999,9 @@ FFF9..FFFD ; ~Unicode Identifier # [5] (U+FFF9..�) INTERLINEAR ANNOTATION
5999859999
1F6DC..1F6EC ; ~Unicode Identifier # [17] (🛜..🛬) WIRELESS..AIRPLANE ARRIVING
5999960000
1F6F0..1F6FC ; ~Unicode Identifier # [13] (🛰..🛼) SATELLITE..ROLLER SKATE
6000060001
1F700..1F7D9 ; ~Unicode Identifier # [218] (🜀..🟙) ALCHEMICAL SYMBOL FOR QUINTESSENCE..NINE POINTED WHITE STAR
60002+
1F7DB ; ~Unicode Identifier # (🟛) BULLET IN DOUBLE CIRCLE
6000160003
1F7E0..1F7EB ; ~Unicode Identifier # [12] (🟠..🟫) LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
60002-
1F7F0 ; ~Unicode Identifier # (🟰) HEAVY EQUALS SIGN
60003-
1F800..1F80B ; ~Unicode Identifier # [12] (🠀..🠋) LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
60004+
1F7F0..1F80B ; ~Unicode Identifier # [28] (🟰..🠋) HEAVY EQUALS SIGN..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
6000460005
1F810..1F847 ; ~Unicode Identifier # [56] (🠐..🡇) LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW
6000560006
1F850..1F859 ; ~Unicode Identifier # [10] (🡐..🡙) LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
6000660007
1F860..1F887 ; ~Unicode Identifier # [40] (🡠..🢇) WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
@@ -60023,4 +60024,4 @@ FFF9..FFFD ; ~Unicode Identifier # [5] (U+FFF9..�) INTERLINEAR ANNOTATION
6002360024
E0001 ; ~Unicode Identifier # (U+E0001) LANGUAGE TAG
6002460025
E0020..E007F ; ~Unicode Identifier # [96] (U+E0020..U+E007F) TAG SPACE..CANCEL TAG
6002560026

60026-
# Total code points: 14287
60027+
# Total code points: 14304

unicodetools/data/security/dev/data/review.txt

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
# review.txt
2-
# Date: 2025-09-12, 03:25:00 GMT
2+
# Date: 2025-10-09, 03:27:02 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -71181,6 +71181,7 @@ E0100..E01EF ; Restricted ; output-disallowed # [240] (U+E0100..U+E01EF) VARIA
7118171181
2029 ; Restricted ; not in XID+ # (U+2029) PARAGRAPH SEPARATOR
7118271182
202A..202E ; Restricted ; not in XID+ # [5] (U+202A..U+202E) LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
7118371183
2066..2069 ; Restricted ; not in XID+ # [4] (U+2066..U+2069) LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE
71184+
20C3 ; Restricted ; not in XID+ # (⃃) UAE DIRHAM SIGN
7118471185
2488..249B ; Restricted ; not in XID+ # [20] (⒈..⒛) DIGIT ONE FULL STOP..NUMBER TWENTY FULL STOP
7118571186
2FF0 ; Restricted ; not in XID+ # (⿰) IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT
7118671187
2FF1 ; Restricted ; not in XID+ # (⿱) IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW
@@ -71230,7 +71231,23 @@ FFFD ; Restricted ; not in XID+ # (�) REPLACEMENT CHARACTER
7123071231
1343E ; Restricted ; not in XID+ # (U+1343E) EGYPTIAN HIEROGLYPH BEGIN WALLED ENCLOSURE
7123171232
1343F ; Restricted ; not in XID+ # (U+1343F) EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE
7123271233
1F100 ; Restricted ; not in XID+ # (🄀) DIGIT ZERO FULL STOP
71234+
1F7DB ; Restricted ; not in XID+ # (🟛) BULLET IN DOUBLE CIRCLE
71235+
1F7F1 ; Restricted ; not in XID+ # (🟱) CIRCLE WITH DOUBLE VERTICAL AND HORIZONTAL LINE
71236+
1F7F2 ; Restricted ; not in XID+ # (🟲) DOUBLE CIRCLE WITH DOUBLE HORIZONTAL LINE
71237+
1F7F3 ; Restricted ; not in XID+ # (🟳) CIRCLED BOTTOM RIGHT OBLIQUE HALF BLACK CIRCLE
71238+
1F7F4 ; Restricted ; not in XID+ # (🟴) LEFT HALF WHITE CIRCLE
71239+
1F7F5 ; Restricted ; not in XID+ # (🟵) RIGHT HALF WHITE CIRCLE
71240+
1F7F6 ; Restricted ; not in XID+ # (🟶) TRANSPARENT CUBE
71241+
1F7F7 ; Restricted ; not in XID+ # (🟷) WHITE CUBE
71242+
1F7F8 ; Restricted ; not in XID+ # (🟸) HORIZONTAL DOUBLE WHITE SMALL SQUARE
71243+
1F7F9 ; Restricted ; not in XID+ # (🟹) VERTICAL DOUBLE WHITE SMALL SQUARE
71244+
1F7FA ; Restricted ; not in XID+ # (🟺) WHITE SQUARE WITH BOTTOM HALF BISECTED
71245+
1F7FB ; Restricted ; not in XID+ # (🟻) WHITE SQUARE WITH TOP HALF BISECTED
71246+
1F7FC ; Restricted ; not in XID+ # (🟼) WHITE SQUARE WITH HORIZONTAL AND VERTICAL BISECTING LINES
71247+
1F7FD ; Restricted ; not in XID+ # (🟽) LOWER LEFT FLATTENED RIGHT TRIANGLE
71248+
1F7FE ; Restricted ; not in XID+ # (🟾) LOWER RIGHT FLATTENED RIGHT TRIANGLE
71249+
1F7FF ; Restricted ; not in XID+ # (🟿) RHOMBUS
7123371250
E0001 ; Restricted ; not in XID+ # (U+E0001) LANGUAGE TAG
7123471251
E0020..E007F ; Restricted ; not in XID+ # [96] (U+E0020..U+E007F) TAG SPACE..CANCEL TAG
7123571252

71236-
# Total code points: 226
71253+
# Total code points: 243

unicodetools/data/security/dev/data/source/confusables-source.txt

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5724,3 +5724,7 @@ A7F1 ; 02E2 # ( ꟱ → ˢ ) MODIFIER LETTER CAPITAL S → MODIFIER LETTER SMAL
57245724

57255725
# Confusable Katakana-Han pair (PAG ref #442)
57265726
1B122 ; 4E8E
5727+
5728+
# Confusables for Devanagari UE and UUE (PAG ref #449)
5729+
0956 ; 032E
5730+
0957 ; 032E 032E

unicodetools/data/security/dev/data/source/formatted-source.txt

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
# formatted-source.txt
2-
# Date: 2025-09-12, 03:24:47 GMT
2+
# Date: 2025-10-09, 03:26:35 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
@@ -1265,8 +1265,11 @@
12651265

12661266
032D ; 1CD9 # ( ̭ ~ ᳙ ) COMBINING CIRCUMFLEX ACCENT BELOW ~ VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA SCHROEDER
12671267

1268+
032E ; 0956 # ( ̮ ~ ॖ ) COMBINING BREVE BELOW ~ DEVANAGARI VOWEL SIGN UE
12681269
032E ; 1CD8 # ( ̮ ~ ᳘ ) COMBINING BREVE BELOW ~ VEDIC TONE CANDRA BELOW
12691270

1271+
032E 032E ; 0957 # ( ̮̮ ~ ॗ ) COMBINING BREVE BELOW, COMBINING BREVE BELOW ~ DEVANAGARI VOWEL SIGN UUE
1272+
12701273
0331 ; 0320 # ( ̱ ~ ̠ ) COMBINING MACRON BELOW ~ COMBINING MINUS SIGN BELOW
12711274
0331 ; 0952 # ( ̱ ~ ॒ ) COMBINING MACRON BELOW ~ DEVANAGARI STRESS SIGN ANUDATTA
12721275

0 commit comments

Comments
 (0)