Update to Unicode 13.0.0.

This commit is contained in:
Philip.Hazel 2020-03-25 17:18:33 +00:00
parent 59233b8079
commit c472f3f91a
26 changed files with 6009 additions and 4252 deletions

View File

@ -97,6 +97,8 @@ character tables handling have been done:
22. Changed setting of CMAKE_MODULE_PATH in CMakeLists.txt from SET to
LIST(APPEND...) to allow a setting from the command line to be included.
23. Updated to Unicode 13.0.0.
Version 10.34 21-November-2019
------------------------------

View File

@ -819,6 +819,7 @@ Caucasian_Albanian,
Chakma,
Cham,
Cherokee,
Chorasmian,
Common,
Coptic,
Cuneiform,
@ -826,6 +827,7 @@ Cypriot,
Cyrillic,
Deseret,
Devanagari,
Dives_Akuru,
Dogra,
Duployan,
Egyptian_Hieroglyphs,
@ -857,6 +859,7 @@ Kannada,
Katakana,
Kayah_Li,
Kharoshthi,
Khitan_Small_Script,
Khmer,
Khojki,
Khudawadi,
@ -947,6 +950,7 @@ Unknown,
Vai,
Wancho,
Warang_Citi,
Yezidi,
Yi,
Zanabazar_Square.
</P>

View File

@ -223,6 +223,7 @@ Caucasian_Albanian,
Chakma,
Cham,
Cherokee,
Chorasmian,
Common,
Coptic,
Cuneiform,
@ -230,6 +231,7 @@ Cypriot,
Cyrillic,
Deseret,
Devanagari,
Dives_Akuru,
Dogra,
Duployan,
Egyptian_Hieroglyphs,
@ -261,6 +263,7 @@ Kannada,
Katakana,
Kayah_Li,
Kharoshthi,
Khitan_Small_Script,
Khmer,
Khojki,
Khudawadi,
@ -350,6 +353,7 @@ Ugaritic,
Vai,
Wancho,
Warang_Citi,
Yezidi,
Yi,
Zanabazar_Square.
</P>

File diff suppressed because it is too large Load Diff

View File

@ -814,6 +814,7 @@ Caucasian_Albanian,
Chakma,
Cham,
Cherokee,
Chorasmian,
Common,
Coptic,
Cuneiform,
@ -821,6 +822,7 @@ Cypriot,
Cyrillic,
Deseret,
Devanagari,
Dives_Akuru,
Dogra,
Duployan,
Egyptian_Hieroglyphs,
@ -852,6 +854,7 @@ Kannada,
Katakana,
Kayah_Li,
Kharoshthi,
Khitan_Small_Script,
Khmer,
Khojki,
Khudawadi,
@ -942,6 +945,7 @@ Unknown,
Vai,
Wancho,
Warang_Citi,
Yezidi,
Yi,
Zanabazar_Square.
.P

View File

@ -193,6 +193,7 @@ Caucasian_Albanian,
Chakma,
Cham,
Cherokee,
Chorasmian,
Common,
Coptic,
Cuneiform,
@ -200,6 +201,7 @@ Cypriot,
Cyrillic,
Deseret,
Devanagari,
Dives_Akuru,
Dogra,
Duployan,
Egyptian_Hieroglyphs,
@ -231,6 +233,7 @@ Kannada,
Katakana,
Kayah_Li,
Kharoshthi,
Khitan_Small_Script,
Khmer,
Khojki,
Khudawadi,
@ -320,6 +323,7 @@ Ugaritic,
Vai,
Wancho,
Warang_Citi,
Yezidi,
Yi,
Zanabazar_Square.
.

View File

@ -27,6 +27,7 @@
# Added script names for Unicode 11.0.0, 03-July-2018.
# Added 'Unknown' script, 01-October-2018.
# Added script names for Unicode 12.1.0, 27-July-2019.
# Added script names for Unicode 13.0.0, 10-March-2020.
script_names = ['Unknown', 'Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \
'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \
@ -63,7 +64,9 @@ script_names = ['Unknown', 'Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille
'Dogra', 'Gunjala_Gondi', 'Hanifi_Rohingya', 'Makasar', 'Medefaidrin',
'Old_Sogdian', 'Sogdian',
# New for Unicode 12.0.0
'Elymaic', 'Nandinagari', 'Nyiakeng_Puachue_Hmong', 'Wancho'
'Elymaic', 'Nandinagari', 'Nyiakeng_Puachue_Hmong', 'Wancho',
# New for Unicode 13.0.0
'Chorasmian', 'Dives_Akuru', 'Khitan_Small_Script', 'Yezidi'
]
category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',

View File

@ -23,11 +23,14 @@
# DerivedGeneralCategory.txt is found in the "extracted" subdirectory of the
# Unicode database (UCD) on the Unicode web site; GraphemeBreakProperty.txt is
# in the "auxiliary" subdirectory. Scripts.txt, ScriptExtensions.txt, and
# CaseFolding.txt are directly in the UCD directory. The emoji-data.txt file is
# CaseFolding.txt are directly in the UCD directory.
#
# The emoji-data.txt file is found in the "emoji" subdirectory even though it
# is technically part of a different (but coordinated) standard as shown
# in files associated with Unicode Technical Standard #51 ("Unicode Emoji"),
# for example:
#
# http://unicode.org/Public/emoji/11.0/emoji-data.txt
# http://unicode.org/Public/emoji/13.0/ReadMe.txt
#
# -----------------------------------------------------------------------------
# Minor modifications made to this script:
@ -88,6 +91,7 @@
# 01-October-2018: Added the 'Unknown' script name
# 03-October-2018: Added new field for Script Extensions
# 27-July-2019: Updated for Unicode 12.1.0
# 10-March-2020: Updated for Unicode 13.0.0
# ----------------------------------------------------------------------------
#
#
@ -179,7 +183,6 @@
# any of those scripts, which are Bengali, Devanagari, Grantha, and Kannada.
#
# Philip Hazel, 03 July 2008
# Last Updated: 07 October 2018
##############################################################################
@ -427,7 +430,9 @@ script_names = ['Unknown', 'Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille
'Dogra', 'Gunjala_Gondi', 'Hanifi_Rohingya', 'Makasar', 'Medefaidrin',
'Old_Sogdian', 'Sogdian',
# New for Unicode 12.0.0
'Elymaic', 'Nandinagari', 'Nyiakeng_Puachue_Hmong', 'Wancho'
'Elymaic', 'Nandinagari', 'Nyiakeng_Puachue_Hmong', 'Wancho',
# New for Unicode 13.0.0
'Chorasmian', 'Dives_Akuru', 'Khitan_Small_Script', 'Yezidi'
]
script_abbrevs = [
@ -462,7 +467,9 @@ script_abbrevs = [
#New for Unicode 11.0.0
'Dogr', 'Gong', 'Rohg', 'Maka', 'Medf', 'Sogo', 'Sogd',
#New for Unicode 12.0.0
'Elym', 'Nand', 'Hmnp', 'Wcho'
'Elym', 'Nand', 'Hmnp', 'Wcho',
#New for Unicode 13.0.0
'Chrs', 'Diak', 'Kits', 'Yezi'
]
category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',

View File

@ -81,11 +81,11 @@ script names.
MultiStage2.py has two lists: the full names and the abbreviations that are
found in the ScriptExtensions.txt file. A list of script names and their
abbreviations s can be found in the PropertyValueAliases.txt file on the
abbreviations can be found in the PropertyValueAliases.txt file on the
Unicode web site. There is also a Wikipedia page that lists them, and notes the
Unicode version in which they were introduced:
http://en.wikipedia.org/wiki/Unicode_scripts#Table_of_Unicode_scripts
https://en.wikipedia.org/wiki/Unicode_scripts#Table_of_Unicode_scripts
Once the script name lists have been updated, MultiStage2.py can be run to
generate a new version of pcre2_ucd.c, and GenerateUtt.py can be run to

View File

@ -1,5 +1,5 @@
# CaseFolding-12.1.0.txt
# Date: 2019-03-10, 10:53:00 GMT
# CaseFolding-13.0.0.txt
# Date: 2019-09-08, 23:30:59 GMT
# © 2019 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
@ -1234,6 +1234,9 @@ A7C2; C; A7C3; # LATIN CAPITAL LETTER ANGLICANA W
A7C4; C; A794; # LATIN CAPITAL LETTER C WITH PALATAL HOOK
A7C5; C; 0282; # LATIN CAPITAL LETTER S WITH HOOK
A7C6; C; 1D8E; # LATIN CAPITAL LETTER Z WITH PALATAL HOOK
A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
A7F5; C; A7F6; # LATIN CAPITAL LETTER REVERSED HALF H
AB70; C; 13A0; # CHEROKEE SMALL LETTER A
AB71; C; 13A1; # CHEROKEE SMALL LETTER E
AB72; C; 13A2; # CHEROKEE SMALL LETTER I

View File

@ -1,5 +1,5 @@
# DerivedGeneralCategory-12.1.0.txt
# Date: 2019-03-10, 10:53:08 GMT
# DerivedGeneralCategory-13.0.0.txt
# Date: 2019-10-21, 14:30:32 GMT
# © 2019 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
@ -38,7 +38,7 @@
085F ; Cn # <reserved-085F>
086B..089F ; Cn # [53] <reserved-086B>..<reserved-089F>
08B5 ; Cn # <reserved-08B5>
08BE..08D2 ; Cn # [21] <reserved-08BE>..<reserved-08D2>
08C8..08D2 ; Cn # [11] <reserved-08C8>..<reserved-08D2>
0984 ; Cn # <reserved-0984>
098D..098E ; Cn # [2] <reserved-098D>..<reserved-098E>
0991..0992 ; Cn # [2] <reserved-0991>..<reserved-0992>
@ -92,7 +92,7 @@
0B3A..0B3B ; Cn # [2] <reserved-0B3A>..<reserved-0B3B>
0B45..0B46 ; Cn # [2] <reserved-0B45>..<reserved-0B46>
0B49..0B4A ; Cn # [2] <reserved-0B49>..<reserved-0B4A>
0B4E..0B55 ; Cn # [8] <reserved-0B4E>..<reserved-0B55>
0B4E..0B54 ; Cn # [7] <reserved-0B4E>..<reserved-0B54>
0B58..0B5B ; Cn # [4] <reserved-0B58>..<reserved-0B5B>
0B5E ; Cn # <reserved-0B5E>
0B64..0B65 ; Cn # [2] <reserved-0B64>..<reserved-0B65>
@ -137,14 +137,13 @@
0CE4..0CE5 ; Cn # [2] <reserved-0CE4>..<reserved-0CE5>
0CF0 ; Cn # <reserved-0CF0>
0CF3..0CFF ; Cn # [13] <reserved-0CF3>..<reserved-0CFF>
0D04 ; Cn # <reserved-0D04>
0D0D ; Cn # <reserved-0D0D>
0D11 ; Cn # <reserved-0D11>
0D45 ; Cn # <reserved-0D45>
0D49 ; Cn # <reserved-0D49>
0D50..0D53 ; Cn # [4] <reserved-0D50>..<reserved-0D53>
0D64..0D65 ; Cn # [2] <reserved-0D64>..<reserved-0D65>
0D80..0D81 ; Cn # [2] <reserved-0D80>..<reserved-0D81>
0D80 ; Cn # <reserved-0D80>
0D84 ; Cn # <reserved-0D84>
0D97..0D99 ; Cn # [3] <reserved-0D97>..<reserved-0D99>
0DB2 ; Cn # <reserved-0DB2>
@ -231,7 +230,7 @@
1A8A..1A8F ; Cn # [6] <reserved-1A8A>..<reserved-1A8F>
1A9A..1A9F ; Cn # [6] <reserved-1A9A>..<reserved-1A9F>
1AAE..1AAF ; Cn # [2] <reserved-1AAE>..<reserved-1AAF>
1ABF..1AFF ; Cn # [65] <reserved-1ABF>..<reserved-1AFF>
1AC1..1AFF ; Cn # [63] <reserved-1AC1>..<reserved-1AFF>
1B4C..1B4F ; Cn # [4] <reserved-1B4C>..<reserved-1B4F>
1B7D..1B7F ; Cn # [3] <reserved-1B7D>..<reserved-1B7F>
1BF4..1BFB ; Cn # [8] <reserved-1BF4>..<reserved-1BFB>
@ -268,7 +267,7 @@
2427..243F ; Cn # [25] <reserved-2427>..<reserved-243F>
244B..245F ; Cn # [21] <reserved-244B>..<reserved-245F>
2B74..2B75 ; Cn # [2] <reserved-2B74>..<reserved-2B75>
2B96..2B97 ; Cn # [2] <reserved-2B96>..<reserved-2B97>
2B96 ; Cn # <reserved-2B96>
2C2F ; Cn # <reserved-2C2F>
2C5F ; Cn # <reserved-2C5F>
2CF4..2CF8 ; Cn # [5] <reserved-2CF4>..<reserved-2CF8>
@ -286,7 +285,7 @@
2DCF ; Cn # <reserved-2DCF>
2DD7 ; Cn # <reserved-2DD7>
2DDF ; Cn # <reserved-2DDF>
2E50..2E7F ; Cn # [48] <reserved-2E50>..<reserved-2E7F>
2E53..2E7F ; Cn # [45] <reserved-2E53>..<reserved-2E7F>
2E9A ; Cn # <reserved-2E9A>
2EF4..2EFF ; Cn # [12] <reserved-2EF4>..<reserved-2EFF>
2FD6..2FEF ; Cn # [26] <reserved-2FD6>..<reserved-2FEF>
@ -296,18 +295,16 @@
3100..3104 ; Cn # [5] <reserved-3100>..<reserved-3104>
3130 ; Cn # <reserved-3130>
318F ; Cn # <reserved-318F>
31BB..31BF ; Cn # [5] <reserved-31BB>..<reserved-31BF>
31E4..31EF ; Cn # [12] <reserved-31E4>..<reserved-31EF>
321F ; Cn # <reserved-321F>
4DB6..4DBF ; Cn # [10] <reserved-4DB6>..<reserved-4DBF>
9FF0..9FFF ; Cn # [16] <reserved-9FF0>..<reserved-9FFF>
9FFD..9FFF ; Cn # [3] <reserved-9FFD>..<reserved-9FFF>
A48D..A48F ; Cn # [3] <reserved-A48D>..<reserved-A48F>
A4C7..A4CF ; Cn # [9] <reserved-A4C7>..<reserved-A4CF>
A62C..A63F ; Cn # [20] <reserved-A62C>..<reserved-A63F>
A6F8..A6FF ; Cn # [8] <reserved-A6F8>..<reserved-A6FF>
A7C0..A7C1 ; Cn # [2] <reserved-A7C0>..<reserved-A7C1>
A7C7..A7F6 ; Cn # [48] <reserved-A7C7>..<reserved-A7F6>
A82C..A82F ; Cn # [4] <reserved-A82C>..<reserved-A82F>
A7CB..A7F4 ; Cn # [42] <reserved-A7CB>..<reserved-A7F4>
A82D..A82F ; Cn # [3] <reserved-A82D>..<reserved-A82F>
A83A..A83F ; Cn # [6] <reserved-A83A>..<reserved-A83F>
A878..A87F ; Cn # [8] <reserved-A878>..<reserved-A87F>
A8C6..A8CD ; Cn # [8] <reserved-A8C6>..<reserved-A8CD>
@ -327,7 +324,7 @@ AB0F..AB10 ; Cn # [2] <reserved-AB0F>..<reserved-AB10>
AB17..AB1F ; Cn # [9] <reserved-AB17>..<reserved-AB1F>
AB27 ; Cn # <reserved-AB27>
AB2F ; Cn # <reserved-AB2F>
AB68..AB6F ; Cn # [8] <reserved-AB68>..<reserved-AB6F>
AB6C..AB6F ; Cn # [4] <reserved-AB6C>..<reserved-AB6F>
ABEE..ABEF ; Cn # [2] <reserved-ABEE>..<reserved-ABEF>
ABFA..ABFF ; Cn # [6] <reserved-ABFA>..<reserved-ABFF>
D7A4..D7AF ; Cn # [12] <reserved-D7A4>..<reserved-D7AF>
@ -372,7 +369,7 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
10103..10106 ; Cn # [4] <reserved-10103>..<reserved-10106>
10134..10136 ; Cn # [3] <reserved-10134>..<reserved-10136>
1018F ; Cn # <reserved-1018F>
1019C..1019F ; Cn # [4] <reserved-1019C>..<reserved-1019F>
1019D..1019F ; Cn # [3] <reserved-1019D>..<reserved-1019F>
101A1..101CF ; Cn # [47] <reserved-101A1>..<reserved-101CF>
101FE..1027F ; Cn # [130] <reserved-101FE>..<reserved-1027F>
1029D..1029F ; Cn # [3] <reserved-1029D>..<reserved-1029F>
@ -431,9 +428,13 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
10CF3..10CF9 ; Cn # [7] <reserved-10CF3>..<reserved-10CF9>
10D28..10D2F ; Cn # [8] <reserved-10D28>..<reserved-10D2F>
10D3A..10E5F ; Cn # [294] <reserved-10D3A>..<reserved-10E5F>
10E7F..10EFF ; Cn # [129] <reserved-10E7F>..<reserved-10EFF>
10E7F ; Cn # <reserved-10E7F>
10EAA ; Cn # <reserved-10EAA>
10EAE..10EAF ; Cn # [2] <reserved-10EAE>..<reserved-10EAF>
10EB2..10EFF ; Cn # [78] <reserved-10EB2>..<reserved-10EFF>
10F28..10F2F ; Cn # [8] <reserved-10F28>..<reserved-10F2F>
10F5A..10FDF ; Cn # [134] <reserved-10F5A>..<reserved-10FDF>
10F5A..10FAF ; Cn # [86] <reserved-10F5A>..<reserved-10FAF>
10FCC..10FDF ; Cn # [20] <reserved-10FCC>..<reserved-10FDF>
10FF7..10FFF ; Cn # [9] <reserved-10FF7>..<reserved-10FFF>
1104E..11051 ; Cn # [4] <reserved-1104E>..<reserved-11051>
11070..1107E ; Cn # [15] <reserved-11070>..<reserved-1107E>
@ -442,9 +443,8 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
110E9..110EF ; Cn # [7] <reserved-110E9>..<reserved-110EF>
110FA..110FF ; Cn # [6] <reserved-110FA>..<reserved-110FF>
11135 ; Cn # <reserved-11135>
11147..1114F ; Cn # [9] <reserved-11147>..<reserved-1114F>
11148..1114F ; Cn # [8] <reserved-11148>..<reserved-1114F>
11177..1117F ; Cn # [9] <reserved-11177>..<reserved-1117F>
111CE..111CF ; Cn # [2] <reserved-111CE>..<reserved-111CF>
111E0 ; Cn # <reserved-111E0>
111F5..111FF ; Cn # [11] <reserved-111F5>..<reserved-111FF>
11212 ; Cn # <reserved-11212>
@ -471,9 +471,8 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
11364..11365 ; Cn # [2] <reserved-11364>..<reserved-11365>
1136D..1136F ; Cn # [3] <reserved-1136D>..<reserved-1136F>
11375..113FF ; Cn # [139] <reserved-11375>..<reserved-113FF>
1145A ; Cn # <reserved-1145A>
1145C ; Cn # <reserved-1145C>
11460..1147F ; Cn # [32] <reserved-11460>..<reserved-1147F>
11462..1147F ; Cn # [30] <reserved-11462>..<reserved-1147F>
114C8..114CF ; Cn # [8] <reserved-114C8>..<reserved-114CF>
114DA..1157F ; Cn # [166] <reserved-114DA>..<reserved-1157F>
115B6..115B7 ; Cn # [2] <reserved-115B6>..<reserved-115B7>
@ -488,7 +487,14 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
11740..117FF ; Cn # [192] <reserved-11740>..<reserved-117FF>
1183C..1189F ; Cn # [100] <reserved-1183C>..<reserved-1189F>
118F3..118FE ; Cn # [12] <reserved-118F3>..<reserved-118FE>
11900..1199F ; Cn # [160] <reserved-11900>..<reserved-1199F>
11907..11908 ; Cn # [2] <reserved-11907>..<reserved-11908>
1190A..1190B ; Cn # [2] <reserved-1190A>..<reserved-1190B>
11914 ; Cn # <reserved-11914>
11917 ; Cn # <reserved-11917>
11936 ; Cn # <reserved-11936>
11939..1193A ; Cn # [2] <reserved-11939>..<reserved-1193A>
11947..1194F ; Cn # [9] <reserved-11947>..<reserved-1194F>
1195A..1199F ; Cn # [70] <reserved-1195A>..<reserved-1199F>
119A8..119A9 ; Cn # [2] <reserved-119A8>..<reserved-119A9>
119D8..119D9 ; Cn # [2] <reserved-119D8>..<reserved-119D9>
119E5..119FF ; Cn # [27] <reserved-119E5>..<reserved-119FF>
@ -515,7 +521,8 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
11D92 ; Cn # <reserved-11D92>
11D99..11D9F ; Cn # [7] <reserved-11D99>..<reserved-11D9F>
11DAA..11EDF ; Cn # [310] <reserved-11DAA>..<reserved-11EDF>
11EF9..11FBF ; Cn # [199] <reserved-11EF9>..<reserved-11FBF>
11EF9..11FAF ; Cn # [183] <reserved-11EF9>..<reserved-11FAF>
11FB1..11FBF ; Cn # [15] <reserved-11FB1>..<reserved-11FBF>
11FF2..11FFE ; Cn # [13] <reserved-11FF2>..<reserved-11FFE>
1239A..123FF ; Cn # [102] <reserved-1239A>..<reserved-123FF>
1246F ; Cn # <reserved-1246F>
@ -539,9 +546,11 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
16F4B..16F4E ; Cn # [4] <reserved-16F4B>..<reserved-16F4E>
16F88..16F8E ; Cn # [7] <reserved-16F88>..<reserved-16F8E>
16FA0..16FDF ; Cn # [64] <reserved-16FA0>..<reserved-16FDF>
16FE4..16FFF ; Cn # [28] <reserved-16FE4>..<reserved-16FFF>
16FE5..16FEF ; Cn # [11] <reserved-16FE5>..<reserved-16FEF>
16FF2..16FFF ; Cn # [14] <reserved-16FF2>..<reserved-16FFF>
187F8..187FF ; Cn # [8] <reserved-187F8>..<reserved-187FF>
18AF3..1AFFF ; Cn # [9485] <reserved-18AF3>..<reserved-1AFFF>
18CD6..18CFF ; Cn # [42] <reserved-18CD6>..<reserved-18CFF>
18D09..1AFFF ; Cn # [8951] <reserved-18D09>..<reserved-1AFFF>
1B11F..1B14F ; Cn # [49] <reserved-1B11F>..<reserved-1B14F>
1B153..1B163 ; Cn # [17] <reserved-1B153>..<reserved-1B163>
1B168..1B16F ; Cn # [8] <reserved-1B168>..<reserved-1B16F>
@ -639,17 +648,15 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
1F0C0 ; Cn # <reserved-1F0C0>
1F0D0 ; Cn # <reserved-1F0D0>
1F0F6..1F0FF ; Cn # [10] <reserved-1F0F6>..<reserved-1F0FF>
1F10D..1F10F ; Cn # [3] <reserved-1F10D>..<reserved-1F10F>
1F16D..1F16F ; Cn # [3] <reserved-1F16D>..<reserved-1F16F>
1F1AD..1F1E5 ; Cn # [57] <reserved-1F1AD>..<reserved-1F1E5>
1F1AE..1F1E5 ; Cn # [56] <reserved-1F1AE>..<reserved-1F1E5>
1F203..1F20F ; Cn # [13] <reserved-1F203>..<reserved-1F20F>
1F23C..1F23F ; Cn # [4] <reserved-1F23C>..<reserved-1F23F>
1F249..1F24F ; Cn # [7] <reserved-1F249>..<reserved-1F24F>
1F252..1F25F ; Cn # [14] <reserved-1F252>..<reserved-1F25F>
1F266..1F2FF ; Cn # [154] <reserved-1F266>..<reserved-1F2FF>
1F6D6..1F6DF ; Cn # [10] <reserved-1F6D6>..<reserved-1F6DF>
1F6D8..1F6DF ; Cn # [8] <reserved-1F6D8>..<reserved-1F6DF>
1F6ED..1F6EF ; Cn # [3] <reserved-1F6ED>..<reserved-1F6EF>
1F6FB..1F6FF ; Cn # [5] <reserved-1F6FB>..<reserved-1F6FF>
1F6FD..1F6FF ; Cn # [3] <reserved-1F6FD>..<reserved-1F6FF>
1F774..1F77F ; Cn # [12] <reserved-1F774>..<reserved-1F77F>
1F7D9..1F7DF ; Cn # [7] <reserved-1F7D9>..<reserved-1F7DF>
1F7EC..1F7FF ; Cn # [20] <reserved-1F7EC>..<reserved-1F7FF>
@ -657,32 +664,36 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
1F848..1F84F ; Cn # [8] <reserved-1F848>..<reserved-1F84F>
1F85A..1F85F ; Cn # [6] <reserved-1F85A>..<reserved-1F85F>
1F888..1F88F ; Cn # [8] <reserved-1F888>..<reserved-1F88F>
1F8AE..1F8FF ; Cn # [82] <reserved-1F8AE>..<reserved-1F8FF>
1F90C ; Cn # <reserved-1F90C>
1F972 ; Cn # <reserved-1F972>
1F977..1F979 ; Cn # [3] <reserved-1F977>..<reserved-1F979>
1F9A3..1F9A4 ; Cn # [2] <reserved-1F9A3>..<reserved-1F9A4>
1F9AB..1F9AD ; Cn # [3] <reserved-1F9AB>..<reserved-1F9AD>
1F9CB..1F9CC ; Cn # [2] <reserved-1F9CB>..<reserved-1F9CC>
1F8AE..1F8AF ; Cn # [2] <reserved-1F8AE>..<reserved-1F8AF>
1F8B2..1F8FF ; Cn # [78] <reserved-1F8B2>..<reserved-1F8FF>
1F979 ; Cn # <reserved-1F979>
1F9CC ; Cn # <reserved-1F9CC>
1FA54..1FA5F ; Cn # [12] <reserved-1FA54>..<reserved-1FA5F>
1FA6E..1FA6F ; Cn # [2] <reserved-1FA6E>..<reserved-1FA6F>
1FA74..1FA77 ; Cn # [4] <reserved-1FA74>..<reserved-1FA77>
1FA75..1FA77 ; Cn # [3] <reserved-1FA75>..<reserved-1FA77>
1FA7B..1FA7F ; Cn # [5] <reserved-1FA7B>..<reserved-1FA7F>
1FA83..1FA8F ; Cn # [13] <reserved-1FA83>..<reserved-1FA8F>
1FA96..1FFFF ; Cn # [1386] <reserved-1FA96>..<noncharacter-1FFFF>
2A6D7..2A6FF ; Cn # [41] <reserved-2A6D7>..<reserved-2A6FF>
1FA87..1FA8F ; Cn # [9] <reserved-1FA87>..<reserved-1FA8F>
1FAA9..1FAAF ; Cn # [7] <reserved-1FAA9>..<reserved-1FAAF>
1FAB7..1FABF ; Cn # [9] <reserved-1FAB7>..<reserved-1FABF>
1FAC3..1FACF ; Cn # [13] <reserved-1FAC3>..<reserved-1FACF>
1FAD7..1FAFF ; Cn # [41] <reserved-1FAD7>..<reserved-1FAFF>
1FB93 ; Cn # <reserved-1FB93>
1FBCB..1FBEF ; Cn # [37] <reserved-1FBCB>..<reserved-1FBEF>
1FBFA..1FFFF ; Cn # [1030] <reserved-1FBFA>..<noncharacter-1FFFF>
2A6DE..2A6FF ; Cn # [34] <reserved-2A6DE>..<reserved-2A6FF>
2B735..2B73F ; Cn # [11] <reserved-2B735>..<reserved-2B73F>
2B81E..2B81F ; Cn # [2] <reserved-2B81E>..<reserved-2B81F>
2CEA2..2CEAF ; Cn # [14] <reserved-2CEA2>..<reserved-2CEAF>
2EBE1..2F7FF ; Cn # [3103] <reserved-2EBE1>..<reserved-2F7FF>
2FA1E..E0000 ; Cn # [722403] <reserved-2FA1E>..<reserved-E0000>
2FA1E..2FFFF ; Cn # [1506] <reserved-2FA1E>..<noncharacter-2FFFF>
3134B..E0000 ; Cn # [715958] <reserved-3134B>..<reserved-E0000>
E0002..E001F ; Cn # [30] <reserved-E0002>..<reserved-E001F>
E0080..E00FF ; Cn # [128] <reserved-E0080>..<reserved-E00FF>
E01F0..EFFFF ; Cn # [65040] <reserved-E01F0>..<noncharacter-EFFFF>
FFFFE..FFFFF ; Cn # [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
10FFFE..10FFFF; Cn # [2] <noncharacter-10FFFE>..<noncharacter-10FFFF>
# Total code points: 836602
# Total code points: 830672
# ================================================
@ -1285,7 +1296,9 @@ A7BA ; Lu # LATIN CAPITAL LETTER GLOTTAL A
A7BC ; Lu # LATIN CAPITAL LETTER GLOTTAL I
A7BE ; Lu # LATIN CAPITAL LETTER GLOTTAL U
A7C2 ; Lu # LATIN CAPITAL LETTER ANGLICANA W
A7C4..A7C6 ; Lu # [3] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER Z WITH PALATAL HOOK
A7C4..A7C7 ; Lu # [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
A7C9 ; Lu # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
A7F5 ; Lu # LATIN CAPITAL LETTER REVERSED HALF H
FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
10400..10427 ; Lu # [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW
104B0..104D3 ; Lu # [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA
@ -1325,7 +1338,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP
1D7CA ; Lu # MATHEMATICAL BOLD CAPITAL DIGAMMA
1E900..1E921 ; Lu # [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA
# Total code points: 1788
# Total code points: 1791
# ================================================
@ -1932,9 +1945,12 @@ A7BB ; Ll # LATIN SMALL LETTER GLOTTAL A
A7BD ; Ll # LATIN SMALL LETTER GLOTTAL I
A7BF ; Ll # LATIN SMALL LETTER GLOTTAL U
A7C3 ; Ll # LATIN SMALL LETTER ANGLICANA W
A7C8 ; Ll # LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY
A7CA ; Ll # LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
A7F6 ; Ll # LATIN SMALL LETTER REVERSED HALF H
A7FA ; Ll # LATIN LETTER SMALL CAPITAL TURNED M
AB30..AB5A ; Ll # [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
AB60..AB67 ; Ll # [8] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TS DIGRAPH WITH RETROFLEX HOOK
AB60..AB68 ; Ll # [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE
AB70..ABBF ; Ll # [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA
FB00..FB06 ; Ll # [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
FB13..FB17 ; Ll # [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
@ -1974,7 +1990,7 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL
1D7CB ; Ll # MATHEMATICAL BOLD SMALL DIGAMMA
1E922..1E943 ; Ll # [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
# Total code points: 2151
# Total code points: 2155
# ================================================
@ -2049,6 +2065,7 @@ AA70 ; Lm # MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
AADD ; Lm # TAI VIET SYMBOL SAM
AAF3..AAF4 ; Lm # [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK
AB5C..AB5F ; Lm # [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
AB69 ; Lm # MODIFIER LETTER SMALL TURNED W
FF70 ; Lm # HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
16B40..16B43 ; Lm # [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM
@ -2058,7 +2075,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK
1E137..1E13D ; Lm # [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
1E94B ; Lm # ADLAM NASALIZATION MARK
# Total code points: 259
# Total code points: 260
# ================================================
@ -2088,7 +2105,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK
0840..0858 ; Lo # [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
0860..086A ; Lo # [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
08A0..08B4 ; Lo # [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
08B6..08BD ; Lo # [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON
08B6..08C7 ; Lo # [18] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE
0904..0939 ; Lo # [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
093D ; Lo # DEVANAGARI SIGN AVAGRAHA
0950 ; Lo # DEVANAGARI OM
@ -2164,7 +2181,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK
0CDE ; Lo # KANNADA LETTER FA
0CE0..0CE1 ; Lo # [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CF1..0CF2 ; Lo # [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
0D05..0D0C ; Lo # [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
0D04..0D0C ; Lo # [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
0D0E..0D10 ; Lo # [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
0D12..0D3A ; Lo # [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
0D3D ; Lo # MALAYALAM SIGN AVAGRAHA
@ -2277,10 +2294,10 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK
30FF ; Lo # KATAKANA DIGRAPH KOTO
3105..312F ; Lo # [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN
3131..318E ; Lo # [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
31A0..31BA ; Lo # [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY
31A0..31BF ; Lo # [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
31F0..31FF ; Lo # [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
3400..4DB5 ; Lo # [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
4E00..9FEF ; Lo # [20976] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEF
3400..4DBF ; Lo # [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
4E00..9FFC ; Lo # [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC
A000..A014 ; Lo # [21] YI SYLLABLE IT..YI SYLLABLE E
A016..A48C ; Lo # [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
A4D0..A4F7 ; Lo # [40] LISU LETTER BA..LISU LETTER OE
@ -2404,15 +2421,19 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
10B80..10B91 ; Lo # [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW
10C00..10C48 ; Lo # [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH
10D00..10D23 ; Lo # [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
10E80..10EA9 ; Lo # [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EB0..10EB1 ; Lo # [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
10F00..10F1C ; Lo # [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F27 ; Lo # OLD SOGDIAN LIGATURE AYIN-DALETH
10F30..10F45 ; Lo # [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
10FB0..10FC4 ; Lo # [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW
10FE0..10FF6 ; Lo # [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH
11003..11037 ; Lo # [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA
11083..110AF ; Lo # [45] KAITHI LETTER A..KAITHI LETTER HA
110D0..110E8 ; Lo # [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE
11103..11126 ; Lo # [36] CHAKMA LETTER AA..CHAKMA LETTER HAA
11144 ; Lo # CHAKMA LETTER LHAA
11147 ; Lo # CHAKMA LETTER VAA
11150..11172 ; Lo # [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA
11176 ; Lo # MAHAJANI LIGATURE SHRI
11183..111B2 ; Lo # [48] SHARADA LETTER A..SHARADA LETTER HA
@ -2438,7 +2459,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
1135D..11361 ; Lo # [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL
11400..11434 ; Lo # [53] NEWA LETTER A..NEWA LETTER HA
11447..1144A ; Lo # [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI
1145F ; Lo # NEWA LETTER VEDIC ANUSVARA
1145F..11461 ; Lo # [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA
11480..114AF ; Lo # [48] TIRHUTA ANJI..TIRHUTA LETTER HA
114C4..114C5 ; Lo # [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG
114C7 ; Lo # TIRHUTA OM
@ -2450,7 +2471,13 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
116B8 ; Lo # TAKRI LETTER ARCHAIC KHA
11700..1171A ; Lo # [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
11800..1182B ; Lo # [44] DOGRA LETTER A..DOGRA LETTER RRA
118FF ; Lo # WARANG CITI OM
118FF..11906 ; Lo # [8] WARANG CITI OM..DIVES AKURU LETTER E
11909 ; Lo # DIVES AKURU LETTER O
1190C..11913 ; Lo # [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA
11915..11916 ; Lo # [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA
11918..1192F ; Lo # [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA
1193F ; Lo # DIVES AKURU PREFIXED NASAL SIGN
11941 ; Lo # DIVES AKURU INITIAL RA
119A0..119A7 ; Lo # [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR
119AA..119D0 ; Lo # [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA
119E1 ; Lo # NANDINAGARI SIGN AVAGRAHA
@ -2475,6 +2502,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
11D6A..11D89 ; Lo # [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA
11D98 ; Lo # GUNJALA GONDI OM
11EE0..11EF2 ; Lo # [19] MAKASAR LETTER KA..MAKASAR ANGKA
11FB0 ; Lo # LISU LETTER YHA
12000..12399 ; Lo # [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U
12480..12543 ; Lo # [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
13000..1342E ; Lo # [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
@ -2488,7 +2516,8 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
16F00..16F4A ; Lo # [75] MIAO LETTER PA..MIAO LETTER RTE
16F50 ; Lo # MIAO LETTER NASALIZATION
17000..187F7 ; Lo # [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
18800..18AF2 ; Lo # [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755
18800..18CD5 ; Lo # [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5
18D00..18D08 ; Lo # [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
1B000..1B11E ; Lo # [287] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER N-MU-MO-2
1B150..1B152 ; Lo # [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
1B164..1B167 ; Lo # [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
@ -2534,14 +2563,15 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
1EEA1..1EEA3 ; Lo # [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL
1EEA5..1EEA9 ; Lo # [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH
1EEAB..1EEBB ; Lo # [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
20000..2A6D6 ; Lo # [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
20000..2A6DD ; Lo # [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD
2A700..2B734 ; Lo # [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
2B740..2B81D ; Lo # [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Lo # [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Lo # [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2F800..2FA1D ; Lo # [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
# Total code points: 121414
# Total code points: 127004
# ================================================
@ -2605,7 +2635,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
0B3F ; Mn # ORIYA VOWEL SIGN I
0B41..0B44 ; Mn # [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
0B4D ; Mn # ORIYA SIGN VIRAMA
0B56 ; Mn # ORIYA AI LENGTH MARK
0B55..0B56 ; Mn # [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
0B62..0B63 ; Mn # [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
0B82 ; Mn # TAMIL SIGN ANUSVARA
0BC0 ; Mn # TAMIL VOWEL SIGN II
@ -2628,6 +2658,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
0D41..0D44 ; Mn # [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
0D4D ; Mn # MALAYALAM SIGN VIRAMA
0D62..0D63 ; Mn # [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
0D81 ; Mn # SINHALA SIGN CANDRABINDU
0DCA ; Mn # SINHALA SIGN AL-LAKUNA
0DD2..0DD4 ; Mn # [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
0DD6 ; Mn # SINHALA VOWEL SIGN DIGA PAA-PILLA
@ -2685,6 +2716,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
1A73..1A7C ; Mn # [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
1A7F ; Mn # TAI THAM COMBINING CRYPTOGRAMMIC DOT
1AB0..1ABD ; Mn # [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
1ABF..1AC0 ; Mn # [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
1B00..1B03 ; Mn # [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B34 ; Mn # BALINESE SIGN REREKAN
1B36..1B3A ; Mn # [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
@ -2725,6 +2757,7 @@ A802 ; Mn # SYLOTI NAGRI SIGN DVISVARA
A806 ; Mn # SYLOTI NAGRI SIGN HASANTA
A80B ; Mn # SYLOTI NAGRI SIGN ANUSVARA
A825..A826 ; Mn # [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
A82C ; Mn # SYLOTI NAGRI SIGN ALTERNATE HASANTA
A8C4..A8C5 ; Mn # [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
A8E0..A8F1 ; Mn # [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A8FF ; Mn # DEVANAGARI VOWEL SIGN AY
@ -2764,6 +2797,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
10A3F ; Mn # KHAROSHTHI VIRAMA
10AE5..10AE6 ; Mn # [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
10D24..10D27 ; Mn # [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; Mn # [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
10F46..10F50 ; Mn # [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
11001 ; Mn # BRAHMI SIGN ANUSVARA
11038..11046 ; Mn # [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
@ -2777,6 +2811,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
11180..11181 ; Mn # [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA
111B6..111BE ; Mn # [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O
111C9..111CC ; Mn # [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK
111CF ; Mn # SHARADA SIGN INVERTED CANDRABINDU
1122F..11231 ; Mn # [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
11234 ; Mn # KHOJKI SIGN ANUSVARA
11236..11237 ; Mn # [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
@ -2812,6 +2847,9 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
11727..1172B ; Mn # [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER
1182F..11837 ; Mn # [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA
11839..1183A ; Mn # [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA
1193B..1193C ; Mn # [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU
1193E ; Mn # DIVES AKURU VIRAMA
11943 ; Mn # DIVES AKURU SIGN NUKTA
119D4..119D7 ; Mn # [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR
119DA..119DB ; Mn # [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI
119E0 ; Mn # NANDINAGARI SIGN VIRAMA
@ -2843,6 +2881,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
16B30..16B36 ; Mn # [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
16F4F ; Mn # MIAO SIGN CONSONANT MODIFIER BAR
16F8F..16F92 ; Mn # [4] MIAO TONE RIGHT..MIAO TONE BELOW
16FE4 ; Mn # KHITAN SMALL SCRIPT FILLER
1BC9D..1BC9E ; Mn # [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
1D167..1D169 ; Mn # [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D17B..1D182 ; Mn # [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
@ -2866,7 +2905,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 1826
# Total code points: 1839
# ================================================
@ -3003,6 +3042,7 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK
11182 ; Mc # SHARADA SIGN VISARGA
111B3..111B5 ; Mc # [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II
111BF..111C0 ; Mc # [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA
111CE ; Mc # SHARADA VOWEL SIGN PRISHTHAMATRA E
1122C..1122E ; Mc # [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II
11232..11233 ; Mc # [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU
11235 ; Mc # KHOJKI SIGN VIRAMA
@ -3034,6 +3074,11 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK
11726 ; Mc # AHOM VOWEL SIGN E
1182C..1182E ; Mc # [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II
11838 ; Mc # DOGRA SIGN VISARGA
11930..11935 ; Mc # [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E
11937..11938 ; Mc # [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O
1193D ; Mc # DIVES AKURU SIGN HALANTA
11940 ; Mc # DIVES AKURU MEDIAL YA
11942 ; Mc # DIVES AKURU MEDIAL RA
119D1..119D3 ; Mc # [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II
119DC..119DF ; Mc # [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA
119E4 ; Mc # NANDINAGARI VOWEL SIGN PRISHTHAMATRA E
@ -3050,10 +3095,11 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK
11D96 ; Mc # GUNJALA GONDI SIGN VISARGA
11EF5..11EF6 ; Mc # [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
16F51..16F87 ; Mc # [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
16FF0..16FF1 ; Mc # [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
1D165..1D166 ; Mc # [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
1D16D..1D172 ; Mc # [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
# Total code points: 429
# Total code points: 443
# ================================================
@ -3109,6 +3155,7 @@ FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
116C0..116C9 ; Nd # [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
11730..11739 ; Nd # [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
118E0..118E9 ; Nd # [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
11950..11959 ; Nd # [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
11C50..11C59 ; Nd # [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
11D50..11D59 ; Nd # [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
11DA0..11DA9 ; Nd # [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
@ -3118,8 +3165,9 @@ FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
1E140..1E149 ; Nd # [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
1E2F0..1E2F9 ; Nd # [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
1E950..1E959 ; Nd # [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
1FBF0..1FBF9 ; Nd # [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
# Total code points: 630
# Total code points: 650
# ================================================
@ -3197,6 +3245,7 @@ A830..A835 ; No # [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTIO
10E60..10E7E ; No # [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
10F1D..10F26 ; No # [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
10F51..10F54 ; No # [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED
10FC5..10FCB ; No # [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED
11052..11065 ; No # [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND
111E1..111F4 ; No # [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND
1173A..1173B ; No # [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY
@ -3215,7 +3264,7 @@ A830..A835 ; No # [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTIO
1ED2F..1ED3D ; No # [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH
1F100..1F10C ; No # [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
# Total code points: 888
# Total code points: 895
# ================================================
@ -3322,8 +3371,9 @@ FE31..FE32 ; Pd # [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION
FE58 ; Pd # SMALL EM DASH
FE63 ; Pd # SMALL HYPHEN-MINUS
FF0D ; Pd # FULLWIDTH HYPHEN-MINUS
10EAD ; Pd # YEZIDI HYPHENATION MARK
# Total code points: 24
# Total code points: 25
# ================================================
@ -3591,6 +3641,7 @@ FF3F ; Pc # FULLWIDTH LOW LINE
2E3C..2E3F ; Po # [4] STENOGRAPHIC FULL STOP..CAPITULUM
2E41 ; Po # REVERSED COMMA
2E43..2E4F ; Po # [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER
2E52 ; Po # TIRONIAN SIGN CAPITAL ET
3001..3003 ; Po # [3] IDEOGRAPHIC COMMA..DITTO MARK
303D ; Po # PART ALTERNATION MARK
30FB ; Po # KATAKANA MIDDLE DOT
@ -3656,7 +3707,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL
11238..1123D ; Po # [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN
112A9 ; Po # MULTANI SECTION MARK
1144B..1144F ; Po # [5] NEWA DANDA..NEWA ABBREVIATION SIGN
1145B ; Po # NEWA PLACEHOLDER MARK
1145A..1145B ; Po # [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK
1145D ; Po # NEWA INSERTION SIGN
114C6 ; Po # TIRHUTA ABBREVIATION SIGN
115C1..115D7 ; Po # [23] SIDDHAM SIGN SIDDHAM..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES
@ -3664,6 +3715,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL
11660..1166C ; Po # [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT
1173C..1173E ; Po # [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
1183B ; Po # DOGRA ABBREVIATION SIGN
11944..11946 ; Po # [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK
119E2 ; Po # NANDINAGARI SIGN SIDDHAM
11A3F..11A46 ; Po # [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK
11A9A..11A9C ; Po # [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD
@ -3683,7 +3735,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL
1DA87..1DA8B ; Po # [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS
1E95E..1E95F ; Po # [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK
# Total code points: 588
# Total code points: 593
# ================================================
@ -3812,13 +3864,14 @@ A700..A716 ; Sk # [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTE
A720..A721 ; Sk # [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE
A789..A78A ; Sk # [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN
AB5B ; Sk # MODIFIER BREVE WITH INVERTED BREVE
AB6A..AB6B ; Sk # [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK
FBB2..FBC1 ; Sk # [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
FF3E ; Sk # FULLWIDTH CIRCUMFLEX ACCENT
FF40 ; Sk # FULLWIDTH GRAVE ACCENT
FFE3 ; Sk # FULLWIDTH MACRON
1F3FB..1F3FF ; Sk # [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
# Total code points: 121
# Total code points: 123
# ================================================
@ -3904,8 +3957,9 @@ FFE3 ; Sk # FULLWIDTH MACRON
2B45..2B46 ; So # [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW
2B4D..2B73 ; So # [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR
2B76..2B95 ; So # [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
2B98..2BFF ; So # [104] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..HELLSCHREIBER PAUSE SYMBOL
2B97..2BFF ; So # [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL
2CE5..2CEA ; So # [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA
2E50..2E51 ; So # [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR
2E80..2E99 ; So # [26] CJK RADICAL REPEAT..CJK RADICAL RAP
2E9B..2EF3 ; So # [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
2F00..2FD5 ; So # [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
@ -3938,7 +3992,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
10137..1013F ; So # [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
10179..10189 ; So # [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN
1018C..1018E ; So # [3] GREEK SINUSOID SIGN..NOMISMA SIGN
10190..1019B ; So # [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
10190..1019C ; So # [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL
101A0 ; So # GREEK SYMBOL TAU RHO
101D0..101FC ; So # [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
10877..10878 ; So # [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON
@ -3973,17 +4027,16 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
1F0B1..1F0BF ; So # [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER
1F0C1..1F0CF ; So # [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER
1F0D1..1F0F5 ; So # [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21
1F110..1F16C ; So # [93] PARENTHESIZED LATIN CAPITAL LETTER A..RAISED MR SIGN
1F170..1F1AC ; So # [61] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VOD
1F10D..1F1AD ; So # [161] CIRCLED ZERO WITH SLASH..MASK WORK SYMBOL
1F1E6..1F202 ; So # [29] REGIONAL INDICATOR SYMBOL LETTER A..SQUARED KATAKANA SA
1F210..1F23B ; So # [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
1F240..1F248 ; So # [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
1F250..1F251 ; So # [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
1F260..1F265 ; So # [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI
1F300..1F3FA ; So # [251] CYCLONE..AMPHORA
1F400..1F6D5 ; So # [726] RAT..HINDU TEMPLE
1F400..1F6D7 ; So # [728] RAT..ELEVATOR
1F6E0..1F6EC ; So # [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
1F6F0..1F6FA ; So # [11] SATELLITE..AUTO RICKSHAW
1F6F0..1F6FC ; So # [13] SATELLITE..ROLLER SKATE
1F700..1F773 ; So # [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
1F780..1F7D8 ; So # [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE
1F7E0..1F7EB ; So # [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
@ -3992,20 +4045,22 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
1F850..1F859 ; So # [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
1F860..1F887 ; So # [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
1F890..1F8AD ; So # [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
1F900..1F90B ; So # [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT
1F90D..1F971 ; So # [101] WHITE HEART..YAWNING FACE
1F973..1F976 ; So # [4] FACE WITH PARTY HORN AND PARTY HAT..FREEZING FACE
1F97A..1F9A2 ; So # [41] FACE WITH PLEADING EYES..SWAN
1F9A5..1F9AA ; So # [6] SLOTH..OYSTER
1F9AE..1F9CA ; So # [29] GUIDE DOG..ICE CUBE
1F8B0..1F8B1 ; So # [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
1F900..1F978 ; So # [121] CIRCLED CROSS FORMEE WITH FOUR DOTS..DISGUISED FACE
1F97A..1F9CB ; So # [82] FACE WITH PLEADING EYES..BUBBLE TEA
1F9CD..1FA53 ; So # [135] STANDING PERSON..BLACK CHESS KNIGHT-BISHOP
1FA60..1FA6D ; So # [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
1FA70..1FA73 ; So # [4] BALLET SHOES..SHORTS
1FA70..1FA74 ; So # [5] BALLET SHOES..THONG SANDAL
1FA78..1FA7A ; So # [3] DROP OF BLOOD..STETHOSCOPE
1FA80..1FA82 ; So # [3] YO-YO..PARACHUTE
1FA90..1FA95 ; So # [6] RINGED PLANET..BANJO
1FA80..1FA86 ; So # [7] YO-YO..NESTING DOLLS
1FA90..1FAA8 ; So # [25] RINGED PLANET..ROCK
1FAB0..1FAB6 ; So # [7] FLY..FEATHER
1FAC0..1FAC2 ; So # [3] ANATOMICAL HEART..PEOPLE HUGGING
1FAD0..1FAD6 ; So # [7] BLUEBERRIES..TEAPOT
1FB00..1FB92 ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
1FB94..1FBCA ; So # [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
# Total code points: 6161
# Total code points: 6431
# ================================================

View File

@ -1,5 +1,5 @@
# GraphemeBreakProperty-12.1.0.txt
# Date: 2019-03-10, 10:53:12 GMT
# GraphemeBreakProperty-13.0.0.txt
# Date: 2019-10-21, 14:30:35 GMT
# © 2019 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
@ -26,11 +26,13 @@
110BD ; Prepend # Cf KAITHI NUMBER SIGN
110CD ; Prepend # Cf KAITHI NUMBER SIGN ABOVE
111C2..111C3 ; Prepend # Lo [2] SHARADA SIGN JIHVAMULIYA..SHARADA SIGN UPADHMANIYA
1193F ; Prepend # Lo DIVES AKURU PREFIXED NASAL SIGN
11941 ; Prepend # Lo DIVES AKURU INITIAL RA
11A3A ; Prepend # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA
11A84..11A89 ; Prepend # Lo [6] SOYOMBO SIGN JIHVAMULIYA..SOYOMBO CLUSTER-INITIAL LETTER SA
11D46 ; Prepend # Lo MASARAM GONDI REPHA
# Total code points: 22
# Total code points: 24
# ================================================
@ -139,7 +141,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0B3F ; Extend # Mn ORIYA VOWEL SIGN I
0B41..0B44 ; Extend # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
0B4D ; Extend # Mn ORIYA SIGN VIRAMA
0B56 ; Extend # Mn ORIYA AI LENGTH MARK
0B55..0B56 ; Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
0B57 ; Extend # Mc ORIYA AU LENGTH MARK
0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
0B82 ; Extend # Mn TAMIL SIGN ANUSVARA
@ -169,6 +171,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0D4D ; Extend # Mn MALAYALAM SIGN VIRAMA
0D57 ; Extend # Mc MALAYALAM AU LENGTH MARK
0D62..0D63 ; Extend # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
0D81 ; Extend # Mn SINHALA SIGN CANDRABINDU
0DCA ; Extend # Mn SINHALA SIGN AL-LAKUNA
0DCF ; Extend # Mc SINHALA VOWEL SIGN AELA-PILLA
0DD2..0DD4 ; Extend # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
@ -229,6 +232,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY
1ABF..1AC0 ; Extend # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B34 ; Extend # Mn BALINESE SIGN REREKAN
1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG
@ -275,6 +279,7 @@ A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA
A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA
A80B ; Extend # Mn SYLOTI NAGRI SIGN ANUSVARA
A825..A826 ; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
A82C ; Extend # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA
A8C4..A8C5 ; Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A8FF ; Extend # Mn DEVANAGARI VOWEL SIGN AY
@ -315,6 +320,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
10A3F ; Extend # Mn KHAROSHTHI VIRAMA
10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
11001 ; Extend # Mn BRAHMI SIGN ANUSVARA
11038..11046 ; Extend # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
@ -328,6 +334,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
11180..11181 ; Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA
111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O
111C9..111CC ; Extend # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK
111CF ; Extend # Mn SHARADA SIGN INVERTED CANDRABINDU
1122F..11231 ; Extend # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
11234 ; Extend # Mn KHOJKI SIGN ANUSVARA
11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
@ -368,6 +375,10 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
11727..1172B ; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER
1182F..11837 ; Extend # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA
11839..1183A ; Extend # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA
11930 ; Extend # Mc DIVES AKURU VOWEL SIGN AA
1193B..1193C ; Extend # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU
1193E ; Extend # Mn DIVES AKURU VIRAMA
11943 ; Extend # Mn DIVES AKURU SIGN NUKTA
119D4..119D7 ; Extend # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR
119DA..119DB ; Extend # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI
119E0 ; Extend # Mn NANDINAGARI SIGN VIRAMA
@ -399,6 +410,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR
16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
16FE4 ; Extend # Mn KHITAN SMALL SCRIPT FILLER
1BC9D..1BC9E ; Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
1D165 ; Extend # Mc MUSICAL SYMBOL COMBINING STEM
1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
@ -426,7 +438,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 1970
# Total code points: 1984
# ================================================
@ -539,6 +551,7 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
11182 ; SpacingMark # Mc SHARADA SIGN VISARGA
111B3..111B5 ; SpacingMark # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II
111BF..111C0 ; SpacingMark # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA
111CE ; SpacingMark # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E
1122C..1122E ; SpacingMark # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II
11232..11233 ; SpacingMark # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU
11235 ; SpacingMark # Mc KHOJKI SIGN VIRAMA
@ -570,6 +583,11 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
11726 ; SpacingMark # Mc AHOM VOWEL SIGN E
1182C..1182E ; SpacingMark # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II
11838 ; SpacingMark # Mc DOGRA SIGN VISARGA
11931..11935 ; SpacingMark # Mc [5] DIVES AKURU VOWEL SIGN I..DIVES AKURU VOWEL SIGN E
11937..11938 ; SpacingMark # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O
1193D ; SpacingMark # Mc DIVES AKURU SIGN HALANTA
11940 ; SpacingMark # Mc DIVES AKURU MEDIAL YA
11942 ; SpacingMark # Mc DIVES AKURU MEDIAL RA
119D1..119D3 ; SpacingMark # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II
119DC..119DF ; SpacingMark # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA
119E4 ; SpacingMark # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E
@ -586,10 +604,11 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
11D96 ; SpacingMark # Mc GUNJALA GONDI SIGN VISARGA
11EF5..11EF6 ; SpacingMark # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
16F51..16F87 ; SpacingMark # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
16FF0..16FF1 ; SpacingMark # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT
# Total code points: 375
# Total code points: 388
# ================================================

View File

@ -1,6 +1,6 @@
# ScriptExtensions-12.1.0.txt
# Date: 2019-04-01, 09:10:42 GMT
# © 2019 Unicode®, Inc.
# ScriptExtensions-13.0.0.txt
# Date: 2020-01-22, 00:07:43 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -147,19 +147,10 @@
# Script_Extensions=Arab Thaa
0660..0669 ; Arab Thaa # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
FDF2 ; Arab Thaa # Lo ARABIC LIGATURE ALLAH ISOLATED FORM
FDFD ; Arab Thaa # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
# Total code points: 12
# ================================================
# Script_Extensions=Armn Geor
0589 ; Armn Geor # Po ARMENIAN FULL STOP
# Total code points: 1
# Total code points: 2
# ================================================
@ -229,6 +220,14 @@ A66F ; Cyrl Glag # Mn COMBINING CYRILLIC VZMET
# ================================================
# Script_Extensions=Cyrl Syrc
1DF8 ; Cyrl Syrc # Mn COMBINING DOT ABOVE LEFT
# Total code points: 1
# ================================================
# Script_Extensions=Deva Gran
1CD3 ; Deva Gran # Po VEDIC SIGN NIHSHVASA
@ -305,6 +304,14 @@ A8F3 ; Deva Taml # Lo DEVANAGARI SIGN CANDRABINDU VIRAMA
# ================================================
# Script_Extensions=Hani Latn
A700..A707 ; Hani Latn # Sk [8] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER CHINESE TONE YANG RU
# Total code points: 8
# ================================================
# Script_Extensions=Hira Kana
3031..3035 ; Hira Kana # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
@ -352,6 +359,14 @@ FF9E..FF9F ; Hira Kana # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFW
# ================================================
# Script_Extensions=Arab Thaa Yezi
0660..0669 ; Arab Thaa Yezi # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
# Total code points: 10
# ================================================
# Script_Extensions=Beng Cakm Sylo
09E6..09EF ; Beng Cakm Sylo # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
@ -409,16 +424,6 @@ A92E ; Kali Latn Mymr # Po KAYAH LI SIGN CWI
# ================================================
# Script_Extensions=Arab Rohg Syrc Thaa
060C ; Arab Rohg Syrc Thaa # Po ARABIC COMMA
061B ; Arab Rohg Syrc Thaa # Po ARABIC SEMICOLON
061F ; Arab Rohg Syrc Thaa # Po ARABIC QUESTION MARK
# Total code points: 3
# ================================================
# Script_Extensions=Beng Deva Gran Knda
1CD0 ; Beng Deva Gran Knda # Mn VEDIC TONE KARSHANA
@ -444,6 +449,16 @@ A92E ; Kali Latn Mymr # Po KAYAH LI SIGN CWI
# ================================================
# Script_Extensions=Arab Rohg Syrc Thaa Yezi
060C ; Arab Rohg Syrc Thaa Yezi # Po ARABIC COMMA
061B ; Arab Rohg Syrc Thaa Yezi # Po ARABIC SEMICOLON
061F ; Arab Rohg Syrc Thaa Yezi # Po ARABIC QUESTION MARK
# Total code points: 3
# ================================================
# Script_Extensions=Bopo Hang Hani Hira Kana
3003 ; Bopo Hang Hani Hira Kana # Po DITTO MARK

View File

@ -1,6 +1,6 @@
# Scripts-12.1.0.txt
# Date: 2019-04-01, 09:10:42 GMT
# © 2019 Unicode®, Inc.
# Scripts-13.0.0.txt
# Date: 2020-01-22, 00:07:43 GMT
# © 2020 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
@ -89,7 +89,6 @@
037E ; Common # Po GREEK QUESTION MARK
0385 ; Common # Sk GREEK DIALYTIKA TONOS
0387 ; Common # Po GREEK ANO TELEIA
0589 ; Common # Po ARMENIAN FULL STOP
0605 ; Common # Cf ARABIC NUMBER MARK ABOVE
060C ; Common # Po ARABIC COMMA
061B ; Common # Po ARABIC SEMICOLON
@ -308,7 +307,7 @@
2B47..2B4C ; Common # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
2B4D..2B73 ; Common # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR
2B76..2B95 ; Common # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
2B98..2BFF ; Common # So [104] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..HELLSCHREIBER PAUSE SYMBOL
2B97..2BFF ; Common # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL
2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET
2E03 ; Common # Pf RIGHT SUBSTITUTION BRACKET
@ -347,6 +346,8 @@
2E41 ; Common # Po REVERSED COMMA
2E42 ; Common # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
2E43..2E4F ; Common # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER
2E50..2E51 ; Common # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR
2E52 ; Common # Po TIRONIAN SIGN CAPITAL ET
2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
3000 ; Common # Zs IDEOGRAPHIC SPACE
3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
@ -414,6 +415,7 @@ A839 ; Common # So NORTH INDIC QUANTITY MARK
A92E ; Common # Po KAYAH LI SIGN CWI
A9CF ; Common # Lm JAVANESE PANGRANGKEP
AB5B ; Common # Sk MODIFIER BREVE WITH INVERTED BREVE
AB6A..AB6B ; Common # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK
FD3E ; Common # Pe ORNATE LEFT PARENTHESIS
FD3F ; Common # Ps ORNATE RIGHT PARENTHESIS
FE10..FE16 ; Common # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK
@ -506,7 +508,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
10100..10102 ; Common # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK
10107..10133 ; Common # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
10137..1013F ; Common # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
10190..1019B ; Common # So [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
10190..1019C ; Common # So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL
101D0..101FC ; Common # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
102E1..102FB ; Common # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
16FE2 ; Common # Po OLD CHINESE HOOK MARK
@ -581,8 +583,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
1F0C1..1F0CF ; Common # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER
1F0D1..1F0F5 ; Common # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21
1F100..1F10C ; Common # No [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
1F110..1F16C ; Common # So [93] PARENTHESIZED LATIN CAPITAL LETTER A..RAISED MR SIGN
1F170..1F1AC ; Common # So [61] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VOD
1F10D..1F1AD ; Common # So [161] CIRCLED ZERO WITH SLASH..MASK WORK SYMBOL
1F1E6..1F1FF ; Common # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
1F201..1F202 ; Common # So [2] SQUARED KATAKANA KOKO..SQUARED KATAKANA SA
1F210..1F23B ; Common # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
@ -591,9 +592,9 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
1F260..1F265 ; Common # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI
1F300..1F3FA ; Common # So [251] CYCLONE..AMPHORA
1F3FB..1F3FF ; Common # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
1F400..1F6D5 ; Common # So [726] RAT..HINDU TEMPLE
1F400..1F6D7 ; Common # So [728] RAT..ELEVATOR
1F6E0..1F6EC ; Common # So [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
1F6F0..1F6FA ; Common # So [11] SATELLITE..AUTO RICKSHAW
1F6F0..1F6FC ; Common # So [13] SATELLITE..ROLLER SKATE
1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
1F780..1F7D8 ; Common # So [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE
1F7E0..1F7EB ; Common # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
@ -602,22 +603,25 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
1F850..1F859 ; Common # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
1F860..1F887 ; Common # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
1F890..1F8AD ; Common # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
1F900..1F90B ; Common # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT
1F90D..1F971 ; Common # So [101] WHITE HEART..YAWNING FACE
1F973..1F976 ; Common # So [4] FACE WITH PARTY HORN AND PARTY HAT..FREEZING FACE
1F97A..1F9A2 ; Common # So [41] FACE WITH PLEADING EYES..SWAN
1F9A5..1F9AA ; Common # So [6] SLOTH..OYSTER
1F9AE..1F9CA ; Common # So [29] GUIDE DOG..ICE CUBE
1F8B0..1F8B1 ; Common # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
1F900..1F978 ; Common # So [121] CIRCLED CROSS FORMEE WITH FOUR DOTS..DISGUISED FACE
1F97A..1F9CB ; Common # So [82] FACE WITH PLEADING EYES..BUBBLE TEA
1F9CD..1FA53 ; Common # So [135] STANDING PERSON..BLACK CHESS KNIGHT-BISHOP
1FA60..1FA6D ; Common # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
1FA70..1FA73 ; Common # So [4] BALLET SHOES..SHORTS
1FA70..1FA74 ; Common # So [5] BALLET SHOES..THONG SANDAL
1FA78..1FA7A ; Common # So [3] DROP OF BLOOD..STETHOSCOPE
1FA80..1FA82 ; Common # So [3] YO-YO..PARACHUTE
1FA90..1FA95 ; Common # So [6] RINGED PLANET..BANJO
1FA80..1FA86 ; Common # So [7] YO-YO..NESTING DOLLS
1FA90..1FAA8 ; Common # So [25] RINGED PLANET..ROCK
1FAB0..1FAB6 ; Common # So [7] FLY..FEATHER
1FAC0..1FAC2 ; Common # So [3] ANATOMICAL HEART..PEOPLE HUGGING
1FAD0..1FAD6 ; Common # So [7] BLUEBERRIES..TEAPOT
1FB00..1FB92 ; Common # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
1FB94..1FBCA ; Common # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
1FBF0..1FBF9 ; Common # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
E0001 ; Common # Cf LANGUAGE TAG
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
# Total code points: 7805
# Total code points: 8087
# ================================================
@ -661,7 +665,8 @@ A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSU
A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
A78F ; Latin # Lo LATIN LETTER SINOLOGICAL DOT
A790..A7BF ; Latin # L& [48] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER GLOTTAL U
A7C2..A7C6 ; Latin # L& [5] LATIN CAPITAL LETTER ANGLICANA W..LATIN CAPITAL LETTER Z WITH PALATAL HOOK
A7C2..A7CA ; Latin # L& [9] LATIN CAPITAL LETTER ANGLICANA W..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
A7F5..A7F6 ; Latin # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
A7F7 ; Latin # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
A7F8..A7F9 ; Latin # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
A7FA ; Latin # L& LATIN LETTER SMALL CAPITAL TURNED M
@ -669,12 +674,13 @@ A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGR
AB30..AB5A ; Latin # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
AB5C..AB5F ; Latin # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
AB60..AB64 ; Latin # L& [5] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER INVERTED ALPHA
AB66..AB67 ; Latin # L& [2] LATIN SMALL LETTER DZ DIGRAPH WITH RETROFLEX HOOK..LATIN SMALL LETTER TS DIGRAPH WITH RETROFLEX HOOK
AB66..AB68 ; Latin # L& [3] LATIN SMALL LETTER DZ DIGRAPH WITH RETROFLEX HOOK..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE
AB69 ; Latin # Lm MODIFIER LETTER SMALL TURNED W
FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
# Total code points: 1366
# Total code points: 1374
# ================================================
@ -769,12 +775,13 @@ FE2E..FE2F ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBININ
0559 ; Armenian # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
055A..055F ; Armenian # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
0560..0588 ; Armenian # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE
0589 ; Armenian # Po ARMENIAN FULL STOP
058A ; Armenian # Pd ARMENIAN HYPHEN
058D..058E ; Armenian # So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN
058F ; Armenian # Sc ARMENIAN DRAM SIGN
FB13..FB17 ; Armenian # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
# Total code points: 95
# Total code points: 96
# ================================================
@ -837,7 +844,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V
0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
08A0..08B4 ; Arabic # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
08B6..08BD ; Arabic # Lo [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON
08B6..08C7 ; Arabic # Lo [18] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE
08D3..08E1 ; Arabic # Mn [15] ARABIC SMALL LOW WAW..ARABIC SMALL HIGH SIGN SAFHA
08E3..08FF ; Arabic # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA
FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
@ -886,7 +893,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
# Total code points: 1281
# Total code points: 1291
# ================================================
@ -1051,7 +1058,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0B47..0B48 ; Oriya # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
0B4B..0B4C ; Oriya # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
0B4D ; Oriya # Mn ORIYA SIGN VIRAMA
0B56 ; Oriya # Mn ORIYA AI LENGTH MARK
0B55..0B56 ; Oriya # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
0B57 ; Oriya # Mc ORIYA AU LENGTH MARK
0B5C..0B5D ; Oriya # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA
0B5F..0B61 ; Oriya # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL
@ -1061,7 +1068,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0B71 ; Oriya # Lo ORIYA LETTER WA
0B72..0B77 ; Oriya # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS
# Total code points: 90
# Total code points: 91
# ================================================
@ -1155,7 +1162,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0D00..0D01 ; Malayalam # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
0D05..0D0C ; Malayalam # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
0D04..0D0C ; Malayalam # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
0D12..0D3A ; Malayalam # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
0D3B..0D3C ; Malayalam # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
@ -1177,10 +1184,11 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0D79 ; Malayalam # So MALAYALAM DATE MARK
0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
# Total code points: 117
# Total code points: 118
# ================================================
0D81 ; Sinhala # Mn SINHALA SIGN CANDRABINDU
0D82..0D83 ; Sinhala # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
0D85..0D96 ; Sinhala # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
0D9A..0DB1 ; Sinhala # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
@ -1197,7 +1205,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0DF4 ; Sinhala # Po SINHALA PUNCTUATION KUNDDALIYA
111E1..111F4 ; Sinhala # No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND
# Total code points: 110
# Total code points: 111
# ================================================
@ -1515,9 +1523,9 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
02EA..02EB ; Bopomofo # Sk [2] MODIFIER LETTER YIN DEPARTING TONE MARK..MODIFIER LETTER YANG DEPARTING TONE MARK
3105..312F ; Bopomofo # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN
31A0..31BA ; Bopomofo # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY
31A0..31BF ; Bopomofo # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
# Total code points: 72
# Total code points: 77
# ================================================
@ -1529,18 +1537,20 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
3021..3029 ; Han # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
3038..303A ; Han # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
303B ; Han # Lm VERTICAL IDEOGRAPHIC ITERATION MARK
3400..4DB5 ; Han # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
4E00..9FEF ; Han # Lo [20976] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEF
3400..4DBF ; Han # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
4E00..9FFC ; Han # Lo [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC
F900..FA6D ; Han # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
16FF0..16FF1 ; Han # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
20000..2A6DD ; Han # Lo [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD
2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Han # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Han # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
# Total code points: 89233
# Total code points: 94204
# ================================================
@ -1583,6 +1593,7 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
0951..0954 ; Inherited # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
1AB0..1ABD ; Inherited # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
1ABE ; Inherited # Me COMBINING PARENTHESES OVERLAY
1ABF..1AC0 ; Inherited # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
1CD4..1CE0 ; Inherited # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
@ -1610,7 +1621,7 @@ FE20..FE2D ; Inherited # Mn [14] COMBINING LIGATURE LEFT HALF..COMBINING CON
1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 571
# Total code points: 573
# ================================================
@ -1783,8 +1794,9 @@ A823..A824 ; Syloti_Nagri # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI
A825..A826 ; Syloti_Nagri # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
A827 ; Syloti_Nagri # Mc SYLOTI NAGRI VOWEL SIGN OO
A828..A82B ; Syloti_Nagri # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4
A82C ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA
# Total code points: 44
# Total code points: 45
# ================================================
@ -2063,8 +2075,9 @@ AADE..AADF ; Tai_Viet # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI
A4D0..A4F7 ; Lisu # Lo [40] LISU LETTER BA..LISU LETTER OE
A4F8..A4FD ; Lisu # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU
A4FE..A4FF ; Lisu # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
11FB0 ; Lisu # Lo LISU LETTER YHA
# Total code points: 48
# Total code points: 49
# ================================================
@ -2217,8 +2230,9 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
11140..11143 ; Chakma # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK
11144 ; Chakma # Lo CHAKMA LETTER LHAA
11145..11146 ; Chakma # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI
11147 ; Chakma # Lo CHAKMA LETTER VAA
# Total code points: 70
# Total code points: 71
# ================================================
@ -2259,13 +2273,15 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
111C5..111C8 ; Sharada # Po [4] SHARADA DANDA..SHARADA SEPARATOR
111C9..111CC ; Sharada # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK
111CD ; Sharada # Po SHARADA SUTRA MARK
111CE ; Sharada # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E
111CF ; Sharada # Mn SHARADA SIGN INVERTED CANDRABINDU
111D0..111D9 ; Sharada # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE
111DA ; Sharada # Lo SHARADA EKAM
111DB ; Sharada # Po SHARADA SIGN SIDDHAM
111DC ; Sharada # Lo SHARADA HEADSTROKE
111DD..111DF ; Sharada # Po [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2
# Total code points: 94
# Total code points: 96
# ================================================
@ -2650,12 +2666,12 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
11447..1144A ; Newa # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI
1144B..1144F ; Newa # Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN
11450..11459 ; Newa # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE
1145B ; Newa # Po NEWA PLACEHOLDER MARK
1145A..1145B ; Newa # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK
1145D ; Newa # Po NEWA INSERTION SIGN
1145E ; Newa # Mn NEWA SANDHI MARK
1145F ; Newa # Lo NEWA LETTER VEDIC ANUSVARA
1145F..11461 ; Newa # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA
# Total code points: 94
# Total code points: 97
# ================================================
@ -2668,9 +2684,10 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
16FE0 ; Tangut # Lm TANGUT ITERATION MARK
17000..187F7 ; Tangut # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
18800..18AF2 ; Tangut # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755
18800..18AFF ; Tangut # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768
18D00..18D08 ; Tangut # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
# Total code points: 6892
# Total code points: 6914
# ================================================
@ -2835,4 +2852,49 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
# Total code points: 59
# ================================================
10FB0..10FC4 ; Chorasmian # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW
10FC5..10FCB ; Chorasmian # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED
# Total code points: 28
# ================================================
11900..11906 ; Dives_Akuru # Lo [7] DIVES AKURU LETTER A..DIVES AKURU LETTER E
11909 ; Dives_Akuru # Lo DIVES AKURU LETTER O
1190C..11913 ; Dives_Akuru # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA
11915..11916 ; Dives_Akuru # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA
11918..1192F ; Dives_Akuru # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA
11930..11935 ; Dives_Akuru # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E
11937..11938 ; Dives_Akuru # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O
1193B..1193C ; Dives_Akuru # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU
1193D ; Dives_Akuru # Mc DIVES AKURU SIGN HALANTA
1193E ; Dives_Akuru # Mn DIVES AKURU VIRAMA
1193F ; Dives_Akuru # Lo DIVES AKURU PREFIXED NASAL SIGN
11940 ; Dives_Akuru # Mc DIVES AKURU MEDIAL YA
11941 ; Dives_Akuru # Lo DIVES AKURU INITIAL RA
11942 ; Dives_Akuru # Mc DIVES AKURU MEDIAL RA
11943 ; Dives_Akuru # Mn DIVES AKURU SIGN NUKTA
11944..11946 ; Dives_Akuru # Po [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK
11950..11959 ; Dives_Akuru # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
# Total code points: 72
# ================================================
16FE4 ; Khitan_Small_Script # Mn KHITAN SMALL SCRIPT FILLER
18B00..18CD5 ; Khitan_Small_Script # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5
# Total code points: 471
# ================================================
10E80..10EA9 ; Yezidi # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EAB..10EAC ; Yezidi # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
10EAD ; Yezidi # Pd YEZIDI HYPHENATION MARK
10EB0..10EB1 ; Yezidi # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
# Total code points: 47
# EOF

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@
* A program for testing the Unicode property table *
***************************************************/
/* Copyright (c) University of Cambridge 2008-2019 */
/* Copyright (c) University of Cambridge 2008-2020 */
/* Compile thus:
@ -255,7 +255,12 @@ const unsigned char *script_names[] = {
US"Elymaic",
US"Nandinagari",
US"Nyiakeng_Puachue_Hmong",
US"Wancho"
US"Wancho",
/* New for Unicode 13.0.0 */
US"Chorasmian",
US"Dives_Akuru",
US"Khitan_Small_Script",
US"Yezidi"
};
const unsigned char *type_names[] = {

View File

@ -40,3 +40,9 @@ findprop 11700 14400 108e0 11280 1d800
findprop 11800 1e903 11da9 10d27 11ee0 16e48 10f27 10f30
findprop a836 a833 1cf4 20f0 1cd0
findprop 32ff
findprop 1f16d
findprop 10e93 10eaa

View File

@ -386,3 +386,13 @@ a833 Number: Other number, Common, Other, [Devanagari, Dogra, Gujarati, Gurmukhi
1cf4 Mark: Non-spacing mark, Inherited, Extend, [Devanagari, Grantha, Kannada]
20f0 Mark: Non-spacing mark, Inherited, Extend, [Devanagari, Grantha, Latin]
1cd0 Mark: Non-spacing mark, Inherited, Extend, [Bengali, Devanagari, Grantha, Kannada]
findprop 32ff
32ff Symbol: Other symbol, Common, Other, [Han]
findprop 1f16d
1f16d Symbol: Other symbol, Common, Extended Pictographic
findprop 10e93 10eaa
10e93 Letter: Other letter, Yezidi, Other
10eaa Control: Unassigned, Unknown, Other

View File

@ -265,6 +265,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0"
#define STRING_Cham0 STR_C STR_h STR_a STR_m "\0"
#define STRING_Cherokee0 STR_C STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0"
#define STRING_Chorasmian0 STR_C STR_h STR_o STR_r STR_a STR_s STR_m STR_i STR_a STR_n "\0"
#define STRING_Cn0 STR_C STR_n "\0"
#define STRING_Co0 STR_C STR_o "\0"
#define STRING_Common0 STR_C STR_o STR_m STR_m STR_o STR_n "\0"
@ -275,6 +276,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
#define STRING_Dives_Akuru0 STR_D STR_i STR_v STR_e STR_s STR_UNDERSCORE STR_A STR_k STR_u STR_r STR_u "\0"
#define STRING_Dogra0 STR_D STR_o STR_g STR_r STR_a "\0"
#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
@ -306,6 +308,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Katakana0 STR_K STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0"
#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
#define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
#define STRING_Khitan_Small_Script0 STR_K STR_h STR_i STR_t STR_a STR_n STR_UNDERSCORE STR_S STR_m STR_a STR_l STR_l STR_UNDERSCORE STR_S STR_c STR_r STR_i STR_p STR_t "\0"
#define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0"
#define STRING_Khojki0 STR_K STR_h STR_o STR_j STR_k STR_i "\0"
#define STRING_Khudawadi0 STR_K STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0"
@ -429,6 +432,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
#define STRING_Xuc0 STR_X STR_u STR_c "\0"
#define STRING_Xwd0 STR_X STR_w STR_d "\0"
#define STRING_Yezidi0 STR_Y STR_e STR_z STR_i STR_d STR_i "\0"
#define STRING_Yi0 STR_Y STR_i "\0"
#define STRING_Z0 STR_Z "\0"
#define STRING_Zanabazar_Square0 STR_Z STR_a STR_n STR_a STR_b STR_a STR_z STR_a STR_r STR_UNDERSCORE STR_S STR_q STR_u STR_a STR_r STR_e "\0"
@ -464,6 +468,7 @@ const char PRIV(utt_names)[] =
STRING_Chakma0
STRING_Cham0
STRING_Cherokee0
STRING_Chorasmian0
STRING_Cn0
STRING_Co0
STRING_Common0
@ -474,6 +479,7 @@ const char PRIV(utt_names)[] =
STRING_Cyrillic0
STRING_Deseret0
STRING_Devanagari0
STRING_Dives_Akuru0
STRING_Dogra0
STRING_Duployan0
STRING_Egyptian_Hieroglyphs0
@ -505,6 +511,7 @@ const char PRIV(utt_names)[] =
STRING_Katakana0
STRING_Kayah_Li0
STRING_Kharoshthi0
STRING_Khitan_Small_Script0
STRING_Khmer0
STRING_Khojki0
STRING_Khudawadi0
@ -628,6 +635,7 @@ const char PRIV(utt_names)[] =
STRING_Xsp0
STRING_Xuc0
STRING_Xwd0
STRING_Yezidi0
STRING_Yi0
STRING_Z0
STRING_Zanabazar_Square0
@ -663,176 +671,180 @@ const ucp_type_table PRIV(utt)[] = {
{ 203, PT_SC, ucp_Chakma },
{ 210, PT_SC, ucp_Cham },
{ 215, PT_SC, ucp_Cherokee },
{ 224, PT_PC, ucp_Cn },
{ 227, PT_PC, ucp_Co },
{ 230, PT_SC, ucp_Common },
{ 237, PT_SC, ucp_Coptic },
{ 244, PT_PC, ucp_Cs },
{ 247, PT_SC, ucp_Cuneiform },
{ 257, PT_SC, ucp_Cypriot },
{ 265, PT_SC, ucp_Cyrillic },
{ 274, PT_SC, ucp_Deseret },
{ 282, PT_SC, ucp_Devanagari },
{ 293, PT_SC, ucp_Dogra },
{ 299, PT_SC, ucp_Duployan },
{ 308, PT_SC, ucp_Egyptian_Hieroglyphs },
{ 329, PT_SC, ucp_Elbasan },
{ 337, PT_SC, ucp_Elymaic },
{ 345, PT_SC, ucp_Ethiopic },
{ 354, PT_SC, ucp_Georgian },
{ 363, PT_SC, ucp_Glagolitic },
{ 374, PT_SC, ucp_Gothic },
{ 381, PT_SC, ucp_Grantha },
{ 389, PT_SC, ucp_Greek },
{ 395, PT_SC, ucp_Gujarati },
{ 404, PT_SC, ucp_Gunjala_Gondi },
{ 418, PT_SC, ucp_Gurmukhi },
{ 427, PT_SC, ucp_Han },
{ 431, PT_SC, ucp_Hangul },
{ 438, PT_SC, ucp_Hanifi_Rohingya },
{ 454, PT_SC, ucp_Hanunoo },
{ 462, PT_SC, ucp_Hatran },
{ 469, PT_SC, ucp_Hebrew },
{ 476, PT_SC, ucp_Hiragana },
{ 485, PT_SC, ucp_Imperial_Aramaic },
{ 502, PT_SC, ucp_Inherited },
{ 512, PT_SC, ucp_Inscriptional_Pahlavi },
{ 534, PT_SC, ucp_Inscriptional_Parthian },
{ 557, PT_SC, ucp_Javanese },
{ 566, PT_SC, ucp_Kaithi },
{ 573, PT_SC, ucp_Kannada },
{ 581, PT_SC, ucp_Katakana },
{ 590, PT_SC, ucp_Kayah_Li },
{ 599, PT_SC, ucp_Kharoshthi },
{ 610, PT_SC, ucp_Khmer },
{ 616, PT_SC, ucp_Khojki },
{ 623, PT_SC, ucp_Khudawadi },
{ 633, PT_GC, ucp_L },
{ 635, PT_LAMP, 0 },
{ 638, PT_SC, ucp_Lao },
{ 642, PT_SC, ucp_Latin },
{ 648, PT_SC, ucp_Lepcha },
{ 655, PT_SC, ucp_Limbu },
{ 661, PT_SC, ucp_Linear_A },
{ 670, PT_SC, ucp_Linear_B },
{ 679, PT_SC, ucp_Lisu },
{ 684, PT_PC, ucp_Ll },
{ 687, PT_PC, ucp_Lm },
{ 690, PT_PC, ucp_Lo },
{ 693, PT_PC, ucp_Lt },
{ 696, PT_PC, ucp_Lu },
{ 699, PT_SC, ucp_Lycian },
{ 706, PT_SC, ucp_Lydian },
{ 713, PT_GC, ucp_M },
{ 715, PT_SC, ucp_Mahajani },
{ 724, PT_SC, ucp_Makasar },
{ 732, PT_SC, ucp_Malayalam },
{ 742, PT_SC, ucp_Mandaic },
{ 750, PT_SC, ucp_Manichaean },
{ 761, PT_SC, ucp_Marchen },
{ 769, PT_SC, ucp_Masaram_Gondi },
{ 783, PT_PC, ucp_Mc },
{ 786, PT_PC, ucp_Me },
{ 789, PT_SC, ucp_Medefaidrin },
{ 801, PT_SC, ucp_Meetei_Mayek },
{ 814, PT_SC, ucp_Mende_Kikakui },
{ 828, PT_SC, ucp_Meroitic_Cursive },
{ 845, PT_SC, ucp_Meroitic_Hieroglyphs },
{ 866, PT_SC, ucp_Miao },
{ 871, PT_PC, ucp_Mn },
{ 874, PT_SC, ucp_Modi },
{ 879, PT_SC, ucp_Mongolian },
{ 889, PT_SC, ucp_Mro },
{ 893, PT_SC, ucp_Multani },
{ 901, PT_SC, ucp_Myanmar },
{ 909, PT_GC, ucp_N },
{ 911, PT_SC, ucp_Nabataean },
{ 921, PT_SC, ucp_Nandinagari },
{ 933, PT_PC, ucp_Nd },
{ 936, PT_SC, ucp_New_Tai_Lue },
{ 948, PT_SC, ucp_Newa },
{ 953, PT_SC, ucp_Nko },
{ 957, PT_PC, ucp_Nl },
{ 960, PT_PC, ucp_No },
{ 963, PT_SC, ucp_Nushu },
{ 969, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
{ 992, PT_SC, ucp_Ogham },
{ 998, PT_SC, ucp_Ol_Chiki },
{ 1007, PT_SC, ucp_Old_Hungarian },
{ 1021, PT_SC, ucp_Old_Italic },
{ 1032, PT_SC, ucp_Old_North_Arabian },
{ 1050, PT_SC, ucp_Old_Permic },
{ 1061, PT_SC, ucp_Old_Persian },
{ 1073, PT_SC, ucp_Old_Sogdian },
{ 1085, PT_SC, ucp_Old_South_Arabian },
{ 1103, PT_SC, ucp_Old_Turkic },
{ 1114, PT_SC, ucp_Oriya },
{ 1120, PT_SC, ucp_Osage },
{ 1126, PT_SC, ucp_Osmanya },
{ 1134, PT_GC, ucp_P },
{ 1136, PT_SC, ucp_Pahawh_Hmong },
{ 1149, PT_SC, ucp_Palmyrene },
{ 1159, PT_SC, ucp_Pau_Cin_Hau },
{ 1171, PT_PC, ucp_Pc },
{ 1174, PT_PC, ucp_Pd },
{ 1177, PT_PC, ucp_Pe },
{ 1180, PT_PC, ucp_Pf },
{ 1183, PT_SC, ucp_Phags_Pa },
{ 1192, PT_SC, ucp_Phoenician },
{ 1203, PT_PC, ucp_Pi },
{ 1206, PT_PC, ucp_Po },
{ 1209, PT_PC, ucp_Ps },
{ 1212, PT_SC, ucp_Psalter_Pahlavi },
{ 1228, PT_SC, ucp_Rejang },
{ 1235, PT_SC, ucp_Runic },
{ 1241, PT_GC, ucp_S },
{ 1243, PT_SC, ucp_Samaritan },
{ 1253, PT_SC, ucp_Saurashtra },
{ 1264, PT_PC, ucp_Sc },
{ 1267, PT_SC, ucp_Sharada },
{ 1275, PT_SC, ucp_Shavian },
{ 1283, PT_SC, ucp_Siddham },
{ 1291, PT_SC, ucp_SignWriting },
{ 1303, PT_SC, ucp_Sinhala },
{ 1311, PT_PC, ucp_Sk },
{ 1314, PT_PC, ucp_Sm },
{ 1317, PT_PC, ucp_So },
{ 1320, PT_SC, ucp_Sogdian },
{ 1328, PT_SC, ucp_Sora_Sompeng },
{ 1341, PT_SC, ucp_Soyombo },
{ 1349, PT_SC, ucp_Sundanese },
{ 1359, PT_SC, ucp_Syloti_Nagri },
{ 1372, PT_SC, ucp_Syriac },
{ 1379, PT_SC, ucp_Tagalog },
{ 1387, PT_SC, ucp_Tagbanwa },
{ 1396, PT_SC, ucp_Tai_Le },
{ 1403, PT_SC, ucp_Tai_Tham },
{ 1412, PT_SC, ucp_Tai_Viet },
{ 1421, PT_SC, ucp_Takri },
{ 1427, PT_SC, ucp_Tamil },
{ 1433, PT_SC, ucp_Tangut },
{ 1440, PT_SC, ucp_Telugu },
{ 1447, PT_SC, ucp_Thaana },
{ 1454, PT_SC, ucp_Thai },
{ 1459, PT_SC, ucp_Tibetan },
{ 1467, PT_SC, ucp_Tifinagh },
{ 1476, PT_SC, ucp_Tirhuta },
{ 1484, PT_SC, ucp_Ugaritic },
{ 1493, PT_SC, ucp_Unknown },
{ 1501, PT_SC, ucp_Vai },
{ 1505, PT_SC, ucp_Wancho },
{ 1512, PT_SC, ucp_Warang_Citi },
{ 1524, PT_ALNUM, 0 },
{ 1528, PT_PXSPACE, 0 },
{ 1532, PT_SPACE, 0 },
{ 1536, PT_UCNC, 0 },
{ 1540, PT_WORD, 0 },
{ 1544, PT_SC, ucp_Yi },
{ 1547, PT_GC, ucp_Z },
{ 1549, PT_SC, ucp_Zanabazar_Square },
{ 1566, PT_PC, ucp_Zl },
{ 1569, PT_PC, ucp_Zp },
{ 1572, PT_PC, ucp_Zs }
{ 224, PT_SC, ucp_Chorasmian },
{ 235, PT_PC, ucp_Cn },
{ 238, PT_PC, ucp_Co },
{ 241, PT_SC, ucp_Common },
{ 248, PT_SC, ucp_Coptic },
{ 255, PT_PC, ucp_Cs },
{ 258, PT_SC, ucp_Cuneiform },
{ 268, PT_SC, ucp_Cypriot },
{ 276, PT_SC, ucp_Cyrillic },
{ 285, PT_SC, ucp_Deseret },
{ 293, PT_SC, ucp_Devanagari },
{ 304, PT_SC, ucp_Dives_Akuru },
{ 316, PT_SC, ucp_Dogra },
{ 322, PT_SC, ucp_Duployan },
{ 331, PT_SC, ucp_Egyptian_Hieroglyphs },
{ 352, PT_SC, ucp_Elbasan },
{ 360, PT_SC, ucp_Elymaic },
{ 368, PT_SC, ucp_Ethiopic },
{ 377, PT_SC, ucp_Georgian },
{ 386, PT_SC, ucp_Glagolitic },
{ 397, PT_SC, ucp_Gothic },
{ 404, PT_SC, ucp_Grantha },
{ 412, PT_SC, ucp_Greek },
{ 418, PT_SC, ucp_Gujarati },
{ 427, PT_SC, ucp_Gunjala_Gondi },
{ 441, PT_SC, ucp_Gurmukhi },
{ 450, PT_SC, ucp_Han },
{ 454, PT_SC, ucp_Hangul },
{ 461, PT_SC, ucp_Hanifi_Rohingya },
{ 477, PT_SC, ucp_Hanunoo },
{ 485, PT_SC, ucp_Hatran },
{ 492, PT_SC, ucp_Hebrew },
{ 499, PT_SC, ucp_Hiragana },
{ 508, PT_SC, ucp_Imperial_Aramaic },
{ 525, PT_SC, ucp_Inherited },
{ 535, PT_SC, ucp_Inscriptional_Pahlavi },
{ 557, PT_SC, ucp_Inscriptional_Parthian },
{ 580, PT_SC, ucp_Javanese },
{ 589, PT_SC, ucp_Kaithi },
{ 596, PT_SC, ucp_Kannada },
{ 604, PT_SC, ucp_Katakana },
{ 613, PT_SC, ucp_Kayah_Li },
{ 622, PT_SC, ucp_Kharoshthi },
{ 633, PT_SC, ucp_Khitan_Small_Script },
{ 653, PT_SC, ucp_Khmer },
{ 659, PT_SC, ucp_Khojki },
{ 666, PT_SC, ucp_Khudawadi },
{ 676, PT_GC, ucp_L },
{ 678, PT_LAMP, 0 },
{ 681, PT_SC, ucp_Lao },
{ 685, PT_SC, ucp_Latin },
{ 691, PT_SC, ucp_Lepcha },
{ 698, PT_SC, ucp_Limbu },
{ 704, PT_SC, ucp_Linear_A },
{ 713, PT_SC, ucp_Linear_B },
{ 722, PT_SC, ucp_Lisu },
{ 727, PT_PC, ucp_Ll },
{ 730, PT_PC, ucp_Lm },
{ 733, PT_PC, ucp_Lo },
{ 736, PT_PC, ucp_Lt },
{ 739, PT_PC, ucp_Lu },
{ 742, PT_SC, ucp_Lycian },
{ 749, PT_SC, ucp_Lydian },
{ 756, PT_GC, ucp_M },
{ 758, PT_SC, ucp_Mahajani },
{ 767, PT_SC, ucp_Makasar },
{ 775, PT_SC, ucp_Malayalam },
{ 785, PT_SC, ucp_Mandaic },
{ 793, PT_SC, ucp_Manichaean },
{ 804, PT_SC, ucp_Marchen },
{ 812, PT_SC, ucp_Masaram_Gondi },
{ 826, PT_PC, ucp_Mc },
{ 829, PT_PC, ucp_Me },
{ 832, PT_SC, ucp_Medefaidrin },
{ 844, PT_SC, ucp_Meetei_Mayek },
{ 857, PT_SC, ucp_Mende_Kikakui },
{ 871, PT_SC, ucp_Meroitic_Cursive },
{ 888, PT_SC, ucp_Meroitic_Hieroglyphs },
{ 909, PT_SC, ucp_Miao },
{ 914, PT_PC, ucp_Mn },
{ 917, PT_SC, ucp_Modi },
{ 922, PT_SC, ucp_Mongolian },
{ 932, PT_SC, ucp_Mro },
{ 936, PT_SC, ucp_Multani },
{ 944, PT_SC, ucp_Myanmar },
{ 952, PT_GC, ucp_N },
{ 954, PT_SC, ucp_Nabataean },
{ 964, PT_SC, ucp_Nandinagari },
{ 976, PT_PC, ucp_Nd },
{ 979, PT_SC, ucp_New_Tai_Lue },
{ 991, PT_SC, ucp_Newa },
{ 996, PT_SC, ucp_Nko },
{ 1000, PT_PC, ucp_Nl },
{ 1003, PT_PC, ucp_No },
{ 1006, PT_SC, ucp_Nushu },
{ 1012, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
{ 1035, PT_SC, ucp_Ogham },
{ 1041, PT_SC, ucp_Ol_Chiki },
{ 1050, PT_SC, ucp_Old_Hungarian },
{ 1064, PT_SC, ucp_Old_Italic },
{ 1075, PT_SC, ucp_Old_North_Arabian },
{ 1093, PT_SC, ucp_Old_Permic },
{ 1104, PT_SC, ucp_Old_Persian },
{ 1116, PT_SC, ucp_Old_Sogdian },
{ 1128, PT_SC, ucp_Old_South_Arabian },
{ 1146, PT_SC, ucp_Old_Turkic },
{ 1157, PT_SC, ucp_Oriya },
{ 1163, PT_SC, ucp_Osage },
{ 1169, PT_SC, ucp_Osmanya },
{ 1177, PT_GC, ucp_P },
{ 1179, PT_SC, ucp_Pahawh_Hmong },
{ 1192, PT_SC, ucp_Palmyrene },
{ 1202, PT_SC, ucp_Pau_Cin_Hau },
{ 1214, PT_PC, ucp_Pc },
{ 1217, PT_PC, ucp_Pd },
{ 1220, PT_PC, ucp_Pe },
{ 1223, PT_PC, ucp_Pf },
{ 1226, PT_SC, ucp_Phags_Pa },
{ 1235, PT_SC, ucp_Phoenician },
{ 1246, PT_PC, ucp_Pi },
{ 1249, PT_PC, ucp_Po },
{ 1252, PT_PC, ucp_Ps },
{ 1255, PT_SC, ucp_Psalter_Pahlavi },
{ 1271, PT_SC, ucp_Rejang },
{ 1278, PT_SC, ucp_Runic },
{ 1284, PT_GC, ucp_S },
{ 1286, PT_SC, ucp_Samaritan },
{ 1296, PT_SC, ucp_Saurashtra },
{ 1307, PT_PC, ucp_Sc },
{ 1310, PT_SC, ucp_Sharada },
{ 1318, PT_SC, ucp_Shavian },
{ 1326, PT_SC, ucp_Siddham },
{ 1334, PT_SC, ucp_SignWriting },
{ 1346, PT_SC, ucp_Sinhala },
{ 1354, PT_PC, ucp_Sk },
{ 1357, PT_PC, ucp_Sm },
{ 1360, PT_PC, ucp_So },
{ 1363, PT_SC, ucp_Sogdian },
{ 1371, PT_SC, ucp_Sora_Sompeng },
{ 1384, PT_SC, ucp_Soyombo },
{ 1392, PT_SC, ucp_Sundanese },
{ 1402, PT_SC, ucp_Syloti_Nagri },
{ 1415, PT_SC, ucp_Syriac },
{ 1422, PT_SC, ucp_Tagalog },
{ 1430, PT_SC, ucp_Tagbanwa },
{ 1439, PT_SC, ucp_Tai_Le },
{ 1446, PT_SC, ucp_Tai_Tham },
{ 1455, PT_SC, ucp_Tai_Viet },
{ 1464, PT_SC, ucp_Takri },
{ 1470, PT_SC, ucp_Tamil },
{ 1476, PT_SC, ucp_Tangut },
{ 1483, PT_SC, ucp_Telugu },
{ 1490, PT_SC, ucp_Thaana },
{ 1497, PT_SC, ucp_Thai },
{ 1502, PT_SC, ucp_Tibetan },
{ 1510, PT_SC, ucp_Tifinagh },
{ 1519, PT_SC, ucp_Tirhuta },
{ 1527, PT_SC, ucp_Ugaritic },
{ 1536, PT_SC, ucp_Unknown },
{ 1544, PT_SC, ucp_Vai },
{ 1548, PT_SC, ucp_Wancho },
{ 1555, PT_SC, ucp_Warang_Citi },
{ 1567, PT_ALNUM, 0 },
{ 1571, PT_PXSPACE, 0 },
{ 1575, PT_SPACE, 0 },
{ 1579, PT_UCNC, 0 },
{ 1583, PT_WORD, 0 },
{ 1587, PT_SC, ucp_Yezidi },
{ 1594, PT_SC, ucp_Yi },
{ 1597, PT_GC, ucp_Z },
{ 1599, PT_SC, ucp_Zanabazar_Square },
{ 1616, PT_PC, ucp_Zl },
{ 1619, PT_PC, ucp_Zp },
{ 1622, PT_PC, ucp_Zs }
};
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);

File diff suppressed because it is too large Load Diff

View File

@ -286,7 +286,12 @@ enum {
ucp_Elymaic,
ucp_Nandinagari,
ucp_Nyiakeng_Puachue_Hmong,
ucp_Wancho
ucp_Wancho,
/* New for Unicode 13.0.0 */
ucp_Chorasmian,
ucp_Dives_Akuru,
ucp_Khitan_Small_Script,
ucp_Yezidi
};
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */

2
testdata/testinput4 vendored
View File

@ -804,10 +804,10 @@
\x{4d00}
\x{4db4}
\x{4db5}
\x{4db6}
\= Expect no match
a
\x{2b0}
\x{4db6}
/^\p{Lt}/utf
\x{1c5}

6
testdata/testinput5 vendored
View File

@ -2081,7 +2081,6 @@
\x{655}
/^\p{Common}/utf
\x{589}
\x{60c}
\x{61f}
\x{964}
@ -2158,6 +2157,11 @@
/\p{Elymaic}\p{Nandinagari}\p{Nyiakeng_Puachue_Hmong}\p{Wancho}/utf
\x{10fe5}\x{119AC}\x{1E10E}\x{1E2D1}
# Some Unicode 13.0.0 new script characters
/\p{Chorasmian}\p{Dives_Akuru}\p{Khitan_Small_Script}\p{Yezidi}/utf
\x{10FB0}\x{11900}\x{18B00}\x{10E80}
# -------
# Test reference and errors in non-ASCII characters in group names

View File

@ -1342,13 +1342,13 @@ No match
0: \x{4db4}
\x{4db5}
0: \x{4db5}
\x{4db6}
0: \x{4db6}
\= Expect no match
a
No match
\x{2b0}
No match
\x{4db6}
No match
/^\p{Lt}/utf
\x{1c5}

View File

@ -4736,8 +4736,6 @@ No match
No match
/^\p{Common}/utf
\x{589}
0: \x{589}
\x{60c}
0: \x{60c}
\x{61f}
@ -4900,6 +4898,12 @@ MK: ABC
\x{10fe5}\x{119AC}\x{1E10E}\x{1E2D1}
0: \x{10fe5}\x{119ac}\x{1e10e}\x{1e2d1}
# Some Unicode 13.0.0 new script characters
/\p{Chorasmian}\p{Dives_Akuru}\p{Khitan_Small_Script}\p{Yezidi}/utf
\x{10FB0}\x{11900}\x{18B00}\x{10E80}
0: \x{10fb0}\x{11900}\x{18b00}\x{10e80}
# -------
# Test reference and errors in non-ASCII characters in group names