Update to Unicode 13.0.0.
This commit is contained in:
parent
59233b8079
commit
c472f3f91a
|
@ -97,6 +97,8 @@ character tables handling have been done:
|
|||
22. Changed setting of CMAKE_MODULE_PATH in CMakeLists.txt from SET to
|
||||
LIST(APPEND...) to allow a setting from the command line to be included.
|
||||
|
||||
23. Updated to Unicode 13.0.0.
|
||||
|
||||
|
||||
Version 10.34 21-November-2019
|
||||
------------------------------
|
||||
|
|
|
@ -819,6 +819,7 @@ Caucasian_Albanian,
|
|||
Chakma,
|
||||
Cham,
|
||||
Cherokee,
|
||||
Chorasmian,
|
||||
Common,
|
||||
Coptic,
|
||||
Cuneiform,
|
||||
|
@ -826,6 +827,7 @@ Cypriot,
|
|||
Cyrillic,
|
||||
Deseret,
|
||||
Devanagari,
|
||||
Dives_Akuru,
|
||||
Dogra,
|
||||
Duployan,
|
||||
Egyptian_Hieroglyphs,
|
||||
|
@ -857,6 +859,7 @@ Kannada,
|
|||
Katakana,
|
||||
Kayah_Li,
|
||||
Kharoshthi,
|
||||
Khitan_Small_Script,
|
||||
Khmer,
|
||||
Khojki,
|
||||
Khudawadi,
|
||||
|
@ -947,6 +950,7 @@ Unknown,
|
|||
Vai,
|
||||
Wancho,
|
||||
Warang_Citi,
|
||||
Yezidi,
|
||||
Yi,
|
||||
Zanabazar_Square.
|
||||
</P>
|
||||
|
|
|
@ -223,6 +223,7 @@ Caucasian_Albanian,
|
|||
Chakma,
|
||||
Cham,
|
||||
Cherokee,
|
||||
Chorasmian,
|
||||
Common,
|
||||
Coptic,
|
||||
Cuneiform,
|
||||
|
@ -230,6 +231,7 @@ Cypriot,
|
|||
Cyrillic,
|
||||
Deseret,
|
||||
Devanagari,
|
||||
Dives_Akuru,
|
||||
Dogra,
|
||||
Duployan,
|
||||
Egyptian_Hieroglyphs,
|
||||
|
@ -261,6 +263,7 @@ Kannada,
|
|||
Katakana,
|
||||
Kayah_Li,
|
||||
Kharoshthi,
|
||||
Khitan_Small_Script,
|
||||
Khmer,
|
||||
Khojki,
|
||||
Khudawadi,
|
||||
|
@ -350,6 +353,7 @@ Ugaritic,
|
|||
Vai,
|
||||
Wancho,
|
||||
Warang_Citi,
|
||||
Yezidi,
|
||||
Yi,
|
||||
Zanabazar_Square.
|
||||
</P>
|
||||
|
|
|
@ -6825,27 +6825,28 @@ BACKSLASH
|
|||
Adlam, Ahom, Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, Bali-
|
||||
nese, Bamum, Bassa_Vah, Batak, Bengali, Bhaiksuki, Bopomofo, Brahmi,
|
||||
Braille, Buginese, Buhid, Canadian_Aboriginal, Carian, Caucasian_Alba-
|
||||
nian, Chakma, Cham, Cherokee, Common, Coptic, Cuneiform, Cypriot,
|
||||
Cyrillic, Deseret, Devanagari, Dogra, Duployan, Egyptian_Hieroglyphs,
|
||||
Elbasan, Elymaic, Ethiopic, Georgian, Glagolitic, Gothic, Grantha,
|
||||
Greek, Gujarati, Gunjala_Gondi, Gurmukhi, Han, Hangul, Hanifi_Rohingya,
|
||||
Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic, Inherited, In-
|
||||
scriptional_Pahlavi, Inscriptional_Parthian, Javanese, Kaithi, Kannada,
|
||||
Katakana, Kayah_Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao, Latin,
|
||||
Lepcha, Limbu, Linear_A, Linear_B, Lisu, Lycian, Lydian, Mahajani,
|
||||
Makasar, Malayalam, Mandaic, Manichaean, Marchen, Masaram_Gondi, Mede-
|
||||
faidrin, Meetei_Mayek, Mende_Kikakui, Meroitic_Cursive, Meroitic_Hiero-
|
||||
glyphs, Miao, Modi, Mongolian, Mro, Multani, Myanmar, Nabataean, Nandi-
|
||||
nagari, New_Tai_Lue, Newa, Nko, Nushu, Nyakeng_Puachue_Hmong, Ogham,
|
||||
Ol_Chiki, Old_Hungarian, Old_Italic, Old_North_Arabian, Old_Permic,
|
||||
Old_Persian, Old_Sogdian, Old_South_Arabian, Old_Turkic, Oriya, Osage,
|
||||
Osmanya, Pahawh_Hmong, Palmyrene, Pau_Cin_Hau, Phags_Pa, Phoenician,
|
||||
nian, Chakma, Cham, Cherokee, Chorasmian, Common, Coptic, Cuneiform,
|
||||
Cypriot, Cyrillic, Deseret, Devanagari, Dives_Akuru, Dogra, Duployan,
|
||||
Egyptian_Hieroglyphs, Elbasan, Elymaic, Ethiopic, Georgian, Glagolitic,
|
||||
Gothic, Grantha, Greek, Gujarati, Gunjala_Gondi, Gurmukhi, Han, Hangul,
|
||||
Hanifi_Rohingya, Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic,
|
||||
Inherited, Inscriptional_Pahlavi, Inscriptional_Parthian, Javanese,
|
||||
Kaithi, Kannada, Katakana, Kayah_Li, Kharoshthi, Khitan_Small_Script,
|
||||
Khmer, Khojki, Khudawadi, Lao, Latin, Lepcha, Limbu, Linear_A, Lin-
|
||||
ear_B, Lisu, Lycian, Lydian, Mahajani, Makasar, Malayalam, Mandaic,
|
||||
Manichaean, Marchen, Masaram_Gondi, Medefaidrin, Meetei_Mayek,
|
||||
Mende_Kikakui, Meroitic_Cursive, Meroitic_Hieroglyphs, Miao, Modi, Mon-
|
||||
golian, Mro, Multani, Myanmar, Nabataean, Nandinagari, New_Tai_Lue,
|
||||
Newa, Nko, Nushu, Nyiakeng_Puachue_Hmong, Ogham, Ol_Chiki, Old_Hungar-
|
||||
ian, Old_Italic, Old_North_Arabian, Old_Permic, Old_Persian, Old_Sog-
|
||||
dian, Old_South_Arabian, Old_Turkic, Oriya, Osage, Osmanya, Pa-
|
||||
hawh_Hmong, Palmyrene, Pau_Cin_Hau, Phags_Pa, Phoenician,
|
||||
Psalter_Pahlavi, Rejang, Runic, Samaritan, Saurashtra, Sharada, Sha-
|
||||
vian, Siddham, SignWriting, Sinhala, Sogdian, Sora_Sompeng, Soyombo,
|
||||
Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le, Tai_Tham,
|
||||
Tai_Viet, Takri, Tamil, Tangut, Telugu, Thaana, Thai, Tibetan, Tifi-
|
||||
nagh, Tirhuta, Ugaritic, Unknown, Vai, Wancho, Warang_Citi, Yi, Zan-
|
||||
abazar_Square.
|
||||
nagh, Tirhuta, Ugaritic, Unknown, Vai, Wancho, Warang_Citi, Yezidi, Yi,
|
||||
Zanabazar_Square.
|
||||
|
||||
Each character has exactly one Unicode general category property, spec-
|
||||
ified by a two-letter abbreviation. For compatibility with Perl, nega-
|
||||
|
@ -10569,26 +10570,27 @@ SCRIPT NAMES FOR \p AND \P
|
|||
Adlam, Ahom, Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, Bali-
|
||||
nese, Bamum, Bassa_Vah, Batak, Bengali, Bhaiksuki, Bopomofo, Brahmi,
|
||||
Braille, Buginese, Buhid, Canadian_Aboriginal, Carian, Caucasian_Alba-
|
||||
nian, Chakma, Cham, Cherokee, Common, Coptic, Cuneiform, Cypriot,
|
||||
Cyrillic, Deseret, Devanagari, Dogra, Duployan, Egyptian_Hieroglyphs,
|
||||
Elbasan, Elymaic, Ethiopic, Georgian, Glagolitic, Gothic, Grantha,
|
||||
Greek, Gujarati, Gunjala_Gondi, Gurmukhi, Han, Hangul, Hanifi_Rohingya,
|
||||
Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic, Inherited, In-
|
||||
scriptional_Pahlavi, Inscriptional_Parthian, Javanese, Kaithi, Kannada,
|
||||
Katakana, Kayah_Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao, Latin,
|
||||
Lepcha, Limbu, Linear_A, Linear_B, Lisu, Lycian, Lydian, Mahajani,
|
||||
Makasar, Malayalam, Mandaic, Manichaean, Marchen, Masaram_Gondi, Mede-
|
||||
faidrin, Meetei_Mayek, Mende_Kikakui, Meroitic_Cursive, Meroitic_Hiero-
|
||||
glyphs, Miao, Modi, Mongolian, Mro, Multani, Myanmar, Nabataean, Nandi-
|
||||
nagari, New_Tai_Lue, Newa, Nko, Nushu, Nyakeng_Puachue_Hmong, Ogham,
|
||||
Ol_Chiki, Old_Hungarian, Old_Italic, Old_North_Arabian, Old_Permic,
|
||||
Old_Persian, Old_Sogdian, Old_South_Arabian, Old_Turkic, Oriya, Osage,
|
||||
Osmanya, Pahawh_Hmong, Palmyrene, Pau_Cin_Hau, Phags_Pa, Phoenician,
|
||||
nian, Chakma, Cham, Cherokee, Chorasmian, Common, Coptic, Cuneiform,
|
||||
Cypriot, Cyrillic, Deseret, Devanagari, Dives_Akuru, Dogra, Duployan,
|
||||
Egyptian_Hieroglyphs, Elbasan, Elymaic, Ethiopic, Georgian, Glagolitic,
|
||||
Gothic, Grantha, Greek, Gujarati, Gunjala_Gondi, Gurmukhi, Han, Hangul,
|
||||
Hanifi_Rohingya, Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic,
|
||||
Inherited, Inscriptional_Pahlavi, Inscriptional_Parthian, Javanese,
|
||||
Kaithi, Kannada, Katakana, Kayah_Li, Kharoshthi, Khitan_Small_Script,
|
||||
Khmer, Khojki, Khudawadi, Lao, Latin, Lepcha, Limbu, Linear_A, Lin-
|
||||
ear_B, Lisu, Lycian, Lydian, Mahajani, Makasar, Malayalam, Mandaic,
|
||||
Manichaean, Marchen, Masaram_Gondi, Medefaidrin, Meetei_Mayek,
|
||||
Mende_Kikakui, Meroitic_Cursive, Meroitic_Hieroglyphs, Miao, Modi, Mon-
|
||||
golian, Mro, Multani, Myanmar, Nabataean, Nandinagari, New_Tai_Lue,
|
||||
Newa, Nko, Nushu, Nyiakeng_Puachue_Hmong, Ogham, Ol_Chiki, Old_Hungar-
|
||||
ian, Old_Italic, Old_North_Arabian, Old_Permic, Old_Persian, Old_Sog-
|
||||
dian, Old_South_Arabian, Old_Turkic, Oriya, Osage, Osmanya, Pa-
|
||||
hawh_Hmong, Palmyrene, Pau_Cin_Hau, Phags_Pa, Phoenician,
|
||||
Psalter_Pahlavi, Rejang, Runic, Samaritan, Saurashtra, Sharada, Sha-
|
||||
vian, Siddham, SignWriting, Sinhala, Sogdian, Sora_Sompeng, Soyombo,
|
||||
Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le, Tai_Tham,
|
||||
Tai_Viet, Takri, Tamil, Tangut, Telugu, Thaana, Thai, Tibetan, Tifi-
|
||||
nagh, Tirhuta, Ugaritic, Vai, Wancho, Warang_Citi, Yi, Zan-
|
||||
nagh, Tirhuta, Ugaritic, Vai, Wancho, Warang_Citi, Yezidi, Yi, Zan-
|
||||
abazar_Square.
|
||||
|
||||
|
||||
|
|
|
@ -814,6 +814,7 @@ Caucasian_Albanian,
|
|||
Chakma,
|
||||
Cham,
|
||||
Cherokee,
|
||||
Chorasmian,
|
||||
Common,
|
||||
Coptic,
|
||||
Cuneiform,
|
||||
|
@ -821,6 +822,7 @@ Cypriot,
|
|||
Cyrillic,
|
||||
Deseret,
|
||||
Devanagari,
|
||||
Dives_Akuru,
|
||||
Dogra,
|
||||
Duployan,
|
||||
Egyptian_Hieroglyphs,
|
||||
|
@ -852,6 +854,7 @@ Kannada,
|
|||
Katakana,
|
||||
Kayah_Li,
|
||||
Kharoshthi,
|
||||
Khitan_Small_Script,
|
||||
Khmer,
|
||||
Khojki,
|
||||
Khudawadi,
|
||||
|
@ -942,6 +945,7 @@ Unknown,
|
|||
Vai,
|
||||
Wancho,
|
||||
Warang_Citi,
|
||||
Yezidi,
|
||||
Yi,
|
||||
Zanabazar_Square.
|
||||
.P
|
||||
|
|
|
@ -193,6 +193,7 @@ Caucasian_Albanian,
|
|||
Chakma,
|
||||
Cham,
|
||||
Cherokee,
|
||||
Chorasmian,
|
||||
Common,
|
||||
Coptic,
|
||||
Cuneiform,
|
||||
|
@ -200,6 +201,7 @@ Cypriot,
|
|||
Cyrillic,
|
||||
Deseret,
|
||||
Devanagari,
|
||||
Dives_Akuru,
|
||||
Dogra,
|
||||
Duployan,
|
||||
Egyptian_Hieroglyphs,
|
||||
|
@ -231,6 +233,7 @@ Kannada,
|
|||
Katakana,
|
||||
Kayah_Li,
|
||||
Kharoshthi,
|
||||
Khitan_Small_Script,
|
||||
Khmer,
|
||||
Khojki,
|
||||
Khudawadi,
|
||||
|
@ -320,6 +323,7 @@ Ugaritic,
|
|||
Vai,
|
||||
Wancho,
|
||||
Warang_Citi,
|
||||
Yezidi,
|
||||
Yi,
|
||||
Zanabazar_Square.
|
||||
.
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
# Added script names for Unicode 11.0.0, 03-July-2018.
|
||||
# Added 'Unknown' script, 01-October-2018.
|
||||
# Added script names for Unicode 12.1.0, 27-July-2019.
|
||||
# Added script names for Unicode 13.0.0, 10-March-2020.
|
||||
|
||||
script_names = ['Unknown', 'Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \
|
||||
'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \
|
||||
|
@ -63,7 +64,9 @@ script_names = ['Unknown', 'Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille
|
|||
'Dogra', 'Gunjala_Gondi', 'Hanifi_Rohingya', 'Makasar', 'Medefaidrin',
|
||||
'Old_Sogdian', 'Sogdian',
|
||||
# New for Unicode 12.0.0
|
||||
'Elymaic', 'Nandinagari', 'Nyiakeng_Puachue_Hmong', 'Wancho'
|
||||
'Elymaic', 'Nandinagari', 'Nyiakeng_Puachue_Hmong', 'Wancho',
|
||||
# New for Unicode 13.0.0
|
||||
'Chorasmian', 'Dives_Akuru', 'Khitan_Small_Script', 'Yezidi'
|
||||
]
|
||||
|
||||
category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
|
||||
|
|
|
@ -23,11 +23,14 @@
|
|||
# DerivedGeneralCategory.txt is found in the "extracted" subdirectory of the
|
||||
# Unicode database (UCD) on the Unicode web site; GraphemeBreakProperty.txt is
|
||||
# in the "auxiliary" subdirectory. Scripts.txt, ScriptExtensions.txt, and
|
||||
# CaseFolding.txt are directly in the UCD directory. The emoji-data.txt file is
|
||||
# CaseFolding.txt are directly in the UCD directory.
|
||||
#
|
||||
# The emoji-data.txt file is found in the "emoji" subdirectory even though it
|
||||
# is technically part of a different (but coordinated) standard as shown
|
||||
# in files associated with Unicode Technical Standard #51 ("Unicode Emoji"),
|
||||
# for example:
|
||||
#
|
||||
# http://unicode.org/Public/emoji/11.0/emoji-data.txt
|
||||
# http://unicode.org/Public/emoji/13.0/ReadMe.txt
|
||||
#
|
||||
# -----------------------------------------------------------------------------
|
||||
# Minor modifications made to this script:
|
||||
|
@ -88,6 +91,7 @@
|
|||
# 01-October-2018: Added the 'Unknown' script name
|
||||
# 03-October-2018: Added new field for Script Extensions
|
||||
# 27-July-2019: Updated for Unicode 12.1.0
|
||||
# 10-March-2020: Updated for Unicode 13.0.0
|
||||
# ----------------------------------------------------------------------------
|
||||
#
|
||||
#
|
||||
|
@ -179,7 +183,6 @@
|
|||
# any of those scripts, which are Bengali, Devanagari, Grantha, and Kannada.
|
||||
#
|
||||
# Philip Hazel, 03 July 2008
|
||||
# Last Updated: 07 October 2018
|
||||
##############################################################################
|
||||
|
||||
|
||||
|
@ -427,7 +430,9 @@ script_names = ['Unknown', 'Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille
|
|||
'Dogra', 'Gunjala_Gondi', 'Hanifi_Rohingya', 'Makasar', 'Medefaidrin',
|
||||
'Old_Sogdian', 'Sogdian',
|
||||
# New for Unicode 12.0.0
|
||||
'Elymaic', 'Nandinagari', 'Nyiakeng_Puachue_Hmong', 'Wancho'
|
||||
'Elymaic', 'Nandinagari', 'Nyiakeng_Puachue_Hmong', 'Wancho',
|
||||
# New for Unicode 13.0.0
|
||||
'Chorasmian', 'Dives_Akuru', 'Khitan_Small_Script', 'Yezidi'
|
||||
]
|
||||
|
||||
script_abbrevs = [
|
||||
|
@ -462,7 +467,9 @@ script_abbrevs = [
|
|||
#New for Unicode 11.0.0
|
||||
'Dogr', 'Gong', 'Rohg', 'Maka', 'Medf', 'Sogo', 'Sogd',
|
||||
#New for Unicode 12.0.0
|
||||
'Elym', 'Nand', 'Hmnp', 'Wcho'
|
||||
'Elym', 'Nand', 'Hmnp', 'Wcho',
|
||||
#New for Unicode 13.0.0
|
||||
'Chrs', 'Diak', 'Kits', 'Yezi'
|
||||
]
|
||||
|
||||
category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
|
||||
|
|
|
@ -81,11 +81,11 @@ script names.
|
|||
|
||||
MultiStage2.py has two lists: the full names and the abbreviations that are
|
||||
found in the ScriptExtensions.txt file. A list of script names and their
|
||||
abbreviations s can be found in the PropertyValueAliases.txt file on the
|
||||
abbreviations can be found in the PropertyValueAliases.txt file on the
|
||||
Unicode web site. There is also a Wikipedia page that lists them, and notes the
|
||||
Unicode version in which they were introduced:
|
||||
|
||||
http://en.wikipedia.org/wiki/Unicode_scripts#Table_of_Unicode_scripts
|
||||
https://en.wikipedia.org/wiki/Unicode_scripts#Table_of_Unicode_scripts
|
||||
|
||||
Once the script name lists have been updated, MultiStage2.py can be run to
|
||||
generate a new version of pcre2_ucd.c, and GenerateUtt.py can be run to
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# CaseFolding-12.1.0.txt
|
||||
# Date: 2019-03-10, 10:53:00 GMT
|
||||
# CaseFolding-13.0.0.txt
|
||||
# Date: 2019-09-08, 23:30:59 GMT
|
||||
# © 2019 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
|
@ -1234,6 +1234,9 @@ A7C2; C; A7C3; # LATIN CAPITAL LETTER ANGLICANA W
|
|||
A7C4; C; A794; # LATIN CAPITAL LETTER C WITH PALATAL HOOK
|
||||
A7C5; C; 0282; # LATIN CAPITAL LETTER S WITH HOOK
|
||||
A7C6; C; 1D8E; # LATIN CAPITAL LETTER Z WITH PALATAL HOOK
|
||||
A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
|
||||
A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
|
||||
A7F5; C; A7F6; # LATIN CAPITAL LETTER REVERSED HALF H
|
||||
AB70; C; 13A0; # CHEROKEE SMALL LETTER A
|
||||
AB71; C; 13A1; # CHEROKEE SMALL LETTER E
|
||||
AB72; C; 13A2; # CHEROKEE SMALL LETTER I
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# DerivedGeneralCategory-12.1.0.txt
|
||||
# Date: 2019-03-10, 10:53:08 GMT
|
||||
# DerivedGeneralCategory-13.0.0.txt
|
||||
# Date: 2019-10-21, 14:30:32 GMT
|
||||
# © 2019 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
|
@ -38,7 +38,7 @@
|
|||
085F ; Cn # <reserved-085F>
|
||||
086B..089F ; Cn # [53] <reserved-086B>..<reserved-089F>
|
||||
08B5 ; Cn # <reserved-08B5>
|
||||
08BE..08D2 ; Cn # [21] <reserved-08BE>..<reserved-08D2>
|
||||
08C8..08D2 ; Cn # [11] <reserved-08C8>..<reserved-08D2>
|
||||
0984 ; Cn # <reserved-0984>
|
||||
098D..098E ; Cn # [2] <reserved-098D>..<reserved-098E>
|
||||
0991..0992 ; Cn # [2] <reserved-0991>..<reserved-0992>
|
||||
|
@ -92,7 +92,7 @@
|
|||
0B3A..0B3B ; Cn # [2] <reserved-0B3A>..<reserved-0B3B>
|
||||
0B45..0B46 ; Cn # [2] <reserved-0B45>..<reserved-0B46>
|
||||
0B49..0B4A ; Cn # [2] <reserved-0B49>..<reserved-0B4A>
|
||||
0B4E..0B55 ; Cn # [8] <reserved-0B4E>..<reserved-0B55>
|
||||
0B4E..0B54 ; Cn # [7] <reserved-0B4E>..<reserved-0B54>
|
||||
0B58..0B5B ; Cn # [4] <reserved-0B58>..<reserved-0B5B>
|
||||
0B5E ; Cn # <reserved-0B5E>
|
||||
0B64..0B65 ; Cn # [2] <reserved-0B64>..<reserved-0B65>
|
||||
|
@ -137,14 +137,13 @@
|
|||
0CE4..0CE5 ; Cn # [2] <reserved-0CE4>..<reserved-0CE5>
|
||||
0CF0 ; Cn # <reserved-0CF0>
|
||||
0CF3..0CFF ; Cn # [13] <reserved-0CF3>..<reserved-0CFF>
|
||||
0D04 ; Cn # <reserved-0D04>
|
||||
0D0D ; Cn # <reserved-0D0D>
|
||||
0D11 ; Cn # <reserved-0D11>
|
||||
0D45 ; Cn # <reserved-0D45>
|
||||
0D49 ; Cn # <reserved-0D49>
|
||||
0D50..0D53 ; Cn # [4] <reserved-0D50>..<reserved-0D53>
|
||||
0D64..0D65 ; Cn # [2] <reserved-0D64>..<reserved-0D65>
|
||||
0D80..0D81 ; Cn # [2] <reserved-0D80>..<reserved-0D81>
|
||||
0D80 ; Cn # <reserved-0D80>
|
||||
0D84 ; Cn # <reserved-0D84>
|
||||
0D97..0D99 ; Cn # [3] <reserved-0D97>..<reserved-0D99>
|
||||
0DB2 ; Cn # <reserved-0DB2>
|
||||
|
@ -231,7 +230,7 @@
|
|||
1A8A..1A8F ; Cn # [6] <reserved-1A8A>..<reserved-1A8F>
|
||||
1A9A..1A9F ; Cn # [6] <reserved-1A9A>..<reserved-1A9F>
|
||||
1AAE..1AAF ; Cn # [2] <reserved-1AAE>..<reserved-1AAF>
|
||||
1ABF..1AFF ; Cn # [65] <reserved-1ABF>..<reserved-1AFF>
|
||||
1AC1..1AFF ; Cn # [63] <reserved-1AC1>..<reserved-1AFF>
|
||||
1B4C..1B4F ; Cn # [4] <reserved-1B4C>..<reserved-1B4F>
|
||||
1B7D..1B7F ; Cn # [3] <reserved-1B7D>..<reserved-1B7F>
|
||||
1BF4..1BFB ; Cn # [8] <reserved-1BF4>..<reserved-1BFB>
|
||||
|
@ -268,7 +267,7 @@
|
|||
2427..243F ; Cn # [25] <reserved-2427>..<reserved-243F>
|
||||
244B..245F ; Cn # [21] <reserved-244B>..<reserved-245F>
|
||||
2B74..2B75 ; Cn # [2] <reserved-2B74>..<reserved-2B75>
|
||||
2B96..2B97 ; Cn # [2] <reserved-2B96>..<reserved-2B97>
|
||||
2B96 ; Cn # <reserved-2B96>
|
||||
2C2F ; Cn # <reserved-2C2F>
|
||||
2C5F ; Cn # <reserved-2C5F>
|
||||
2CF4..2CF8 ; Cn # [5] <reserved-2CF4>..<reserved-2CF8>
|
||||
|
@ -286,7 +285,7 @@
|
|||
2DCF ; Cn # <reserved-2DCF>
|
||||
2DD7 ; Cn # <reserved-2DD7>
|
||||
2DDF ; Cn # <reserved-2DDF>
|
||||
2E50..2E7F ; Cn # [48] <reserved-2E50>..<reserved-2E7F>
|
||||
2E53..2E7F ; Cn # [45] <reserved-2E53>..<reserved-2E7F>
|
||||
2E9A ; Cn # <reserved-2E9A>
|
||||
2EF4..2EFF ; Cn # [12] <reserved-2EF4>..<reserved-2EFF>
|
||||
2FD6..2FEF ; Cn # [26] <reserved-2FD6>..<reserved-2FEF>
|
||||
|
@ -296,18 +295,16 @@
|
|||
3100..3104 ; Cn # [5] <reserved-3100>..<reserved-3104>
|
||||
3130 ; Cn # <reserved-3130>
|
||||
318F ; Cn # <reserved-318F>
|
||||
31BB..31BF ; Cn # [5] <reserved-31BB>..<reserved-31BF>
|
||||
31E4..31EF ; Cn # [12] <reserved-31E4>..<reserved-31EF>
|
||||
321F ; Cn # <reserved-321F>
|
||||
4DB6..4DBF ; Cn # [10] <reserved-4DB6>..<reserved-4DBF>
|
||||
9FF0..9FFF ; Cn # [16] <reserved-9FF0>..<reserved-9FFF>
|
||||
9FFD..9FFF ; Cn # [3] <reserved-9FFD>..<reserved-9FFF>
|
||||
A48D..A48F ; Cn # [3] <reserved-A48D>..<reserved-A48F>
|
||||
A4C7..A4CF ; Cn # [9] <reserved-A4C7>..<reserved-A4CF>
|
||||
A62C..A63F ; Cn # [20] <reserved-A62C>..<reserved-A63F>
|
||||
A6F8..A6FF ; Cn # [8] <reserved-A6F8>..<reserved-A6FF>
|
||||
A7C0..A7C1 ; Cn # [2] <reserved-A7C0>..<reserved-A7C1>
|
||||
A7C7..A7F6 ; Cn # [48] <reserved-A7C7>..<reserved-A7F6>
|
||||
A82C..A82F ; Cn # [4] <reserved-A82C>..<reserved-A82F>
|
||||
A7CB..A7F4 ; Cn # [42] <reserved-A7CB>..<reserved-A7F4>
|
||||
A82D..A82F ; Cn # [3] <reserved-A82D>..<reserved-A82F>
|
||||
A83A..A83F ; Cn # [6] <reserved-A83A>..<reserved-A83F>
|
||||
A878..A87F ; Cn # [8] <reserved-A878>..<reserved-A87F>
|
||||
A8C6..A8CD ; Cn # [8] <reserved-A8C6>..<reserved-A8CD>
|
||||
|
@ -327,7 +324,7 @@ AB0F..AB10 ; Cn # [2] <reserved-AB0F>..<reserved-AB10>
|
|||
AB17..AB1F ; Cn # [9] <reserved-AB17>..<reserved-AB1F>
|
||||
AB27 ; Cn # <reserved-AB27>
|
||||
AB2F ; Cn # <reserved-AB2F>
|
||||
AB68..AB6F ; Cn # [8] <reserved-AB68>..<reserved-AB6F>
|
||||
AB6C..AB6F ; Cn # [4] <reserved-AB6C>..<reserved-AB6F>
|
||||
ABEE..ABEF ; Cn # [2] <reserved-ABEE>..<reserved-ABEF>
|
||||
ABFA..ABFF ; Cn # [6] <reserved-ABFA>..<reserved-ABFF>
|
||||
D7A4..D7AF ; Cn # [12] <reserved-D7A4>..<reserved-D7AF>
|
||||
|
@ -372,7 +369,7 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
|
|||
10103..10106 ; Cn # [4] <reserved-10103>..<reserved-10106>
|
||||
10134..10136 ; Cn # [3] <reserved-10134>..<reserved-10136>
|
||||
1018F ; Cn # <reserved-1018F>
|
||||
1019C..1019F ; Cn # [4] <reserved-1019C>..<reserved-1019F>
|
||||
1019D..1019F ; Cn # [3] <reserved-1019D>..<reserved-1019F>
|
||||
101A1..101CF ; Cn # [47] <reserved-101A1>..<reserved-101CF>
|
||||
101FE..1027F ; Cn # [130] <reserved-101FE>..<reserved-1027F>
|
||||
1029D..1029F ; Cn # [3] <reserved-1029D>..<reserved-1029F>
|
||||
|
@ -431,9 +428,13 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
|
|||
10CF3..10CF9 ; Cn # [7] <reserved-10CF3>..<reserved-10CF9>
|
||||
10D28..10D2F ; Cn # [8] <reserved-10D28>..<reserved-10D2F>
|
||||
10D3A..10E5F ; Cn # [294] <reserved-10D3A>..<reserved-10E5F>
|
||||
10E7F..10EFF ; Cn # [129] <reserved-10E7F>..<reserved-10EFF>
|
||||
10E7F ; Cn # <reserved-10E7F>
|
||||
10EAA ; Cn # <reserved-10EAA>
|
||||
10EAE..10EAF ; Cn # [2] <reserved-10EAE>..<reserved-10EAF>
|
||||
10EB2..10EFF ; Cn # [78] <reserved-10EB2>..<reserved-10EFF>
|
||||
10F28..10F2F ; Cn # [8] <reserved-10F28>..<reserved-10F2F>
|
||||
10F5A..10FDF ; Cn # [134] <reserved-10F5A>..<reserved-10FDF>
|
||||
10F5A..10FAF ; Cn # [86] <reserved-10F5A>..<reserved-10FAF>
|
||||
10FCC..10FDF ; Cn # [20] <reserved-10FCC>..<reserved-10FDF>
|
||||
10FF7..10FFF ; Cn # [9] <reserved-10FF7>..<reserved-10FFF>
|
||||
1104E..11051 ; Cn # [4] <reserved-1104E>..<reserved-11051>
|
||||
11070..1107E ; Cn # [15] <reserved-11070>..<reserved-1107E>
|
||||
|
@ -442,9 +443,8 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
|
|||
110E9..110EF ; Cn # [7] <reserved-110E9>..<reserved-110EF>
|
||||
110FA..110FF ; Cn # [6] <reserved-110FA>..<reserved-110FF>
|
||||
11135 ; Cn # <reserved-11135>
|
||||
11147..1114F ; Cn # [9] <reserved-11147>..<reserved-1114F>
|
||||
11148..1114F ; Cn # [8] <reserved-11148>..<reserved-1114F>
|
||||
11177..1117F ; Cn # [9] <reserved-11177>..<reserved-1117F>
|
||||
111CE..111CF ; Cn # [2] <reserved-111CE>..<reserved-111CF>
|
||||
111E0 ; Cn # <reserved-111E0>
|
||||
111F5..111FF ; Cn # [11] <reserved-111F5>..<reserved-111FF>
|
||||
11212 ; Cn # <reserved-11212>
|
||||
|
@ -471,9 +471,8 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
|
|||
11364..11365 ; Cn # [2] <reserved-11364>..<reserved-11365>
|
||||
1136D..1136F ; Cn # [3] <reserved-1136D>..<reserved-1136F>
|
||||
11375..113FF ; Cn # [139] <reserved-11375>..<reserved-113FF>
|
||||
1145A ; Cn # <reserved-1145A>
|
||||
1145C ; Cn # <reserved-1145C>
|
||||
11460..1147F ; Cn # [32] <reserved-11460>..<reserved-1147F>
|
||||
11462..1147F ; Cn # [30] <reserved-11462>..<reserved-1147F>
|
||||
114C8..114CF ; Cn # [8] <reserved-114C8>..<reserved-114CF>
|
||||
114DA..1157F ; Cn # [166] <reserved-114DA>..<reserved-1157F>
|
||||
115B6..115B7 ; Cn # [2] <reserved-115B6>..<reserved-115B7>
|
||||
|
@ -488,7 +487,14 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
|
|||
11740..117FF ; Cn # [192] <reserved-11740>..<reserved-117FF>
|
||||
1183C..1189F ; Cn # [100] <reserved-1183C>..<reserved-1189F>
|
||||
118F3..118FE ; Cn # [12] <reserved-118F3>..<reserved-118FE>
|
||||
11900..1199F ; Cn # [160] <reserved-11900>..<reserved-1199F>
|
||||
11907..11908 ; Cn # [2] <reserved-11907>..<reserved-11908>
|
||||
1190A..1190B ; Cn # [2] <reserved-1190A>..<reserved-1190B>
|
||||
11914 ; Cn # <reserved-11914>
|
||||
11917 ; Cn # <reserved-11917>
|
||||
11936 ; Cn # <reserved-11936>
|
||||
11939..1193A ; Cn # [2] <reserved-11939>..<reserved-1193A>
|
||||
11947..1194F ; Cn # [9] <reserved-11947>..<reserved-1194F>
|
||||
1195A..1199F ; Cn # [70] <reserved-1195A>..<reserved-1199F>
|
||||
119A8..119A9 ; Cn # [2] <reserved-119A8>..<reserved-119A9>
|
||||
119D8..119D9 ; Cn # [2] <reserved-119D8>..<reserved-119D9>
|
||||
119E5..119FF ; Cn # [27] <reserved-119E5>..<reserved-119FF>
|
||||
|
@ -515,7 +521,8 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
|
|||
11D92 ; Cn # <reserved-11D92>
|
||||
11D99..11D9F ; Cn # [7] <reserved-11D99>..<reserved-11D9F>
|
||||
11DAA..11EDF ; Cn # [310] <reserved-11DAA>..<reserved-11EDF>
|
||||
11EF9..11FBF ; Cn # [199] <reserved-11EF9>..<reserved-11FBF>
|
||||
11EF9..11FAF ; Cn # [183] <reserved-11EF9>..<reserved-11FAF>
|
||||
11FB1..11FBF ; Cn # [15] <reserved-11FB1>..<reserved-11FBF>
|
||||
11FF2..11FFE ; Cn # [13] <reserved-11FF2>..<reserved-11FFE>
|
||||
1239A..123FF ; Cn # [102] <reserved-1239A>..<reserved-123FF>
|
||||
1246F ; Cn # <reserved-1246F>
|
||||
|
@ -539,9 +546,11 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
|
|||
16F4B..16F4E ; Cn # [4] <reserved-16F4B>..<reserved-16F4E>
|
||||
16F88..16F8E ; Cn # [7] <reserved-16F88>..<reserved-16F8E>
|
||||
16FA0..16FDF ; Cn # [64] <reserved-16FA0>..<reserved-16FDF>
|
||||
16FE4..16FFF ; Cn # [28] <reserved-16FE4>..<reserved-16FFF>
|
||||
16FE5..16FEF ; Cn # [11] <reserved-16FE5>..<reserved-16FEF>
|
||||
16FF2..16FFF ; Cn # [14] <reserved-16FF2>..<reserved-16FFF>
|
||||
187F8..187FF ; Cn # [8] <reserved-187F8>..<reserved-187FF>
|
||||
18AF3..1AFFF ; Cn # [9485] <reserved-18AF3>..<reserved-1AFFF>
|
||||
18CD6..18CFF ; Cn # [42] <reserved-18CD6>..<reserved-18CFF>
|
||||
18D09..1AFFF ; Cn # [8951] <reserved-18D09>..<reserved-1AFFF>
|
||||
1B11F..1B14F ; Cn # [49] <reserved-1B11F>..<reserved-1B14F>
|
||||
1B153..1B163 ; Cn # [17] <reserved-1B153>..<reserved-1B163>
|
||||
1B168..1B16F ; Cn # [8] <reserved-1B168>..<reserved-1B16F>
|
||||
|
@ -639,17 +648,15 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
|
|||
1F0C0 ; Cn # <reserved-1F0C0>
|
||||
1F0D0 ; Cn # <reserved-1F0D0>
|
||||
1F0F6..1F0FF ; Cn # [10] <reserved-1F0F6>..<reserved-1F0FF>
|
||||
1F10D..1F10F ; Cn # [3] <reserved-1F10D>..<reserved-1F10F>
|
||||
1F16D..1F16F ; Cn # [3] <reserved-1F16D>..<reserved-1F16F>
|
||||
1F1AD..1F1E5 ; Cn # [57] <reserved-1F1AD>..<reserved-1F1E5>
|
||||
1F1AE..1F1E5 ; Cn # [56] <reserved-1F1AE>..<reserved-1F1E5>
|
||||
1F203..1F20F ; Cn # [13] <reserved-1F203>..<reserved-1F20F>
|
||||
1F23C..1F23F ; Cn # [4] <reserved-1F23C>..<reserved-1F23F>
|
||||
1F249..1F24F ; Cn # [7] <reserved-1F249>..<reserved-1F24F>
|
||||
1F252..1F25F ; Cn # [14] <reserved-1F252>..<reserved-1F25F>
|
||||
1F266..1F2FF ; Cn # [154] <reserved-1F266>..<reserved-1F2FF>
|
||||
1F6D6..1F6DF ; Cn # [10] <reserved-1F6D6>..<reserved-1F6DF>
|
||||
1F6D8..1F6DF ; Cn # [8] <reserved-1F6D8>..<reserved-1F6DF>
|
||||
1F6ED..1F6EF ; Cn # [3] <reserved-1F6ED>..<reserved-1F6EF>
|
||||
1F6FB..1F6FF ; Cn # [5] <reserved-1F6FB>..<reserved-1F6FF>
|
||||
1F6FD..1F6FF ; Cn # [3] <reserved-1F6FD>..<reserved-1F6FF>
|
||||
1F774..1F77F ; Cn # [12] <reserved-1F774>..<reserved-1F77F>
|
||||
1F7D9..1F7DF ; Cn # [7] <reserved-1F7D9>..<reserved-1F7DF>
|
||||
1F7EC..1F7FF ; Cn # [20] <reserved-1F7EC>..<reserved-1F7FF>
|
||||
|
@ -657,32 +664,36 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
|
|||
1F848..1F84F ; Cn # [8] <reserved-1F848>..<reserved-1F84F>
|
||||
1F85A..1F85F ; Cn # [6] <reserved-1F85A>..<reserved-1F85F>
|
||||
1F888..1F88F ; Cn # [8] <reserved-1F888>..<reserved-1F88F>
|
||||
1F8AE..1F8FF ; Cn # [82] <reserved-1F8AE>..<reserved-1F8FF>
|
||||
1F90C ; Cn # <reserved-1F90C>
|
||||
1F972 ; Cn # <reserved-1F972>
|
||||
1F977..1F979 ; Cn # [3] <reserved-1F977>..<reserved-1F979>
|
||||
1F9A3..1F9A4 ; Cn # [2] <reserved-1F9A3>..<reserved-1F9A4>
|
||||
1F9AB..1F9AD ; Cn # [3] <reserved-1F9AB>..<reserved-1F9AD>
|
||||
1F9CB..1F9CC ; Cn # [2] <reserved-1F9CB>..<reserved-1F9CC>
|
||||
1F8AE..1F8AF ; Cn # [2] <reserved-1F8AE>..<reserved-1F8AF>
|
||||
1F8B2..1F8FF ; Cn # [78] <reserved-1F8B2>..<reserved-1F8FF>
|
||||
1F979 ; Cn # <reserved-1F979>
|
||||
1F9CC ; Cn # <reserved-1F9CC>
|
||||
1FA54..1FA5F ; Cn # [12] <reserved-1FA54>..<reserved-1FA5F>
|
||||
1FA6E..1FA6F ; Cn # [2] <reserved-1FA6E>..<reserved-1FA6F>
|
||||
1FA74..1FA77 ; Cn # [4] <reserved-1FA74>..<reserved-1FA77>
|
||||
1FA75..1FA77 ; Cn # [3] <reserved-1FA75>..<reserved-1FA77>
|
||||
1FA7B..1FA7F ; Cn # [5] <reserved-1FA7B>..<reserved-1FA7F>
|
||||
1FA83..1FA8F ; Cn # [13] <reserved-1FA83>..<reserved-1FA8F>
|
||||
1FA96..1FFFF ; Cn # [1386] <reserved-1FA96>..<noncharacter-1FFFF>
|
||||
2A6D7..2A6FF ; Cn # [41] <reserved-2A6D7>..<reserved-2A6FF>
|
||||
1FA87..1FA8F ; Cn # [9] <reserved-1FA87>..<reserved-1FA8F>
|
||||
1FAA9..1FAAF ; Cn # [7] <reserved-1FAA9>..<reserved-1FAAF>
|
||||
1FAB7..1FABF ; Cn # [9] <reserved-1FAB7>..<reserved-1FABF>
|
||||
1FAC3..1FACF ; Cn # [13] <reserved-1FAC3>..<reserved-1FACF>
|
||||
1FAD7..1FAFF ; Cn # [41] <reserved-1FAD7>..<reserved-1FAFF>
|
||||
1FB93 ; Cn # <reserved-1FB93>
|
||||
1FBCB..1FBEF ; Cn # [37] <reserved-1FBCB>..<reserved-1FBEF>
|
||||
1FBFA..1FFFF ; Cn # [1030] <reserved-1FBFA>..<noncharacter-1FFFF>
|
||||
2A6DE..2A6FF ; Cn # [34] <reserved-2A6DE>..<reserved-2A6FF>
|
||||
2B735..2B73F ; Cn # [11] <reserved-2B735>..<reserved-2B73F>
|
||||
2B81E..2B81F ; Cn # [2] <reserved-2B81E>..<reserved-2B81F>
|
||||
2CEA2..2CEAF ; Cn # [14] <reserved-2CEA2>..<reserved-2CEAF>
|
||||
2EBE1..2F7FF ; Cn # [3103] <reserved-2EBE1>..<reserved-2F7FF>
|
||||
2FA1E..E0000 ; Cn # [722403] <reserved-2FA1E>..<reserved-E0000>
|
||||
2FA1E..2FFFF ; Cn # [1506] <reserved-2FA1E>..<noncharacter-2FFFF>
|
||||
3134B..E0000 ; Cn # [715958] <reserved-3134B>..<reserved-E0000>
|
||||
E0002..E001F ; Cn # [30] <reserved-E0002>..<reserved-E001F>
|
||||
E0080..E00FF ; Cn # [128] <reserved-E0080>..<reserved-E00FF>
|
||||
E01F0..EFFFF ; Cn # [65040] <reserved-E01F0>..<noncharacter-EFFFF>
|
||||
FFFFE..FFFFF ; Cn # [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
|
||||
10FFFE..10FFFF; Cn # [2] <noncharacter-10FFFE>..<noncharacter-10FFFF>
|
||||
|
||||
# Total code points: 836602
|
||||
# Total code points: 830672
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1285,7 +1296,9 @@ A7BA ; Lu # LATIN CAPITAL LETTER GLOTTAL A
|
|||
A7BC ; Lu # LATIN CAPITAL LETTER GLOTTAL I
|
||||
A7BE ; Lu # LATIN CAPITAL LETTER GLOTTAL U
|
||||
A7C2 ; Lu # LATIN CAPITAL LETTER ANGLICANA W
|
||||
A7C4..A7C6 ; Lu # [3] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER Z WITH PALATAL HOOK
|
||||
A7C4..A7C7 ; Lu # [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
|
||||
A7C9 ; Lu # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
|
||||
A7F5 ; Lu # LATIN CAPITAL LETTER REVERSED HALF H
|
||||
FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
10400..10427 ; Lu # [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW
|
||||
104B0..104D3 ; Lu # [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA
|
||||
|
@ -1325,7 +1338,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP
|
|||
1D7CA ; Lu # MATHEMATICAL BOLD CAPITAL DIGAMMA
|
||||
1E900..1E921 ; Lu # [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA
|
||||
|
||||
# Total code points: 1788
|
||||
# Total code points: 1791
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1932,9 +1945,12 @@ A7BB ; Ll # LATIN SMALL LETTER GLOTTAL A
|
|||
A7BD ; Ll # LATIN SMALL LETTER GLOTTAL I
|
||||
A7BF ; Ll # LATIN SMALL LETTER GLOTTAL U
|
||||
A7C3 ; Ll # LATIN SMALL LETTER ANGLICANA W
|
||||
A7C8 ; Ll # LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY
|
||||
A7CA ; Ll # LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
|
||||
A7F6 ; Ll # LATIN SMALL LETTER REVERSED HALF H
|
||||
A7FA ; Ll # LATIN LETTER SMALL CAPITAL TURNED M
|
||||
AB30..AB5A ; Ll # [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
|
||||
AB60..AB67 ; Ll # [8] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TS DIGRAPH WITH RETROFLEX HOOK
|
||||
AB60..AB68 ; Ll # [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE
|
||||
AB70..ABBF ; Ll # [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA
|
||||
FB00..FB06 ; Ll # [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
|
||||
FB13..FB17 ; Ll # [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
|
||||
|
@ -1974,7 +1990,7 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL
|
|||
1D7CB ; Ll # MATHEMATICAL BOLD SMALL DIGAMMA
|
||||
1E922..1E943 ; Ll # [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
|
||||
|
||||
# Total code points: 2151
|
||||
# Total code points: 2155
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -2049,6 +2065,7 @@ AA70 ; Lm # MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
|
|||
AADD ; Lm # TAI VIET SYMBOL SAM
|
||||
AAF3..AAF4 ; Lm # [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK
|
||||
AB5C..AB5F ; Lm # [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
|
||||
AB69 ; Lm # MODIFIER LETTER SMALL TURNED W
|
||||
FF70 ; Lm # HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
16B40..16B43 ; Lm # [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM
|
||||
|
@ -2058,7 +2075,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK
|
|||
1E137..1E13D ; Lm # [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
|
||||
1E94B ; Lm # ADLAM NASALIZATION MARK
|
||||
|
||||
# Total code points: 259
|
||||
# Total code points: 260
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -2088,7 +2105,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK
|
|||
0840..0858 ; Lo # [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
|
||||
0860..086A ; Lo # [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
|
||||
08A0..08B4 ; Lo # [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
|
||||
08B6..08BD ; Lo # [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON
|
||||
08B6..08C7 ; Lo # [18] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE
|
||||
0904..0939 ; Lo # [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
|
||||
093D ; Lo # DEVANAGARI SIGN AVAGRAHA
|
||||
0950 ; Lo # DEVANAGARI OM
|
||||
|
@ -2164,7 +2181,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK
|
|||
0CDE ; Lo # KANNADA LETTER FA
|
||||
0CE0..0CE1 ; Lo # [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
|
||||
0CF1..0CF2 ; Lo # [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
|
||||
0D05..0D0C ; Lo # [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
|
||||
0D04..0D0C ; Lo # [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
|
||||
0D0E..0D10 ; Lo # [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
|
||||
0D12..0D3A ; Lo # [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
|
||||
0D3D ; Lo # MALAYALAM SIGN AVAGRAHA
|
||||
|
@ -2277,10 +2294,10 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK
|
|||
30FF ; Lo # KATAKANA DIGRAPH KOTO
|
||||
3105..312F ; Lo # [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN
|
||||
3131..318E ; Lo # [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
|
||||
31A0..31BA ; Lo # [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY
|
||||
31A0..31BF ; Lo # [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
|
||||
31F0..31FF ; Lo # [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
|
||||
3400..4DB5 ; Lo # [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
|
||||
4E00..9FEF ; Lo # [20976] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEF
|
||||
3400..4DBF ; Lo # [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
|
||||
4E00..9FFC ; Lo # [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC
|
||||
A000..A014 ; Lo # [21] YI SYLLABLE IT..YI SYLLABLE E
|
||||
A016..A48C ; Lo # [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
|
||||
A4D0..A4F7 ; Lo # [40] LISU LETTER BA..LISU LETTER OE
|
||||
|
@ -2404,15 +2421,19 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
|
|||
10B80..10B91 ; Lo # [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW
|
||||
10C00..10C48 ; Lo # [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH
|
||||
10D00..10D23 ; Lo # [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
|
||||
10E80..10EA9 ; Lo # [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
|
||||
10EB0..10EB1 ; Lo # [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
|
||||
10F00..10F1C ; Lo # [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
|
||||
10F27 ; Lo # OLD SOGDIAN LIGATURE AYIN-DALETH
|
||||
10F30..10F45 ; Lo # [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
|
||||
10FB0..10FC4 ; Lo # [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW
|
||||
10FE0..10FF6 ; Lo # [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH
|
||||
11003..11037 ; Lo # [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA
|
||||
11083..110AF ; Lo # [45] KAITHI LETTER A..KAITHI LETTER HA
|
||||
110D0..110E8 ; Lo # [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE
|
||||
11103..11126 ; Lo # [36] CHAKMA LETTER AA..CHAKMA LETTER HAA
|
||||
11144 ; Lo # CHAKMA LETTER LHAA
|
||||
11147 ; Lo # CHAKMA LETTER VAA
|
||||
11150..11172 ; Lo # [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA
|
||||
11176 ; Lo # MAHAJANI LIGATURE SHRI
|
||||
11183..111B2 ; Lo # [48] SHARADA LETTER A..SHARADA LETTER HA
|
||||
|
@ -2438,7 +2459,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
|
|||
1135D..11361 ; Lo # [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL
|
||||
11400..11434 ; Lo # [53] NEWA LETTER A..NEWA LETTER HA
|
||||
11447..1144A ; Lo # [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI
|
||||
1145F ; Lo # NEWA LETTER VEDIC ANUSVARA
|
||||
1145F..11461 ; Lo # [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA
|
||||
11480..114AF ; Lo # [48] TIRHUTA ANJI..TIRHUTA LETTER HA
|
||||
114C4..114C5 ; Lo # [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG
|
||||
114C7 ; Lo # TIRHUTA OM
|
||||
|
@ -2450,7 +2471,13 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
|
|||
116B8 ; Lo # TAKRI LETTER ARCHAIC KHA
|
||||
11700..1171A ; Lo # [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
|
||||
11800..1182B ; Lo # [44] DOGRA LETTER A..DOGRA LETTER RRA
|
||||
118FF ; Lo # WARANG CITI OM
|
||||
118FF..11906 ; Lo # [8] WARANG CITI OM..DIVES AKURU LETTER E
|
||||
11909 ; Lo # DIVES AKURU LETTER O
|
||||
1190C..11913 ; Lo # [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA
|
||||
11915..11916 ; Lo # [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA
|
||||
11918..1192F ; Lo # [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA
|
||||
1193F ; Lo # DIVES AKURU PREFIXED NASAL SIGN
|
||||
11941 ; Lo # DIVES AKURU INITIAL RA
|
||||
119A0..119A7 ; Lo # [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR
|
||||
119AA..119D0 ; Lo # [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA
|
||||
119E1 ; Lo # NANDINAGARI SIGN AVAGRAHA
|
||||
|
@ -2475,6 +2502,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
|
|||
11D6A..11D89 ; Lo # [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA
|
||||
11D98 ; Lo # GUNJALA GONDI OM
|
||||
11EE0..11EF2 ; Lo # [19] MAKASAR LETTER KA..MAKASAR ANGKA
|
||||
11FB0 ; Lo # LISU LETTER YHA
|
||||
12000..12399 ; Lo # [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U
|
||||
12480..12543 ; Lo # [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
|
||||
13000..1342E ; Lo # [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
|
||||
|
@ -2488,7 +2516,8 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
|
|||
16F00..16F4A ; Lo # [75] MIAO LETTER PA..MIAO LETTER RTE
|
||||
16F50 ; Lo # MIAO LETTER NASALIZATION
|
||||
17000..187F7 ; Lo # [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
|
||||
18800..18AF2 ; Lo # [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755
|
||||
18800..18CD5 ; Lo # [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5
|
||||
18D00..18D08 ; Lo # [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
|
||||
1B000..1B11E ; Lo # [287] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER N-MU-MO-2
|
||||
1B150..1B152 ; Lo # [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
|
||||
1B164..1B167 ; Lo # [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
|
||||
|
@ -2534,14 +2563,15 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
|
|||
1EEA1..1EEA3 ; Lo # [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL
|
||||
1EEA5..1EEA9 ; Lo # [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH
|
||||
1EEAB..1EEBB ; Lo # [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
|
||||
20000..2A6D6 ; Lo # [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
|
||||
20000..2A6DD ; Lo # [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD
|
||||
2A700..2B734 ; Lo # [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
|
||||
2B740..2B81D ; Lo # [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; Lo # [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; Lo # [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
2F800..2FA1D ; Lo # [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
|
||||
|
||||
# Total code points: 121414
|
||||
# Total code points: 127004
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -2605,7 +2635,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
|
|||
0B3F ; Mn # ORIYA VOWEL SIGN I
|
||||
0B41..0B44 ; Mn # [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
|
||||
0B4D ; Mn # ORIYA SIGN VIRAMA
|
||||
0B56 ; Mn # ORIYA AI LENGTH MARK
|
||||
0B55..0B56 ; Mn # [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
|
||||
0B62..0B63 ; Mn # [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
|
||||
0B82 ; Mn # TAMIL SIGN ANUSVARA
|
||||
0BC0 ; Mn # TAMIL VOWEL SIGN II
|
||||
|
@ -2628,6 +2658,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
|
|||
0D41..0D44 ; Mn # [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
|
||||
0D4D ; Mn # MALAYALAM SIGN VIRAMA
|
||||
0D62..0D63 ; Mn # [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
|
||||
0D81 ; Mn # SINHALA SIGN CANDRABINDU
|
||||
0DCA ; Mn # SINHALA SIGN AL-LAKUNA
|
||||
0DD2..0DD4 ; Mn # [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
|
||||
0DD6 ; Mn # SINHALA VOWEL SIGN DIGA PAA-PILLA
|
||||
|
@ -2685,6 +2716,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
|
|||
1A73..1A7C ; Mn # [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
|
||||
1A7F ; Mn # TAI THAM COMBINING CRYPTOGRAMMIC DOT
|
||||
1AB0..1ABD ; Mn # [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
|
||||
1ABF..1AC0 ; Mn # [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
|
||||
1B00..1B03 ; Mn # [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
|
||||
1B34 ; Mn # BALINESE SIGN REREKAN
|
||||
1B36..1B3A ; Mn # [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
|
||||
|
@ -2725,6 +2757,7 @@ A802 ; Mn # SYLOTI NAGRI SIGN DVISVARA
|
|||
A806 ; Mn # SYLOTI NAGRI SIGN HASANTA
|
||||
A80B ; Mn # SYLOTI NAGRI SIGN ANUSVARA
|
||||
A825..A826 ; Mn # [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
|
||||
A82C ; Mn # SYLOTI NAGRI SIGN ALTERNATE HASANTA
|
||||
A8C4..A8C5 ; Mn # [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
|
||||
A8E0..A8F1 ; Mn # [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
|
||||
A8FF ; Mn # DEVANAGARI VOWEL SIGN AY
|
||||
|
@ -2764,6 +2797,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
|
|||
10A3F ; Mn # KHAROSHTHI VIRAMA
|
||||
10AE5..10AE6 ; Mn # [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
|
||||
10D24..10D27 ; Mn # [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
|
||||
10EAB..10EAC ; Mn # [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
|
||||
10F46..10F50 ; Mn # [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
|
||||
11001 ; Mn # BRAHMI SIGN ANUSVARA
|
||||
11038..11046 ; Mn # [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
|
||||
|
@ -2777,6 +2811,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
|
|||
11180..11181 ; Mn # [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA
|
||||
111B6..111BE ; Mn # [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O
|
||||
111C9..111CC ; Mn # [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK
|
||||
111CF ; Mn # SHARADA SIGN INVERTED CANDRABINDU
|
||||
1122F..11231 ; Mn # [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
|
||||
11234 ; Mn # KHOJKI SIGN ANUSVARA
|
||||
11236..11237 ; Mn # [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
|
||||
|
@ -2812,6 +2847,9 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
|
|||
11727..1172B ; Mn # [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER
|
||||
1182F..11837 ; Mn # [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA
|
||||
11839..1183A ; Mn # [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA
|
||||
1193B..1193C ; Mn # [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU
|
||||
1193E ; Mn # DIVES AKURU VIRAMA
|
||||
11943 ; Mn # DIVES AKURU SIGN NUKTA
|
||||
119D4..119D7 ; Mn # [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR
|
||||
119DA..119DB ; Mn # [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI
|
||||
119E0 ; Mn # NANDINAGARI SIGN VIRAMA
|
||||
|
@ -2843,6 +2881,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
|
|||
16B30..16B36 ; Mn # [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
|
||||
16F4F ; Mn # MIAO SIGN CONSONANT MODIFIER BAR
|
||||
16F8F..16F92 ; Mn # [4] MIAO TONE RIGHT..MIAO TONE BELOW
|
||||
16FE4 ; Mn # KHITAN SMALL SCRIPT FILLER
|
||||
1BC9D..1BC9E ; Mn # [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
|
||||
1D167..1D169 ; Mn # [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
|
||||
1D17B..1D182 ; Mn # [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
|
||||
|
@ -2866,7 +2905,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
|
|||
1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
|
||||
E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
||||
|
||||
# Total code points: 1826
|
||||
# Total code points: 1839
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -3003,6 +3042,7 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK
|
|||
11182 ; Mc # SHARADA SIGN VISARGA
|
||||
111B3..111B5 ; Mc # [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II
|
||||
111BF..111C0 ; Mc # [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA
|
||||
111CE ; Mc # SHARADA VOWEL SIGN PRISHTHAMATRA E
|
||||
1122C..1122E ; Mc # [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II
|
||||
11232..11233 ; Mc # [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU
|
||||
11235 ; Mc # KHOJKI SIGN VIRAMA
|
||||
|
@ -3034,6 +3074,11 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK
|
|||
11726 ; Mc # AHOM VOWEL SIGN E
|
||||
1182C..1182E ; Mc # [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II
|
||||
11838 ; Mc # DOGRA SIGN VISARGA
|
||||
11930..11935 ; Mc # [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E
|
||||
11937..11938 ; Mc # [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O
|
||||
1193D ; Mc # DIVES AKURU SIGN HALANTA
|
||||
11940 ; Mc # DIVES AKURU MEDIAL YA
|
||||
11942 ; Mc # DIVES AKURU MEDIAL RA
|
||||
119D1..119D3 ; Mc # [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II
|
||||
119DC..119DF ; Mc # [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA
|
||||
119E4 ; Mc # NANDINAGARI VOWEL SIGN PRISHTHAMATRA E
|
||||
|
@ -3050,10 +3095,11 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK
|
|||
11D96 ; Mc # GUNJALA GONDI SIGN VISARGA
|
||||
11EF5..11EF6 ; Mc # [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
|
||||
16F51..16F87 ; Mc # [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
|
||||
16FF0..16FF1 ; Mc # [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
|
||||
1D165..1D166 ; Mc # [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
|
||||
1D16D..1D172 ; Mc # [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
|
||||
|
||||
# Total code points: 429
|
||||
# Total code points: 443
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -3109,6 +3155,7 @@ FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
|
|||
116C0..116C9 ; Nd # [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
|
||||
11730..11739 ; Nd # [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
|
||||
118E0..118E9 ; Nd # [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
|
||||
11950..11959 ; Nd # [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
|
||||
11C50..11C59 ; Nd # [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
|
||||
11D50..11D59 ; Nd # [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
|
||||
11DA0..11DA9 ; Nd # [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
|
||||
|
@ -3118,8 +3165,9 @@ FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
|
|||
1E140..1E149 ; Nd # [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
|
||||
1E2F0..1E2F9 ; Nd # [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
|
||||
1E950..1E959 ; Nd # [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
|
||||
1FBF0..1FBF9 ; Nd # [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
|
||||
|
||||
# Total code points: 630
|
||||
# Total code points: 650
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -3197,6 +3245,7 @@ A830..A835 ; No # [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTIO
|
|||
10E60..10E7E ; No # [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
|
||||
10F1D..10F26 ; No # [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
|
||||
10F51..10F54 ; No # [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED
|
||||
10FC5..10FCB ; No # [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED
|
||||
11052..11065 ; No # [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND
|
||||
111E1..111F4 ; No # [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND
|
||||
1173A..1173B ; No # [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY
|
||||
|
@ -3215,7 +3264,7 @@ A830..A835 ; No # [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTIO
|
|||
1ED2F..1ED3D ; No # [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH
|
||||
1F100..1F10C ; No # [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
|
||||
|
||||
# Total code points: 888
|
||||
# Total code points: 895
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -3322,8 +3371,9 @@ FE31..FE32 ; Pd # [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION
|
|||
FE58 ; Pd # SMALL EM DASH
|
||||
FE63 ; Pd # SMALL HYPHEN-MINUS
|
||||
FF0D ; Pd # FULLWIDTH HYPHEN-MINUS
|
||||
10EAD ; Pd # YEZIDI HYPHENATION MARK
|
||||
|
||||
# Total code points: 24
|
||||
# Total code points: 25
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -3591,6 +3641,7 @@ FF3F ; Pc # FULLWIDTH LOW LINE
|
|||
2E3C..2E3F ; Po # [4] STENOGRAPHIC FULL STOP..CAPITULUM
|
||||
2E41 ; Po # REVERSED COMMA
|
||||
2E43..2E4F ; Po # [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER
|
||||
2E52 ; Po # TIRONIAN SIGN CAPITAL ET
|
||||
3001..3003 ; Po # [3] IDEOGRAPHIC COMMA..DITTO MARK
|
||||
303D ; Po # PART ALTERNATION MARK
|
||||
30FB ; Po # KATAKANA MIDDLE DOT
|
||||
|
@ -3656,7 +3707,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL
|
|||
11238..1123D ; Po # [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN
|
||||
112A9 ; Po # MULTANI SECTION MARK
|
||||
1144B..1144F ; Po # [5] NEWA DANDA..NEWA ABBREVIATION SIGN
|
||||
1145B ; Po # NEWA PLACEHOLDER MARK
|
||||
1145A..1145B ; Po # [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK
|
||||
1145D ; Po # NEWA INSERTION SIGN
|
||||
114C6 ; Po # TIRHUTA ABBREVIATION SIGN
|
||||
115C1..115D7 ; Po # [23] SIDDHAM SIGN SIDDHAM..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES
|
||||
|
@ -3664,6 +3715,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL
|
|||
11660..1166C ; Po # [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT
|
||||
1173C..1173E ; Po # [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
|
||||
1183B ; Po # DOGRA ABBREVIATION SIGN
|
||||
11944..11946 ; Po # [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK
|
||||
119E2 ; Po # NANDINAGARI SIGN SIDDHAM
|
||||
11A3F..11A46 ; Po # [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK
|
||||
11A9A..11A9C ; Po # [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD
|
||||
|
@ -3683,7 +3735,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL
|
|||
1DA87..1DA8B ; Po # [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS
|
||||
1E95E..1E95F ; Po # [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK
|
||||
|
||||
# Total code points: 588
|
||||
# Total code points: 593
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -3812,13 +3864,14 @@ A700..A716 ; Sk # [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTE
|
|||
A720..A721 ; Sk # [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE
|
||||
A789..A78A ; Sk # [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN
|
||||
AB5B ; Sk # MODIFIER BREVE WITH INVERTED BREVE
|
||||
AB6A..AB6B ; Sk # [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK
|
||||
FBB2..FBC1 ; Sk # [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
|
||||
FF3E ; Sk # FULLWIDTH CIRCUMFLEX ACCENT
|
||||
FF40 ; Sk # FULLWIDTH GRAVE ACCENT
|
||||
FFE3 ; Sk # FULLWIDTH MACRON
|
||||
1F3FB..1F3FF ; Sk # [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
|
||||
|
||||
# Total code points: 121
|
||||
# Total code points: 123
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -3904,8 +3957,9 @@ FFE3 ; Sk # FULLWIDTH MACRON
|
|||
2B45..2B46 ; So # [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW
|
||||
2B4D..2B73 ; So # [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR
|
||||
2B76..2B95 ; So # [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
|
||||
2B98..2BFF ; So # [104] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..HELLSCHREIBER PAUSE SYMBOL
|
||||
2B97..2BFF ; So # [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL
|
||||
2CE5..2CEA ; So # [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA
|
||||
2E50..2E51 ; So # [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR
|
||||
2E80..2E99 ; So # [26] CJK RADICAL REPEAT..CJK RADICAL RAP
|
||||
2E9B..2EF3 ; So # [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
|
||||
2F00..2FD5 ; So # [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
|
||||
|
@ -3938,7 +3992,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
|
|||
10137..1013F ; So # [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
|
||||
10179..10189 ; So # [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN
|
||||
1018C..1018E ; So # [3] GREEK SINUSOID SIGN..NOMISMA SIGN
|
||||
10190..1019B ; So # [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
|
||||
10190..1019C ; So # [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL
|
||||
101A0 ; So # GREEK SYMBOL TAU RHO
|
||||
101D0..101FC ; So # [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
|
||||
10877..10878 ; So # [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON
|
||||
|
@ -3973,17 +4027,16 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
|
|||
1F0B1..1F0BF ; So # [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER
|
||||
1F0C1..1F0CF ; So # [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER
|
||||
1F0D1..1F0F5 ; So # [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21
|
||||
1F110..1F16C ; So # [93] PARENTHESIZED LATIN CAPITAL LETTER A..RAISED MR SIGN
|
||||
1F170..1F1AC ; So # [61] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VOD
|
||||
1F10D..1F1AD ; So # [161] CIRCLED ZERO WITH SLASH..MASK WORK SYMBOL
|
||||
1F1E6..1F202 ; So # [29] REGIONAL INDICATOR SYMBOL LETTER A..SQUARED KATAKANA SA
|
||||
1F210..1F23B ; So # [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
|
||||
1F240..1F248 ; So # [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
|
||||
1F250..1F251 ; So # [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
|
||||
1F260..1F265 ; So # [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI
|
||||
1F300..1F3FA ; So # [251] CYCLONE..AMPHORA
|
||||
1F400..1F6D5 ; So # [726] RAT..HINDU TEMPLE
|
||||
1F400..1F6D7 ; So # [728] RAT..ELEVATOR
|
||||
1F6E0..1F6EC ; So # [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
|
||||
1F6F0..1F6FA ; So # [11] SATELLITE..AUTO RICKSHAW
|
||||
1F6F0..1F6FC ; So # [13] SATELLITE..ROLLER SKATE
|
||||
1F700..1F773 ; So # [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
|
||||
1F780..1F7D8 ; So # [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE
|
||||
1F7E0..1F7EB ; So # [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
|
||||
|
@ -3992,20 +4045,22 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
|
|||
1F850..1F859 ; So # [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
|
||||
1F860..1F887 ; So # [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
|
||||
1F890..1F8AD ; So # [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
|
||||
1F900..1F90B ; So # [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT
|
||||
1F90D..1F971 ; So # [101] WHITE HEART..YAWNING FACE
|
||||
1F973..1F976 ; So # [4] FACE WITH PARTY HORN AND PARTY HAT..FREEZING FACE
|
||||
1F97A..1F9A2 ; So # [41] FACE WITH PLEADING EYES..SWAN
|
||||
1F9A5..1F9AA ; So # [6] SLOTH..OYSTER
|
||||
1F9AE..1F9CA ; So # [29] GUIDE DOG..ICE CUBE
|
||||
1F8B0..1F8B1 ; So # [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
|
||||
1F900..1F978 ; So # [121] CIRCLED CROSS FORMEE WITH FOUR DOTS..DISGUISED FACE
|
||||
1F97A..1F9CB ; So # [82] FACE WITH PLEADING EYES..BUBBLE TEA
|
||||
1F9CD..1FA53 ; So # [135] STANDING PERSON..BLACK CHESS KNIGHT-BISHOP
|
||||
1FA60..1FA6D ; So # [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
|
||||
1FA70..1FA73 ; So # [4] BALLET SHOES..SHORTS
|
||||
1FA70..1FA74 ; So # [5] BALLET SHOES..THONG SANDAL
|
||||
1FA78..1FA7A ; So # [3] DROP OF BLOOD..STETHOSCOPE
|
||||
1FA80..1FA82 ; So # [3] YO-YO..PARACHUTE
|
||||
1FA90..1FA95 ; So # [6] RINGED PLANET..BANJO
|
||||
1FA80..1FA86 ; So # [7] YO-YO..NESTING DOLLS
|
||||
1FA90..1FAA8 ; So # [25] RINGED PLANET..ROCK
|
||||
1FAB0..1FAB6 ; So # [7] FLY..FEATHER
|
||||
1FAC0..1FAC2 ; So # [3] ANATOMICAL HEART..PEOPLE HUGGING
|
||||
1FAD0..1FAD6 ; So # [7] BLUEBERRIES..TEAPOT
|
||||
1FB00..1FB92 ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
|
||||
1FB94..1FBCA ; So # [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
|
||||
|
||||
# Total code points: 6161
|
||||
# Total code points: 6431
|
||||
|
||||
# ================================================
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# GraphemeBreakProperty-12.1.0.txt
|
||||
# Date: 2019-03-10, 10:53:12 GMT
|
||||
# GraphemeBreakProperty-13.0.0.txt
|
||||
# Date: 2019-10-21, 14:30:35 GMT
|
||||
# © 2019 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
|
@ -26,11 +26,13 @@
|
|||
110BD ; Prepend # Cf KAITHI NUMBER SIGN
|
||||
110CD ; Prepend # Cf KAITHI NUMBER SIGN ABOVE
|
||||
111C2..111C3 ; Prepend # Lo [2] SHARADA SIGN JIHVAMULIYA..SHARADA SIGN UPADHMANIYA
|
||||
1193F ; Prepend # Lo DIVES AKURU PREFIXED NASAL SIGN
|
||||
11941 ; Prepend # Lo DIVES AKURU INITIAL RA
|
||||
11A3A ; Prepend # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA
|
||||
11A84..11A89 ; Prepend # Lo [6] SOYOMBO SIGN JIHVAMULIYA..SOYOMBO CLUSTER-INITIAL LETTER SA
|
||||
11D46 ; Prepend # Lo MASARAM GONDI REPHA
|
||||
|
||||
# Total code points: 22
|
||||
# Total code points: 24
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -139,7 +141,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
|
|||
0B3F ; Extend # Mn ORIYA VOWEL SIGN I
|
||||
0B41..0B44 ; Extend # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
|
||||
0B4D ; Extend # Mn ORIYA SIGN VIRAMA
|
||||
0B56 ; Extend # Mn ORIYA AI LENGTH MARK
|
||||
0B55..0B56 ; Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
|
||||
0B57 ; Extend # Mc ORIYA AU LENGTH MARK
|
||||
0B62..0B63 ; Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
|
||||
0B82 ; Extend # Mn TAMIL SIGN ANUSVARA
|
||||
|
@ -169,6 +171,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
|
|||
0D4D ; Extend # Mn MALAYALAM SIGN VIRAMA
|
||||
0D57 ; Extend # Mc MALAYALAM AU LENGTH MARK
|
||||
0D62..0D63 ; Extend # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
|
||||
0D81 ; Extend # Mn SINHALA SIGN CANDRABINDU
|
||||
0DCA ; Extend # Mn SINHALA SIGN AL-LAKUNA
|
||||
0DCF ; Extend # Mc SINHALA VOWEL SIGN AELA-PILLA
|
||||
0DD2..0DD4 ; Extend # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
|
||||
|
@ -229,6 +232,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
|
|||
1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
|
||||
1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
|
||||
1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY
|
||||
1ABF..1AC0 ; Extend # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
|
||||
1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
|
||||
1B34 ; Extend # Mn BALINESE SIGN REREKAN
|
||||
1B35 ; Extend # Mc BALINESE VOWEL SIGN TEDUNG
|
||||
|
@ -275,6 +279,7 @@ A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA
|
|||
A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA
|
||||
A80B ; Extend # Mn SYLOTI NAGRI SIGN ANUSVARA
|
||||
A825..A826 ; Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
|
||||
A82C ; Extend # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA
|
||||
A8C4..A8C5 ; Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
|
||||
A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
|
||||
A8FF ; Extend # Mn DEVANAGARI VOWEL SIGN AY
|
||||
|
@ -315,6 +320,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
|
|||
10A3F ; Extend # Mn KHAROSHTHI VIRAMA
|
||||
10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
|
||||
10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
|
||||
10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
|
||||
10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
|
||||
11001 ; Extend # Mn BRAHMI SIGN ANUSVARA
|
||||
11038..11046 ; Extend # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
|
||||
|
@ -328,6 +334,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
|
|||
11180..11181 ; Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA
|
||||
111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O
|
||||
111C9..111CC ; Extend # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK
|
||||
111CF ; Extend # Mn SHARADA SIGN INVERTED CANDRABINDU
|
||||
1122F..11231 ; Extend # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
|
||||
11234 ; Extend # Mn KHOJKI SIGN ANUSVARA
|
||||
11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
|
||||
|
@ -368,6 +375,10 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
|
|||
11727..1172B ; Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER
|
||||
1182F..11837 ; Extend # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA
|
||||
11839..1183A ; Extend # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA
|
||||
11930 ; Extend # Mc DIVES AKURU VOWEL SIGN AA
|
||||
1193B..1193C ; Extend # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU
|
||||
1193E ; Extend # Mn DIVES AKURU VIRAMA
|
||||
11943 ; Extend # Mn DIVES AKURU SIGN NUKTA
|
||||
119D4..119D7 ; Extend # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR
|
||||
119DA..119DB ; Extend # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI
|
||||
119E0 ; Extend # Mn NANDINAGARI SIGN VIRAMA
|
||||
|
@ -399,6 +410,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
|
|||
16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
|
||||
16F4F ; Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR
|
||||
16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
|
||||
16FE4 ; Extend # Mn KHITAN SMALL SCRIPT FILLER
|
||||
1BC9D..1BC9E ; Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
|
||||
1D165 ; Extend # Mc MUSICAL SYMBOL COMBINING STEM
|
||||
1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
|
||||
|
@ -426,7 +438,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
|
|||
E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG
|
||||
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
||||
|
||||
# Total code points: 1970
|
||||
# Total code points: 1984
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -539,6 +551,7 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
|
|||
11182 ; SpacingMark # Mc SHARADA SIGN VISARGA
|
||||
111B3..111B5 ; SpacingMark # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II
|
||||
111BF..111C0 ; SpacingMark # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA
|
||||
111CE ; SpacingMark # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E
|
||||
1122C..1122E ; SpacingMark # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II
|
||||
11232..11233 ; SpacingMark # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU
|
||||
11235 ; SpacingMark # Mc KHOJKI SIGN VIRAMA
|
||||
|
@ -570,6 +583,11 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
|
|||
11726 ; SpacingMark # Mc AHOM VOWEL SIGN E
|
||||
1182C..1182E ; SpacingMark # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II
|
||||
11838 ; SpacingMark # Mc DOGRA SIGN VISARGA
|
||||
11931..11935 ; SpacingMark # Mc [5] DIVES AKURU VOWEL SIGN I..DIVES AKURU VOWEL SIGN E
|
||||
11937..11938 ; SpacingMark # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O
|
||||
1193D ; SpacingMark # Mc DIVES AKURU SIGN HALANTA
|
||||
11940 ; SpacingMark # Mc DIVES AKURU MEDIAL YA
|
||||
11942 ; SpacingMark # Mc DIVES AKURU MEDIAL RA
|
||||
119D1..119D3 ; SpacingMark # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II
|
||||
119DC..119DF ; SpacingMark # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA
|
||||
119E4 ; SpacingMark # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E
|
||||
|
@ -586,10 +604,11 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
|
|||
11D96 ; SpacingMark # Mc GUNJALA GONDI SIGN VISARGA
|
||||
11EF5..11EF6 ; SpacingMark # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
|
||||
16F51..16F87 ; SpacingMark # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
|
||||
16FF0..16FF1 ; SpacingMark # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
|
||||
1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
|
||||
1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT
|
||||
|
||||
# Total code points: 375
|
||||
# Total code points: 388
|
||||
|
||||
# ================================================
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# ScriptExtensions-12.1.0.txt
|
||||
# Date: 2019-04-01, 09:10:42 GMT
|
||||
# © 2019 Unicode®, Inc.
|
||||
# ScriptExtensions-13.0.0.txt
|
||||
# Date: 2020-01-22, 00:07:43 GMT
|
||||
# © 2020 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
|
@ -147,19 +147,10 @@
|
|||
|
||||
# Script_Extensions=Arab Thaa
|
||||
|
||||
0660..0669 ; Arab Thaa # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
|
||||
FDF2 ; Arab Thaa # Lo ARABIC LIGATURE ALLAH ISOLATED FORM
|
||||
FDFD ; Arab Thaa # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
|
||||
|
||||
# Total code points: 12
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Armn Geor
|
||||
|
||||
0589 ; Armn Geor # Po ARMENIAN FULL STOP
|
||||
|
||||
# Total code points: 1
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -229,6 +220,14 @@ A66F ; Cyrl Glag # Mn COMBINING CYRILLIC VZMET
|
|||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Cyrl Syrc
|
||||
|
||||
1DF8 ; Cyrl Syrc # Mn COMBINING DOT ABOVE LEFT
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Gran
|
||||
|
||||
1CD3 ; Deva Gran # Po VEDIC SIGN NIHSHVASA
|
||||
|
@ -305,6 +304,14 @@ A8F3 ; Deva Taml # Lo DEVANAGARI SIGN CANDRABINDU VIRAMA
|
|||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Hani Latn
|
||||
|
||||
A700..A707 ; Hani Latn # Sk [8] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER CHINESE TONE YANG RU
|
||||
|
||||
# Total code points: 8
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Hira Kana
|
||||
|
||||
3031..3035 ; Hira Kana # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
|
||||
|
@ -352,6 +359,14 @@ FF9E..FF9F ; Hira Kana # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFW
|
|||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Thaa Yezi
|
||||
|
||||
0660..0669 ; Arab Thaa Yezi # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Cakm Sylo
|
||||
|
||||
09E6..09EF ; Beng Cakm Sylo # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
|
||||
|
@ -409,16 +424,6 @@ A92E ; Kali Latn Mymr # Po KAYAH LI SIGN CWI
|
|||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Rohg Syrc Thaa
|
||||
|
||||
060C ; Arab Rohg Syrc Thaa # Po ARABIC COMMA
|
||||
061B ; Arab Rohg Syrc Thaa # Po ARABIC SEMICOLON
|
||||
061F ; Arab Rohg Syrc Thaa # Po ARABIC QUESTION MARK
|
||||
|
||||
# Total code points: 3
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Deva Gran Knda
|
||||
|
||||
1CD0 ; Beng Deva Gran Knda # Mn VEDIC TONE KARSHANA
|
||||
|
@ -444,6 +449,16 @@ A92E ; Kali Latn Mymr # Po KAYAH LI SIGN CWI
|
|||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Rohg Syrc Thaa Yezi
|
||||
|
||||
060C ; Arab Rohg Syrc Thaa Yezi # Po ARABIC COMMA
|
||||
061B ; Arab Rohg Syrc Thaa Yezi # Po ARABIC SEMICOLON
|
||||
061F ; Arab Rohg Syrc Thaa Yezi # Po ARABIC QUESTION MARK
|
||||
|
||||
# Total code points: 3
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Bopo Hang Hani Hira Kana
|
||||
|
||||
3003 ; Bopo Hang Hani Hira Kana # Po DITTO MARK
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# Scripts-12.1.0.txt
|
||||
# Date: 2019-04-01, 09:10:42 GMT
|
||||
# © 2019 Unicode®, Inc.
|
||||
# Scripts-13.0.0.txt
|
||||
# Date: 2020-01-22, 00:07:43 GMT
|
||||
# © 2020 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
|
@ -89,7 +89,6 @@
|
|||
037E ; Common # Po GREEK QUESTION MARK
|
||||
0385 ; Common # Sk GREEK DIALYTIKA TONOS
|
||||
0387 ; Common # Po GREEK ANO TELEIA
|
||||
0589 ; Common # Po ARMENIAN FULL STOP
|
||||
0605 ; Common # Cf ARABIC NUMBER MARK ABOVE
|
||||
060C ; Common # Po ARABIC COMMA
|
||||
061B ; Common # Po ARABIC SEMICOLON
|
||||
|
@ -308,7 +307,7 @@
|
|||
2B47..2B4C ; Common # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
|
||||
2B4D..2B73 ; Common # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR
|
||||
2B76..2B95 ; Common # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
|
||||
2B98..2BFF ; Common # So [104] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..HELLSCHREIBER PAUSE SYMBOL
|
||||
2B97..2BFF ; Common # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL
|
||||
2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
|
||||
2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET
|
||||
2E03 ; Common # Pf RIGHT SUBSTITUTION BRACKET
|
||||
|
@ -347,6 +346,8 @@
|
|||
2E41 ; Common # Po REVERSED COMMA
|
||||
2E42 ; Common # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
|
||||
2E43..2E4F ; Common # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER
|
||||
2E50..2E51 ; Common # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR
|
||||
2E52 ; Common # Po TIRONIAN SIGN CAPITAL ET
|
||||
2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
|
||||
3000 ; Common # Zs IDEOGRAPHIC SPACE
|
||||
3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
|
||||
|
@ -414,6 +415,7 @@ A839 ; Common # So NORTH INDIC QUANTITY MARK
|
|||
A92E ; Common # Po KAYAH LI SIGN CWI
|
||||
A9CF ; Common # Lm JAVANESE PANGRANGKEP
|
||||
AB5B ; Common # Sk MODIFIER BREVE WITH INVERTED BREVE
|
||||
AB6A..AB6B ; Common # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK
|
||||
FD3E ; Common # Pe ORNATE LEFT PARENTHESIS
|
||||
FD3F ; Common # Ps ORNATE RIGHT PARENTHESIS
|
||||
FE10..FE16 ; Common # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK
|
||||
|
@ -506,7 +508,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
|
|||
10100..10102 ; Common # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK
|
||||
10107..10133 ; Common # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
|
||||
10137..1013F ; Common # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
|
||||
10190..1019B ; Common # So [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
|
||||
10190..1019C ; Common # So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL
|
||||
101D0..101FC ; Common # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
|
||||
102E1..102FB ; Common # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
|
||||
16FE2 ; Common # Po OLD CHINESE HOOK MARK
|
||||
|
@ -581,8 +583,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
|
|||
1F0C1..1F0CF ; Common # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER
|
||||
1F0D1..1F0F5 ; Common # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21
|
||||
1F100..1F10C ; Common # No [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
|
||||
1F110..1F16C ; Common # So [93] PARENTHESIZED LATIN CAPITAL LETTER A..RAISED MR SIGN
|
||||
1F170..1F1AC ; Common # So [61] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VOD
|
||||
1F10D..1F1AD ; Common # So [161] CIRCLED ZERO WITH SLASH..MASK WORK SYMBOL
|
||||
1F1E6..1F1FF ; Common # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
|
||||
1F201..1F202 ; Common # So [2] SQUARED KATAKANA KOKO..SQUARED KATAKANA SA
|
||||
1F210..1F23B ; Common # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
|
||||
|
@ -591,9 +592,9 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
|
|||
1F260..1F265 ; Common # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI
|
||||
1F300..1F3FA ; Common # So [251] CYCLONE..AMPHORA
|
||||
1F3FB..1F3FF ; Common # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
|
||||
1F400..1F6D5 ; Common # So [726] RAT..HINDU TEMPLE
|
||||
1F400..1F6D7 ; Common # So [728] RAT..ELEVATOR
|
||||
1F6E0..1F6EC ; Common # So [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
|
||||
1F6F0..1F6FA ; Common # So [11] SATELLITE..AUTO RICKSHAW
|
||||
1F6F0..1F6FC ; Common # So [13] SATELLITE..ROLLER SKATE
|
||||
1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
|
||||
1F780..1F7D8 ; Common # So [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE
|
||||
1F7E0..1F7EB ; Common # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
|
||||
|
@ -602,22 +603,25 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
|
|||
1F850..1F859 ; Common # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
|
||||
1F860..1F887 ; Common # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
|
||||
1F890..1F8AD ; Common # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
|
||||
1F900..1F90B ; Common # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT
|
||||
1F90D..1F971 ; Common # So [101] WHITE HEART..YAWNING FACE
|
||||
1F973..1F976 ; Common # So [4] FACE WITH PARTY HORN AND PARTY HAT..FREEZING FACE
|
||||
1F97A..1F9A2 ; Common # So [41] FACE WITH PLEADING EYES..SWAN
|
||||
1F9A5..1F9AA ; Common # So [6] SLOTH..OYSTER
|
||||
1F9AE..1F9CA ; Common # So [29] GUIDE DOG..ICE CUBE
|
||||
1F8B0..1F8B1 ; Common # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
|
||||
1F900..1F978 ; Common # So [121] CIRCLED CROSS FORMEE WITH FOUR DOTS..DISGUISED FACE
|
||||
1F97A..1F9CB ; Common # So [82] FACE WITH PLEADING EYES..BUBBLE TEA
|
||||
1F9CD..1FA53 ; Common # So [135] STANDING PERSON..BLACK CHESS KNIGHT-BISHOP
|
||||
1FA60..1FA6D ; Common # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
|
||||
1FA70..1FA73 ; Common # So [4] BALLET SHOES..SHORTS
|
||||
1FA70..1FA74 ; Common # So [5] BALLET SHOES..THONG SANDAL
|
||||
1FA78..1FA7A ; Common # So [3] DROP OF BLOOD..STETHOSCOPE
|
||||
1FA80..1FA82 ; Common # So [3] YO-YO..PARACHUTE
|
||||
1FA90..1FA95 ; Common # So [6] RINGED PLANET..BANJO
|
||||
1FA80..1FA86 ; Common # So [7] YO-YO..NESTING DOLLS
|
||||
1FA90..1FAA8 ; Common # So [25] RINGED PLANET..ROCK
|
||||
1FAB0..1FAB6 ; Common # So [7] FLY..FEATHER
|
||||
1FAC0..1FAC2 ; Common # So [3] ANATOMICAL HEART..PEOPLE HUGGING
|
||||
1FAD0..1FAD6 ; Common # So [7] BLUEBERRIES..TEAPOT
|
||||
1FB00..1FB92 ; Common # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
|
||||
1FB94..1FBCA ; Common # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
|
||||
1FBF0..1FBF9 ; Common # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
|
||||
E0001 ; Common # Cf LANGUAGE TAG
|
||||
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
|
||||
|
||||
# Total code points: 7805
|
||||
# Total code points: 8087
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -661,7 +665,8 @@ A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSU
|
|||
A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
|
||||
A78F ; Latin # Lo LATIN LETTER SINOLOGICAL DOT
|
||||
A790..A7BF ; Latin # L& [48] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER GLOTTAL U
|
||||
A7C2..A7C6 ; Latin # L& [5] LATIN CAPITAL LETTER ANGLICANA W..LATIN CAPITAL LETTER Z WITH PALATAL HOOK
|
||||
A7C2..A7CA ; Latin # L& [9] LATIN CAPITAL LETTER ANGLICANA W..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
|
||||
A7F5..A7F6 ; Latin # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
|
||||
A7F7 ; Latin # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
|
||||
A7F8..A7F9 ; Latin # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
|
||||
A7FA ; Latin # L& LATIN LETTER SMALL CAPITAL TURNED M
|
||||
|
@ -669,12 +674,13 @@ A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGR
|
|||
AB30..AB5A ; Latin # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
|
||||
AB5C..AB5F ; Latin # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
|
||||
AB60..AB64 ; Latin # L& [5] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER INVERTED ALPHA
|
||||
AB66..AB67 ; Latin # L& [2] LATIN SMALL LETTER DZ DIGRAPH WITH RETROFLEX HOOK..LATIN SMALL LETTER TS DIGRAPH WITH RETROFLEX HOOK
|
||||
AB66..AB68 ; Latin # L& [3] LATIN SMALL LETTER DZ DIGRAPH WITH RETROFLEX HOOK..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE
|
||||
AB69 ; Latin # Lm MODIFIER LETTER SMALL TURNED W
|
||||
FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
|
||||
FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
|
||||
|
||||
# Total code points: 1366
|
||||
# Total code points: 1374
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -769,12 +775,13 @@ FE2E..FE2F ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBININ
|
|||
0559 ; Armenian # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
|
||||
055A..055F ; Armenian # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
|
||||
0560..0588 ; Armenian # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE
|
||||
0589 ; Armenian # Po ARMENIAN FULL STOP
|
||||
058A ; Armenian # Pd ARMENIAN HYPHEN
|
||||
058D..058E ; Armenian # So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN
|
||||
058F ; Armenian # Sc ARMENIAN DRAM SIGN
|
||||
FB13..FB17 ; Armenian # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
|
||||
|
||||
# Total code points: 95
|
||||
# Total code points: 96
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -837,7 +844,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
|
|||
06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V
|
||||
0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
|
||||
08A0..08B4 ; Arabic # Lo [21] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER KAF WITH DOT BELOW
|
||||
08B6..08BD ; Arabic # Lo [8] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER AFRICAN NOON
|
||||
08B6..08C7 ; Arabic # Lo [18] ARABIC LETTER BEH WITH SMALL MEEM ABOVE..ARABIC LETTER LAM WITH SMALL ARABIC LETTER TAH ABOVE
|
||||
08D3..08E1 ; Arabic # Mn [15] ARABIC SMALL LOW WAW..ARABIC SMALL HIGH SIGN SAFHA
|
||||
08E3..08FF ; Arabic # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA
|
||||
FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
|
||||
|
@ -886,7 +893,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
|
|||
1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
|
||||
1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
|
||||
|
||||
# Total code points: 1281
|
||||
# Total code points: 1291
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1051,7 +1058,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
|
|||
0B47..0B48 ; Oriya # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
|
||||
0B4B..0B4C ; Oriya # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
|
||||
0B4D ; Oriya # Mn ORIYA SIGN VIRAMA
|
||||
0B56 ; Oriya # Mn ORIYA AI LENGTH MARK
|
||||
0B55..0B56 ; Oriya # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
|
||||
0B57 ; Oriya # Mc ORIYA AU LENGTH MARK
|
||||
0B5C..0B5D ; Oriya # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA
|
||||
0B5F..0B61 ; Oriya # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL
|
||||
|
@ -1061,7 +1068,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
|
|||
0B71 ; Oriya # Lo ORIYA LETTER WA
|
||||
0B72..0B77 ; Oriya # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS
|
||||
|
||||
# Total code points: 90
|
||||
# Total code points: 91
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1155,7 +1162,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
|
|||
|
||||
0D00..0D01 ; Malayalam # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
|
||||
0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
|
||||
0D05..0D0C ; Malayalam # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
|
||||
0D04..0D0C ; Malayalam # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
|
||||
0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
|
||||
0D12..0D3A ; Malayalam # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
|
||||
0D3B..0D3C ; Malayalam # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
|
||||
|
@ -1177,10 +1184,11 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
|
|||
0D79 ; Malayalam # So MALAYALAM DATE MARK
|
||||
0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
|
||||
|
||||
# Total code points: 117
|
||||
# Total code points: 118
|
||||
|
||||
# ================================================
|
||||
|
||||
0D81 ; Sinhala # Mn SINHALA SIGN CANDRABINDU
|
||||
0D82..0D83 ; Sinhala # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
|
||||
0D85..0D96 ; Sinhala # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
|
||||
0D9A..0DB1 ; Sinhala # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
|
||||
|
@ -1197,7 +1205,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
|
|||
0DF4 ; Sinhala # Po SINHALA PUNCTUATION KUNDDALIYA
|
||||
111E1..111F4 ; Sinhala # No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND
|
||||
|
||||
# Total code points: 110
|
||||
# Total code points: 111
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1515,9 +1523,9 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
|
|||
|
||||
02EA..02EB ; Bopomofo # Sk [2] MODIFIER LETTER YIN DEPARTING TONE MARK..MODIFIER LETTER YANG DEPARTING TONE MARK
|
||||
3105..312F ; Bopomofo # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN
|
||||
31A0..31BA ; Bopomofo # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY
|
||||
31A0..31BF ; Bopomofo # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
|
||||
|
||||
# Total code points: 72
|
||||
# Total code points: 77
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1529,18 +1537,20 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
|
|||
3021..3029 ; Han # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
|
||||
3038..303A ; Han # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
|
||||
303B ; Han # Lm VERTICAL IDEOGRAPHIC ITERATION MARK
|
||||
3400..4DB5 ; Han # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
|
||||
4E00..9FEF ; Han # Lo [20976] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FEF
|
||||
3400..4DBF ; Han # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
|
||||
4E00..9FFC ; Han # Lo [20989] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFC
|
||||
F900..FA6D ; Han # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
|
||||
FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
|
||||
20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
|
||||
16FF0..16FF1 ; Han # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY
|
||||
20000..2A6DD ; Han # Lo [42718] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DD
|
||||
2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
|
||||
2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
|
||||
2B820..2CEA1 ; Han # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
|
||||
2CEB0..2EBE0 ; Han # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
|
||||
2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
|
||||
|
||||
# Total code points: 89233
|
||||
# Total code points: 94204
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1583,6 +1593,7 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
|
|||
0951..0954 ; Inherited # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
|
||||
1AB0..1ABD ; Inherited # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
|
||||
1ABE ; Inherited # Me COMBINING PARENTHESES OVERLAY
|
||||
1ABF..1AC0 ; Inherited # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW
|
||||
1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
|
||||
1CD4..1CE0 ; Inherited # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
|
||||
1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
|
||||
|
@ -1610,7 +1621,7 @@ FE20..FE2D ; Inherited # Mn [14] COMBINING LIGATURE LEFT HALF..COMBINING CON
|
|||
1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
|
||||
E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
||||
|
||||
# Total code points: 571
|
||||
# Total code points: 573
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1783,8 +1794,9 @@ A823..A824 ; Syloti_Nagri # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI
|
|||
A825..A826 ; Syloti_Nagri # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
|
||||
A827 ; Syloti_Nagri # Mc SYLOTI NAGRI VOWEL SIGN OO
|
||||
A828..A82B ; Syloti_Nagri # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4
|
||||
A82C ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA
|
||||
|
||||
# Total code points: 44
|
||||
# Total code points: 45
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -2063,8 +2075,9 @@ AADE..AADF ; Tai_Viet # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI
|
|||
A4D0..A4F7 ; Lisu # Lo [40] LISU LETTER BA..LISU LETTER OE
|
||||
A4F8..A4FD ; Lisu # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU
|
||||
A4FE..A4FF ; Lisu # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
|
||||
11FB0 ; Lisu # Lo LISU LETTER YHA
|
||||
|
||||
# Total code points: 48
|
||||
# Total code points: 49
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -2217,8 +2230,9 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
|||
11140..11143 ; Chakma # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK
|
||||
11144 ; Chakma # Lo CHAKMA LETTER LHAA
|
||||
11145..11146 ; Chakma # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI
|
||||
11147 ; Chakma # Lo CHAKMA LETTER VAA
|
||||
|
||||
# Total code points: 70
|
||||
# Total code points: 71
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -2259,13 +2273,15 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
|||
111C5..111C8 ; Sharada # Po [4] SHARADA DANDA..SHARADA SEPARATOR
|
||||
111C9..111CC ; Sharada # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK
|
||||
111CD ; Sharada # Po SHARADA SUTRA MARK
|
||||
111CE ; Sharada # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E
|
||||
111CF ; Sharada # Mn SHARADA SIGN INVERTED CANDRABINDU
|
||||
111D0..111D9 ; Sharada # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE
|
||||
111DA ; Sharada # Lo SHARADA EKAM
|
||||
111DB ; Sharada # Po SHARADA SIGN SIDDHAM
|
||||
111DC ; Sharada # Lo SHARADA HEADSTROKE
|
||||
111DD..111DF ; Sharada # Po [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2
|
||||
|
||||
# Total code points: 94
|
||||
# Total code points: 96
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -2650,12 +2666,12 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
|||
11447..1144A ; Newa # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI
|
||||
1144B..1144F ; Newa # Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN
|
||||
11450..11459 ; Newa # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE
|
||||
1145B ; Newa # Po NEWA PLACEHOLDER MARK
|
||||
1145A..1145B ; Newa # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK
|
||||
1145D ; Newa # Po NEWA INSERTION SIGN
|
||||
1145E ; Newa # Mn NEWA SANDHI MARK
|
||||
1145F ; Newa # Lo NEWA LETTER VEDIC ANUSVARA
|
||||
1145F..11461 ; Newa # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA
|
||||
|
||||
# Total code points: 94
|
||||
# Total code points: 97
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -2668,9 +2684,10 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
|||
|
||||
16FE0 ; Tangut # Lm TANGUT ITERATION MARK
|
||||
17000..187F7 ; Tangut # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7
|
||||
18800..18AF2 ; Tangut # Lo [755] TANGUT COMPONENT-001..TANGUT COMPONENT-755
|
||||
18800..18AFF ; Tangut # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768
|
||||
18D00..18D08 ; Tangut # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08
|
||||
|
||||
# Total code points: 6892
|
||||
# Total code points: 6914
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -2835,4 +2852,49 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
|||
|
||||
# Total code points: 59
|
||||
|
||||
# ================================================
|
||||
|
||||
10FB0..10FC4 ; Chorasmian # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW
|
||||
10FC5..10FCB ; Chorasmian # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED
|
||||
|
||||
# Total code points: 28
|
||||
|
||||
# ================================================
|
||||
|
||||
11900..11906 ; Dives_Akuru # Lo [7] DIVES AKURU LETTER A..DIVES AKURU LETTER E
|
||||
11909 ; Dives_Akuru # Lo DIVES AKURU LETTER O
|
||||
1190C..11913 ; Dives_Akuru # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA
|
||||
11915..11916 ; Dives_Akuru # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA
|
||||
11918..1192F ; Dives_Akuru # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA
|
||||
11930..11935 ; Dives_Akuru # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E
|
||||
11937..11938 ; Dives_Akuru # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O
|
||||
1193B..1193C ; Dives_Akuru # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU
|
||||
1193D ; Dives_Akuru # Mc DIVES AKURU SIGN HALANTA
|
||||
1193E ; Dives_Akuru # Mn DIVES AKURU VIRAMA
|
||||
1193F ; Dives_Akuru # Lo DIVES AKURU PREFIXED NASAL SIGN
|
||||
11940 ; Dives_Akuru # Mc DIVES AKURU MEDIAL YA
|
||||
11941 ; Dives_Akuru # Lo DIVES AKURU INITIAL RA
|
||||
11942 ; Dives_Akuru # Mc DIVES AKURU MEDIAL RA
|
||||
11943 ; Dives_Akuru # Mn DIVES AKURU SIGN NUKTA
|
||||
11944..11946 ; Dives_Akuru # Po [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK
|
||||
11950..11959 ; Dives_Akuru # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
|
||||
|
||||
# Total code points: 72
|
||||
|
||||
# ================================================
|
||||
|
||||
16FE4 ; Khitan_Small_Script # Mn KHITAN SMALL SCRIPT FILLER
|
||||
18B00..18CD5 ; Khitan_Small_Script # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5
|
||||
|
||||
# Total code points: 471
|
||||
|
||||
# ================================================
|
||||
|
||||
10E80..10EA9 ; Yezidi # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
|
||||
10EAB..10EAC ; Yezidi # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
|
||||
10EAD ; Yezidi # Pd YEZIDI HYPHENATION MARK
|
||||
10EB0..10EB1 ; Yezidi # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
|
||||
|
||||
# Total code points: 47
|
||||
|
||||
# EOF
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -2,7 +2,7 @@
|
|||
* A program for testing the Unicode property table *
|
||||
***************************************************/
|
||||
|
||||
/* Copyright (c) University of Cambridge 2008-2019 */
|
||||
/* Copyright (c) University of Cambridge 2008-2020 */
|
||||
|
||||
/* Compile thus:
|
||||
|
||||
|
@ -255,7 +255,12 @@ const unsigned char *script_names[] = {
|
|||
US"Elymaic",
|
||||
US"Nandinagari",
|
||||
US"Nyiakeng_Puachue_Hmong",
|
||||
US"Wancho"
|
||||
US"Wancho",
|
||||
/* New for Unicode 13.0.0 */
|
||||
US"Chorasmian",
|
||||
US"Dives_Akuru",
|
||||
US"Khitan_Small_Script",
|
||||
US"Yezidi"
|
||||
};
|
||||
|
||||
const unsigned char *type_names[] = {
|
||||
|
|
|
@ -40,3 +40,9 @@ findprop 11700 14400 108e0 11280 1d800
|
|||
findprop 11800 1e903 11da9 10d27 11ee0 16e48 10f27 10f30
|
||||
|
||||
findprop a836 a833 1cf4 20f0 1cd0
|
||||
|
||||
findprop 32ff
|
||||
|
||||
findprop 1f16d
|
||||
|
||||
findprop 10e93 10eaa
|
||||
|
|
|
@ -386,3 +386,13 @@ a833 Number: Other number, Common, Other, [Devanagari, Dogra, Gujarati, Gurmukhi
|
|||
1cf4 Mark: Non-spacing mark, Inherited, Extend, [Devanagari, Grantha, Kannada]
|
||||
20f0 Mark: Non-spacing mark, Inherited, Extend, [Devanagari, Grantha, Latin]
|
||||
1cd0 Mark: Non-spacing mark, Inherited, Extend, [Bengali, Devanagari, Grantha, Kannada]
|
||||
|
||||
findprop 32ff
|
||||
32ff Symbol: Other symbol, Common, Other, [Han]
|
||||
|
||||
findprop 1f16d
|
||||
1f16d Symbol: Other symbol, Common, Extended Pictographic
|
||||
|
||||
findprop 10e93 10eaa
|
||||
10e93 Letter: Other letter, Yezidi, Other
|
||||
10eaa Control: Unassigned, Unknown, Other
|
||||
|
|
|
@ -265,6 +265,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0"
|
||||
#define STRING_Cham0 STR_C STR_h STR_a STR_m "\0"
|
||||
#define STRING_Cherokee0 STR_C STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0"
|
||||
#define STRING_Chorasmian0 STR_C STR_h STR_o STR_r STR_a STR_s STR_m STR_i STR_a STR_n "\0"
|
||||
#define STRING_Cn0 STR_C STR_n "\0"
|
||||
#define STRING_Co0 STR_C STR_o "\0"
|
||||
#define STRING_Common0 STR_C STR_o STR_m STR_m STR_o STR_n "\0"
|
||||
|
@ -275,6 +276,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
|
||||
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
|
||||
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
|
||||
#define STRING_Dives_Akuru0 STR_D STR_i STR_v STR_e STR_s STR_UNDERSCORE STR_A STR_k STR_u STR_r STR_u "\0"
|
||||
#define STRING_Dogra0 STR_D STR_o STR_g STR_r STR_a "\0"
|
||||
#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
|
||||
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
|
@ -306,6 +308,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Katakana0 STR_K STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0"
|
||||
#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
|
||||
#define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
|
||||
#define STRING_Khitan_Small_Script0 STR_K STR_h STR_i STR_t STR_a STR_n STR_UNDERSCORE STR_S STR_m STR_a STR_l STR_l STR_UNDERSCORE STR_S STR_c STR_r STR_i STR_p STR_t "\0"
|
||||
#define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0"
|
||||
#define STRING_Khojki0 STR_K STR_h STR_o STR_j STR_k STR_i "\0"
|
||||
#define STRING_Khudawadi0 STR_K STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0"
|
||||
|
@ -429,6 +432,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
|
||||
#define STRING_Xuc0 STR_X STR_u STR_c "\0"
|
||||
#define STRING_Xwd0 STR_X STR_w STR_d "\0"
|
||||
#define STRING_Yezidi0 STR_Y STR_e STR_z STR_i STR_d STR_i "\0"
|
||||
#define STRING_Yi0 STR_Y STR_i "\0"
|
||||
#define STRING_Z0 STR_Z "\0"
|
||||
#define STRING_Zanabazar_Square0 STR_Z STR_a STR_n STR_a STR_b STR_a STR_z STR_a STR_r STR_UNDERSCORE STR_S STR_q STR_u STR_a STR_r STR_e "\0"
|
||||
|
@ -464,6 +468,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Chakma0
|
||||
STRING_Cham0
|
||||
STRING_Cherokee0
|
||||
STRING_Chorasmian0
|
||||
STRING_Cn0
|
||||
STRING_Co0
|
||||
STRING_Common0
|
||||
|
@ -474,6 +479,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Cyrillic0
|
||||
STRING_Deseret0
|
||||
STRING_Devanagari0
|
||||
STRING_Dives_Akuru0
|
||||
STRING_Dogra0
|
||||
STRING_Duployan0
|
||||
STRING_Egyptian_Hieroglyphs0
|
||||
|
@ -505,6 +511,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Katakana0
|
||||
STRING_Kayah_Li0
|
||||
STRING_Kharoshthi0
|
||||
STRING_Khitan_Small_Script0
|
||||
STRING_Khmer0
|
||||
STRING_Khojki0
|
||||
STRING_Khudawadi0
|
||||
|
@ -628,6 +635,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Xsp0
|
||||
STRING_Xuc0
|
||||
STRING_Xwd0
|
||||
STRING_Yezidi0
|
||||
STRING_Yi0
|
||||
STRING_Z0
|
||||
STRING_Zanabazar_Square0
|
||||
|
@ -663,176 +671,180 @@ const ucp_type_table PRIV(utt)[] = {
|
|||
{ 203, PT_SC, ucp_Chakma },
|
||||
{ 210, PT_SC, ucp_Cham },
|
||||
{ 215, PT_SC, ucp_Cherokee },
|
||||
{ 224, PT_PC, ucp_Cn },
|
||||
{ 227, PT_PC, ucp_Co },
|
||||
{ 230, PT_SC, ucp_Common },
|
||||
{ 237, PT_SC, ucp_Coptic },
|
||||
{ 244, PT_PC, ucp_Cs },
|
||||
{ 247, PT_SC, ucp_Cuneiform },
|
||||
{ 257, PT_SC, ucp_Cypriot },
|
||||
{ 265, PT_SC, ucp_Cyrillic },
|
||||
{ 274, PT_SC, ucp_Deseret },
|
||||
{ 282, PT_SC, ucp_Devanagari },
|
||||
{ 293, PT_SC, ucp_Dogra },
|
||||
{ 299, PT_SC, ucp_Duployan },
|
||||
{ 308, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 329, PT_SC, ucp_Elbasan },
|
||||
{ 337, PT_SC, ucp_Elymaic },
|
||||
{ 345, PT_SC, ucp_Ethiopic },
|
||||
{ 354, PT_SC, ucp_Georgian },
|
||||
{ 363, PT_SC, ucp_Glagolitic },
|
||||
{ 374, PT_SC, ucp_Gothic },
|
||||
{ 381, PT_SC, ucp_Grantha },
|
||||
{ 389, PT_SC, ucp_Greek },
|
||||
{ 395, PT_SC, ucp_Gujarati },
|
||||
{ 404, PT_SC, ucp_Gunjala_Gondi },
|
||||
{ 418, PT_SC, ucp_Gurmukhi },
|
||||
{ 427, PT_SC, ucp_Han },
|
||||
{ 431, PT_SC, ucp_Hangul },
|
||||
{ 438, PT_SC, ucp_Hanifi_Rohingya },
|
||||
{ 454, PT_SC, ucp_Hanunoo },
|
||||
{ 462, PT_SC, ucp_Hatran },
|
||||
{ 469, PT_SC, ucp_Hebrew },
|
||||
{ 476, PT_SC, ucp_Hiragana },
|
||||
{ 485, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 502, PT_SC, ucp_Inherited },
|
||||
{ 512, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 534, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 557, PT_SC, ucp_Javanese },
|
||||
{ 566, PT_SC, ucp_Kaithi },
|
||||
{ 573, PT_SC, ucp_Kannada },
|
||||
{ 581, PT_SC, ucp_Katakana },
|
||||
{ 590, PT_SC, ucp_Kayah_Li },
|
||||
{ 599, PT_SC, ucp_Kharoshthi },
|
||||
{ 610, PT_SC, ucp_Khmer },
|
||||
{ 616, PT_SC, ucp_Khojki },
|
||||
{ 623, PT_SC, ucp_Khudawadi },
|
||||
{ 633, PT_GC, ucp_L },
|
||||
{ 635, PT_LAMP, 0 },
|
||||
{ 638, PT_SC, ucp_Lao },
|
||||
{ 642, PT_SC, ucp_Latin },
|
||||
{ 648, PT_SC, ucp_Lepcha },
|
||||
{ 655, PT_SC, ucp_Limbu },
|
||||
{ 661, PT_SC, ucp_Linear_A },
|
||||
{ 670, PT_SC, ucp_Linear_B },
|
||||
{ 679, PT_SC, ucp_Lisu },
|
||||
{ 684, PT_PC, ucp_Ll },
|
||||
{ 687, PT_PC, ucp_Lm },
|
||||
{ 690, PT_PC, ucp_Lo },
|
||||
{ 693, PT_PC, ucp_Lt },
|
||||
{ 696, PT_PC, ucp_Lu },
|
||||
{ 699, PT_SC, ucp_Lycian },
|
||||
{ 706, PT_SC, ucp_Lydian },
|
||||
{ 713, PT_GC, ucp_M },
|
||||
{ 715, PT_SC, ucp_Mahajani },
|
||||
{ 724, PT_SC, ucp_Makasar },
|
||||
{ 732, PT_SC, ucp_Malayalam },
|
||||
{ 742, PT_SC, ucp_Mandaic },
|
||||
{ 750, PT_SC, ucp_Manichaean },
|
||||
{ 761, PT_SC, ucp_Marchen },
|
||||
{ 769, PT_SC, ucp_Masaram_Gondi },
|
||||
{ 783, PT_PC, ucp_Mc },
|
||||
{ 786, PT_PC, ucp_Me },
|
||||
{ 789, PT_SC, ucp_Medefaidrin },
|
||||
{ 801, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 814, PT_SC, ucp_Mende_Kikakui },
|
||||
{ 828, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 845, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 866, PT_SC, ucp_Miao },
|
||||
{ 871, PT_PC, ucp_Mn },
|
||||
{ 874, PT_SC, ucp_Modi },
|
||||
{ 879, PT_SC, ucp_Mongolian },
|
||||
{ 889, PT_SC, ucp_Mro },
|
||||
{ 893, PT_SC, ucp_Multani },
|
||||
{ 901, PT_SC, ucp_Myanmar },
|
||||
{ 909, PT_GC, ucp_N },
|
||||
{ 911, PT_SC, ucp_Nabataean },
|
||||
{ 921, PT_SC, ucp_Nandinagari },
|
||||
{ 933, PT_PC, ucp_Nd },
|
||||
{ 936, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 948, PT_SC, ucp_Newa },
|
||||
{ 953, PT_SC, ucp_Nko },
|
||||
{ 957, PT_PC, ucp_Nl },
|
||||
{ 960, PT_PC, ucp_No },
|
||||
{ 963, PT_SC, ucp_Nushu },
|
||||
{ 969, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
|
||||
{ 992, PT_SC, ucp_Ogham },
|
||||
{ 998, PT_SC, ucp_Ol_Chiki },
|
||||
{ 1007, PT_SC, ucp_Old_Hungarian },
|
||||
{ 1021, PT_SC, ucp_Old_Italic },
|
||||
{ 1032, PT_SC, ucp_Old_North_Arabian },
|
||||
{ 1050, PT_SC, ucp_Old_Permic },
|
||||
{ 1061, PT_SC, ucp_Old_Persian },
|
||||
{ 1073, PT_SC, ucp_Old_Sogdian },
|
||||
{ 1085, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 1103, PT_SC, ucp_Old_Turkic },
|
||||
{ 1114, PT_SC, ucp_Oriya },
|
||||
{ 1120, PT_SC, ucp_Osage },
|
||||
{ 1126, PT_SC, ucp_Osmanya },
|
||||
{ 1134, PT_GC, ucp_P },
|
||||
{ 1136, PT_SC, ucp_Pahawh_Hmong },
|
||||
{ 1149, PT_SC, ucp_Palmyrene },
|
||||
{ 1159, PT_SC, ucp_Pau_Cin_Hau },
|
||||
{ 1171, PT_PC, ucp_Pc },
|
||||
{ 1174, PT_PC, ucp_Pd },
|
||||
{ 1177, PT_PC, ucp_Pe },
|
||||
{ 1180, PT_PC, ucp_Pf },
|
||||
{ 1183, PT_SC, ucp_Phags_Pa },
|
||||
{ 1192, PT_SC, ucp_Phoenician },
|
||||
{ 1203, PT_PC, ucp_Pi },
|
||||
{ 1206, PT_PC, ucp_Po },
|
||||
{ 1209, PT_PC, ucp_Ps },
|
||||
{ 1212, PT_SC, ucp_Psalter_Pahlavi },
|
||||
{ 1228, PT_SC, ucp_Rejang },
|
||||
{ 1235, PT_SC, ucp_Runic },
|
||||
{ 1241, PT_GC, ucp_S },
|
||||
{ 1243, PT_SC, ucp_Samaritan },
|
||||
{ 1253, PT_SC, ucp_Saurashtra },
|
||||
{ 1264, PT_PC, ucp_Sc },
|
||||
{ 1267, PT_SC, ucp_Sharada },
|
||||
{ 1275, PT_SC, ucp_Shavian },
|
||||
{ 1283, PT_SC, ucp_Siddham },
|
||||
{ 1291, PT_SC, ucp_SignWriting },
|
||||
{ 1303, PT_SC, ucp_Sinhala },
|
||||
{ 1311, PT_PC, ucp_Sk },
|
||||
{ 1314, PT_PC, ucp_Sm },
|
||||
{ 1317, PT_PC, ucp_So },
|
||||
{ 1320, PT_SC, ucp_Sogdian },
|
||||
{ 1328, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 1341, PT_SC, ucp_Soyombo },
|
||||
{ 1349, PT_SC, ucp_Sundanese },
|
||||
{ 1359, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 1372, PT_SC, ucp_Syriac },
|
||||
{ 1379, PT_SC, ucp_Tagalog },
|
||||
{ 1387, PT_SC, ucp_Tagbanwa },
|
||||
{ 1396, PT_SC, ucp_Tai_Le },
|
||||
{ 1403, PT_SC, ucp_Tai_Tham },
|
||||
{ 1412, PT_SC, ucp_Tai_Viet },
|
||||
{ 1421, PT_SC, ucp_Takri },
|
||||
{ 1427, PT_SC, ucp_Tamil },
|
||||
{ 1433, PT_SC, ucp_Tangut },
|
||||
{ 1440, PT_SC, ucp_Telugu },
|
||||
{ 1447, PT_SC, ucp_Thaana },
|
||||
{ 1454, PT_SC, ucp_Thai },
|
||||
{ 1459, PT_SC, ucp_Tibetan },
|
||||
{ 1467, PT_SC, ucp_Tifinagh },
|
||||
{ 1476, PT_SC, ucp_Tirhuta },
|
||||
{ 1484, PT_SC, ucp_Ugaritic },
|
||||
{ 1493, PT_SC, ucp_Unknown },
|
||||
{ 1501, PT_SC, ucp_Vai },
|
||||
{ 1505, PT_SC, ucp_Wancho },
|
||||
{ 1512, PT_SC, ucp_Warang_Citi },
|
||||
{ 1524, PT_ALNUM, 0 },
|
||||
{ 1528, PT_PXSPACE, 0 },
|
||||
{ 1532, PT_SPACE, 0 },
|
||||
{ 1536, PT_UCNC, 0 },
|
||||
{ 1540, PT_WORD, 0 },
|
||||
{ 1544, PT_SC, ucp_Yi },
|
||||
{ 1547, PT_GC, ucp_Z },
|
||||
{ 1549, PT_SC, ucp_Zanabazar_Square },
|
||||
{ 1566, PT_PC, ucp_Zl },
|
||||
{ 1569, PT_PC, ucp_Zp },
|
||||
{ 1572, PT_PC, ucp_Zs }
|
||||
{ 224, PT_SC, ucp_Chorasmian },
|
||||
{ 235, PT_PC, ucp_Cn },
|
||||
{ 238, PT_PC, ucp_Co },
|
||||
{ 241, PT_SC, ucp_Common },
|
||||
{ 248, PT_SC, ucp_Coptic },
|
||||
{ 255, PT_PC, ucp_Cs },
|
||||
{ 258, PT_SC, ucp_Cuneiform },
|
||||
{ 268, PT_SC, ucp_Cypriot },
|
||||
{ 276, PT_SC, ucp_Cyrillic },
|
||||
{ 285, PT_SC, ucp_Deseret },
|
||||
{ 293, PT_SC, ucp_Devanagari },
|
||||
{ 304, PT_SC, ucp_Dives_Akuru },
|
||||
{ 316, PT_SC, ucp_Dogra },
|
||||
{ 322, PT_SC, ucp_Duployan },
|
||||
{ 331, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 352, PT_SC, ucp_Elbasan },
|
||||
{ 360, PT_SC, ucp_Elymaic },
|
||||
{ 368, PT_SC, ucp_Ethiopic },
|
||||
{ 377, PT_SC, ucp_Georgian },
|
||||
{ 386, PT_SC, ucp_Glagolitic },
|
||||
{ 397, PT_SC, ucp_Gothic },
|
||||
{ 404, PT_SC, ucp_Grantha },
|
||||
{ 412, PT_SC, ucp_Greek },
|
||||
{ 418, PT_SC, ucp_Gujarati },
|
||||
{ 427, PT_SC, ucp_Gunjala_Gondi },
|
||||
{ 441, PT_SC, ucp_Gurmukhi },
|
||||
{ 450, PT_SC, ucp_Han },
|
||||
{ 454, PT_SC, ucp_Hangul },
|
||||
{ 461, PT_SC, ucp_Hanifi_Rohingya },
|
||||
{ 477, PT_SC, ucp_Hanunoo },
|
||||
{ 485, PT_SC, ucp_Hatran },
|
||||
{ 492, PT_SC, ucp_Hebrew },
|
||||
{ 499, PT_SC, ucp_Hiragana },
|
||||
{ 508, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 525, PT_SC, ucp_Inherited },
|
||||
{ 535, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 557, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 580, PT_SC, ucp_Javanese },
|
||||
{ 589, PT_SC, ucp_Kaithi },
|
||||
{ 596, PT_SC, ucp_Kannada },
|
||||
{ 604, PT_SC, ucp_Katakana },
|
||||
{ 613, PT_SC, ucp_Kayah_Li },
|
||||
{ 622, PT_SC, ucp_Kharoshthi },
|
||||
{ 633, PT_SC, ucp_Khitan_Small_Script },
|
||||
{ 653, PT_SC, ucp_Khmer },
|
||||
{ 659, PT_SC, ucp_Khojki },
|
||||
{ 666, PT_SC, ucp_Khudawadi },
|
||||
{ 676, PT_GC, ucp_L },
|
||||
{ 678, PT_LAMP, 0 },
|
||||
{ 681, PT_SC, ucp_Lao },
|
||||
{ 685, PT_SC, ucp_Latin },
|
||||
{ 691, PT_SC, ucp_Lepcha },
|
||||
{ 698, PT_SC, ucp_Limbu },
|
||||
{ 704, PT_SC, ucp_Linear_A },
|
||||
{ 713, PT_SC, ucp_Linear_B },
|
||||
{ 722, PT_SC, ucp_Lisu },
|
||||
{ 727, PT_PC, ucp_Ll },
|
||||
{ 730, PT_PC, ucp_Lm },
|
||||
{ 733, PT_PC, ucp_Lo },
|
||||
{ 736, PT_PC, ucp_Lt },
|
||||
{ 739, PT_PC, ucp_Lu },
|
||||
{ 742, PT_SC, ucp_Lycian },
|
||||
{ 749, PT_SC, ucp_Lydian },
|
||||
{ 756, PT_GC, ucp_M },
|
||||
{ 758, PT_SC, ucp_Mahajani },
|
||||
{ 767, PT_SC, ucp_Makasar },
|
||||
{ 775, PT_SC, ucp_Malayalam },
|
||||
{ 785, PT_SC, ucp_Mandaic },
|
||||
{ 793, PT_SC, ucp_Manichaean },
|
||||
{ 804, PT_SC, ucp_Marchen },
|
||||
{ 812, PT_SC, ucp_Masaram_Gondi },
|
||||
{ 826, PT_PC, ucp_Mc },
|
||||
{ 829, PT_PC, ucp_Me },
|
||||
{ 832, PT_SC, ucp_Medefaidrin },
|
||||
{ 844, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 857, PT_SC, ucp_Mende_Kikakui },
|
||||
{ 871, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 888, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 909, PT_SC, ucp_Miao },
|
||||
{ 914, PT_PC, ucp_Mn },
|
||||
{ 917, PT_SC, ucp_Modi },
|
||||
{ 922, PT_SC, ucp_Mongolian },
|
||||
{ 932, PT_SC, ucp_Mro },
|
||||
{ 936, PT_SC, ucp_Multani },
|
||||
{ 944, PT_SC, ucp_Myanmar },
|
||||
{ 952, PT_GC, ucp_N },
|
||||
{ 954, PT_SC, ucp_Nabataean },
|
||||
{ 964, PT_SC, ucp_Nandinagari },
|
||||
{ 976, PT_PC, ucp_Nd },
|
||||
{ 979, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 991, PT_SC, ucp_Newa },
|
||||
{ 996, PT_SC, ucp_Nko },
|
||||
{ 1000, PT_PC, ucp_Nl },
|
||||
{ 1003, PT_PC, ucp_No },
|
||||
{ 1006, PT_SC, ucp_Nushu },
|
||||
{ 1012, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
|
||||
{ 1035, PT_SC, ucp_Ogham },
|
||||
{ 1041, PT_SC, ucp_Ol_Chiki },
|
||||
{ 1050, PT_SC, ucp_Old_Hungarian },
|
||||
{ 1064, PT_SC, ucp_Old_Italic },
|
||||
{ 1075, PT_SC, ucp_Old_North_Arabian },
|
||||
{ 1093, PT_SC, ucp_Old_Permic },
|
||||
{ 1104, PT_SC, ucp_Old_Persian },
|
||||
{ 1116, PT_SC, ucp_Old_Sogdian },
|
||||
{ 1128, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 1146, PT_SC, ucp_Old_Turkic },
|
||||
{ 1157, PT_SC, ucp_Oriya },
|
||||
{ 1163, PT_SC, ucp_Osage },
|
||||
{ 1169, PT_SC, ucp_Osmanya },
|
||||
{ 1177, PT_GC, ucp_P },
|
||||
{ 1179, PT_SC, ucp_Pahawh_Hmong },
|
||||
{ 1192, PT_SC, ucp_Palmyrene },
|
||||
{ 1202, PT_SC, ucp_Pau_Cin_Hau },
|
||||
{ 1214, PT_PC, ucp_Pc },
|
||||
{ 1217, PT_PC, ucp_Pd },
|
||||
{ 1220, PT_PC, ucp_Pe },
|
||||
{ 1223, PT_PC, ucp_Pf },
|
||||
{ 1226, PT_SC, ucp_Phags_Pa },
|
||||
{ 1235, PT_SC, ucp_Phoenician },
|
||||
{ 1246, PT_PC, ucp_Pi },
|
||||
{ 1249, PT_PC, ucp_Po },
|
||||
{ 1252, PT_PC, ucp_Ps },
|
||||
{ 1255, PT_SC, ucp_Psalter_Pahlavi },
|
||||
{ 1271, PT_SC, ucp_Rejang },
|
||||
{ 1278, PT_SC, ucp_Runic },
|
||||
{ 1284, PT_GC, ucp_S },
|
||||
{ 1286, PT_SC, ucp_Samaritan },
|
||||
{ 1296, PT_SC, ucp_Saurashtra },
|
||||
{ 1307, PT_PC, ucp_Sc },
|
||||
{ 1310, PT_SC, ucp_Sharada },
|
||||
{ 1318, PT_SC, ucp_Shavian },
|
||||
{ 1326, PT_SC, ucp_Siddham },
|
||||
{ 1334, PT_SC, ucp_SignWriting },
|
||||
{ 1346, PT_SC, ucp_Sinhala },
|
||||
{ 1354, PT_PC, ucp_Sk },
|
||||
{ 1357, PT_PC, ucp_Sm },
|
||||
{ 1360, PT_PC, ucp_So },
|
||||
{ 1363, PT_SC, ucp_Sogdian },
|
||||
{ 1371, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 1384, PT_SC, ucp_Soyombo },
|
||||
{ 1392, PT_SC, ucp_Sundanese },
|
||||
{ 1402, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 1415, PT_SC, ucp_Syriac },
|
||||
{ 1422, PT_SC, ucp_Tagalog },
|
||||
{ 1430, PT_SC, ucp_Tagbanwa },
|
||||
{ 1439, PT_SC, ucp_Tai_Le },
|
||||
{ 1446, PT_SC, ucp_Tai_Tham },
|
||||
{ 1455, PT_SC, ucp_Tai_Viet },
|
||||
{ 1464, PT_SC, ucp_Takri },
|
||||
{ 1470, PT_SC, ucp_Tamil },
|
||||
{ 1476, PT_SC, ucp_Tangut },
|
||||
{ 1483, PT_SC, ucp_Telugu },
|
||||
{ 1490, PT_SC, ucp_Thaana },
|
||||
{ 1497, PT_SC, ucp_Thai },
|
||||
{ 1502, PT_SC, ucp_Tibetan },
|
||||
{ 1510, PT_SC, ucp_Tifinagh },
|
||||
{ 1519, PT_SC, ucp_Tirhuta },
|
||||
{ 1527, PT_SC, ucp_Ugaritic },
|
||||
{ 1536, PT_SC, ucp_Unknown },
|
||||
{ 1544, PT_SC, ucp_Vai },
|
||||
{ 1548, PT_SC, ucp_Wancho },
|
||||
{ 1555, PT_SC, ucp_Warang_Citi },
|
||||
{ 1567, PT_ALNUM, 0 },
|
||||
{ 1571, PT_PXSPACE, 0 },
|
||||
{ 1575, PT_SPACE, 0 },
|
||||
{ 1579, PT_UCNC, 0 },
|
||||
{ 1583, PT_WORD, 0 },
|
||||
{ 1587, PT_SC, ucp_Yezidi },
|
||||
{ 1594, PT_SC, ucp_Yi },
|
||||
{ 1597, PT_GC, ucp_Z },
|
||||
{ 1599, PT_SC, ucp_Zanabazar_Square },
|
||||
{ 1616, PT_PC, ucp_Zl },
|
||||
{ 1619, PT_PC, ucp_Zp },
|
||||
{ 1622, PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
||||
|
|
4201
src/pcre2_ucd.c
4201
src/pcre2_ucd.c
File diff suppressed because it is too large
Load Diff
|
@ -286,7 +286,12 @@ enum {
|
|||
ucp_Elymaic,
|
||||
ucp_Nandinagari,
|
||||
ucp_Nyiakeng_Puachue_Hmong,
|
||||
ucp_Wancho
|
||||
ucp_Wancho,
|
||||
/* New for Unicode 13.0.0 */
|
||||
ucp_Chorasmian,
|
||||
ucp_Dives_Akuru,
|
||||
ucp_Khitan_Small_Script,
|
||||
ucp_Yezidi
|
||||
};
|
||||
|
||||
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
|
||||
|
|
|
@ -804,10 +804,10 @@
|
|||
\x{4d00}
|
||||
\x{4db4}
|
||||
\x{4db5}
|
||||
\x{4db6}
|
||||
\= Expect no match
|
||||
a
|
||||
\x{2b0}
|
||||
\x{4db6}
|
||||
|
||||
/^\p{Lt}/utf
|
||||
\x{1c5}
|
||||
|
|
|
@ -2081,7 +2081,6 @@
|
|||
\x{655}
|
||||
|
||||
/^\p{Common}/utf
|
||||
\x{589}
|
||||
\x{60c}
|
||||
\x{61f}
|
||||
\x{964}
|
||||
|
@ -2158,6 +2157,11 @@
|
|||
/\p{Elymaic}\p{Nandinagari}\p{Nyiakeng_Puachue_Hmong}\p{Wancho}/utf
|
||||
\x{10fe5}\x{119AC}\x{1E10E}\x{1E2D1}
|
||||
|
||||
# Some Unicode 13.0.0 new script characters
|
||||
|
||||
/\p{Chorasmian}\p{Dives_Akuru}\p{Khitan_Small_Script}\p{Yezidi}/utf
|
||||
\x{10FB0}\x{11900}\x{18B00}\x{10E80}
|
||||
|
||||
# -------
|
||||
|
||||
# Test reference and errors in non-ASCII characters in group names
|
||||
|
|
|
@ -1342,13 +1342,13 @@ No match
|
|||
0: \x{4db4}
|
||||
\x{4db5}
|
||||
0: \x{4db5}
|
||||
\x{4db6}
|
||||
0: \x{4db6}
|
||||
\= Expect no match
|
||||
a
|
||||
No match
|
||||
\x{2b0}
|
||||
No match
|
||||
\x{4db6}
|
||||
No match
|
||||
|
||||
/^\p{Lt}/utf
|
||||
\x{1c5}
|
||||
|
|
|
@ -4736,8 +4736,6 @@ No match
|
|||
No match
|
||||
|
||||
/^\p{Common}/utf
|
||||
\x{589}
|
||||
0: \x{589}
|
||||
\x{60c}
|
||||
0: \x{60c}
|
||||
\x{61f}
|
||||
|
@ -4900,6 +4898,12 @@ MK: ABC
|
|||
\x{10fe5}\x{119AC}\x{1E10E}\x{1E2D1}
|
||||
0: \x{10fe5}\x{119ac}\x{1e10e}\x{1e2d1}
|
||||
|
||||
# Some Unicode 13.0.0 new script characters
|
||||
|
||||
/\p{Chorasmian}\p{Dives_Akuru}\p{Khitan_Small_Script}\p{Yezidi}/utf
|
||||
\x{10FB0}\x{11900}\x{18B00}\x{10E80}
|
||||
0: \x{10fb0}\x{11900}\x{18b00}\x{10e80}
|
||||
|
||||
# -------
|
||||
|
||||
# Test reference and errors in non-ASCII characters in group names
|
||||
|
|
Loading…
Reference in New Issue