Update for Unicode 7.0.0

This commit is contained in:
Philip.Hazel 2014-06-20 12:40:32 +00:00
parent 1abd5a7f8d
commit bf2bc83ed8
13 changed files with 6732 additions and 2794 deletions

View File

@ -21,6 +21,7 @@
# Modfied by PH 26-February-2013 to add the Xuc special category.
# Comment modified by PH 13-May-2014 to update to PCRE2 file names.
# Script updated to Python 3 by running it through the 2to3 converter.
# Added script names for Unicode 7.0.0, 20-June-2014.
script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \
'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \
@ -41,7 +42,12 @@ script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Bugines
# New for Unicode 6.0.0
'Batak', 'Brahmi', 'Mandaic', \
# New for Unicode 6.1.0
'Chakma', 'Meroitic_Cursive', 'Meroitic_Hieroglyphs', 'Miao', 'Sharada', 'Sora_Sompeng', 'Takri'
'Chakma', 'Meroitic_Cursive', 'Meroitic_Hieroglyphs', 'Miao', 'Sharada', 'Sora_Sompeng', 'Takri',
# New for Unicode 7.0.0
'Bassa_Vah', 'Caucasian_Albanian', 'Duployan', 'Elbasan', 'Grantha', 'Khojki', 'Khudawadi',
'Linear_A', 'Mahajani', 'Manichaean', 'Mende_Kikakui', 'Modi', 'Mro', 'Nabataean',
'Old_North_Arabian', 'Old_Permic', 'Pahawh_Hmong', 'Palmyrene', 'Psalter_Pahlavi',
'Pau_Cin_Hau', 'Siddham', 'Tirhuta', 'Warang_Citi'
]
category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',

View File

@ -33,7 +33,7 @@
# removed completely in 2012.
# Corrected size calculation
# Add #ifndef SUPPORT_UCP to use dummy tables when no UCP support is needed.
# Update for PCRE2: name changes and SUPPORT_UCP is abolished.
# Update for PCRE2: name changes, and SUPPORT_UCP is abolished.
#
# Major modifications made to this script:
# Added code to add a grapheme break property field to records.
@ -119,6 +119,7 @@
# 30-September-2012: Added RegionalIndicator break property from Unicode 6.2.0
# 13-May-2014: Updated for PCRE2
# 03-June-2014: Updated for Python 3
# 20-June-2014: Updated for Unicode 7.0.0
##############################################################################
@ -310,7 +311,12 @@ script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Bugines
# New for Unicode 6.0.0
'Batak', 'Brahmi', 'Mandaic', \
# New for Unicode 6.1.0
'Chakma', 'Meroitic_Cursive', 'Meroitic_Hieroglyphs', 'Miao', 'Sharada', 'Sora_Sompeng', 'Takri'
'Chakma', 'Meroitic_Cursive', 'Meroitic_Hieroglyphs', 'Miao', 'Sharada', 'Sora_Sompeng', 'Takri',
# New for Unicode 7.0.0
'Bassa_Vah', 'Caucasian_Albanian', 'Duployan', 'Elbasan', 'Grantha', 'Khojki', 'Khudawadi',
'Linear_A', 'Mahajani', 'Manichaean', 'Mende_Kikakui', 'Modi', 'Mro', 'Nabataean',
'Old_North_Arabian', 'Old_Permic', 'Pahawh_Hmong', 'Palmyrene', 'Psalter_Pahlavi',
'Pau_Cin_Hau', 'Siddham', 'Tirhuta', 'Warang_Citi'
]
category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
@ -427,7 +433,7 @@ print("table names from _pcre2_xxx to xxxx, thereby avoiding name clashes")
print("with the library. At present, just one of these tables is actually")
print("needed. */")
print()
print("#ifndef PCRE2_INCLUDED")
print("#ifndef PCRE2_PCRE2TEST")
print()
print("#ifdef HAVE_CONFIG_H")
print("#include \"config.h\"")
@ -435,7 +441,7 @@ print("#endif")
print()
print("#include \"pcre2_internal.h\"")
print()
print("#endif /* PCRE2_INCLUDED */")
print("#endif /* PCRE2_PCRE2TEST */")
print()
print("/* Unicode character database. */")
print("/* This file was autogenerated by the MultiStage2.py script. */")
@ -476,7 +482,7 @@ print()
print("/* When #included in pcre2test, we don't need this large table. */")
print()
print("#ifndef PCRE2_INCLUDED")
print("#ifndef PCRE2_PCRE2TEST")
print()
print_records(records, record_size)
print_table(min_stage1, 'PRIV(ucd_stage1)')
@ -486,7 +492,7 @@ print("#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h")
print("#endif")
print("#endif /* SUPPORT_UTF */")
print()
print("#endif /* PCRE2_INCLUDED */")
print("#endif /* PCRE2_PCRE2TEST */")
"""

View File

@ -1,8 +1,8 @@
# CaseFolding-6.3.0.txt
# Date: 2012-12-20, 22:14:35 GMT [MD]
# CaseFolding-7.0.0.txt
# Date: 2014-04-09, 20:00:56 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2013 Unicode, Inc.
# Copyright (c) 1991-2014 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
@ -25,7 +25,7 @@
#
# For information on case folding, including how to have case folding
# preserve normalization formats, see Section 3.13 Default Case Algorithms in
# The Unicode Standard, Version 5.0.
# The Unicode Standard.
#
# ================================================================================
# Format
@ -58,8 +58,6 @@
# All code points not explicitly listed for Case_Folding
# have the value C for the status field, and the code point itself for the mapping field.
# @missing: 0000..10FFFF; C; <code point>
# =================================================================
0041; C; 0061; # LATIN CAPITAL LETTER A
0042; C; 0062; # LATIN CAPITAL LETTER B
@ -298,6 +296,7 @@
0370; C; 0371; # GREEK CAPITAL LETTER HETA
0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI
0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
037F; C; 03F3; # GREEK CAPITAL LETTER YOT
0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS
0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS
0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS
@ -505,6 +504,10 @@
0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER
0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER
0528; C; 0529; # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK
052A; C; 052B; # CYRILLIC CAPITAL LETTER DZZHE
052C; C; 052D; # CYRILLIC CAPITAL LETTER DCHE
052E; C; 052F; # CYRILLIC CAPITAL LETTER EL WITH DESCENDER
0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
@ -1088,6 +1091,8 @@ A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE
A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE
A694; C; A695; # CYRILLIC CAPITAL LETTER HWE
A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE
A698; C; A699; # CYRILLIC CAPITAL LETTER DOUBLE O
A69A; C; A69B; # CYRILLIC CAPITAL LETTER CROSSED O
A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
A726; C; A727; # LATIN CAPITAL LETTER HENG
@ -1138,12 +1143,22 @@ A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO
A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H
A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER
A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR
A796; C; A797; # LATIN CAPITAL LETTER B WITH FLOURISH
A798; C; A799; # LATIN CAPITAL LETTER F WITH STROKE
A79A; C; A79B; # LATIN CAPITAL LETTER VOLAPUK AE
A79C; C; A79D; # LATIN CAPITAL LETTER VOLAPUK OE
A79E; C; A79F; # LATIN CAPITAL LETTER VOLAPUK UE
A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE
A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE
A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE
A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK
A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E
A7AC; C; 0261; # LATIN CAPITAL LETTER SCRIPT G
A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT
A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K
A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T
FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
@ -1222,5 +1237,37 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
10425; C; 1044D; # DESERET CAPITAL LETTER ENG
10426; C; 1044E; # DESERET CAPITAL LETTER OI
10427; C; 1044F; # DESERET CAPITAL LETTER EW
118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA
118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A
118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI
118A3; C; 118C3; # WARANG CITI CAPITAL LETTER YU
118A4; C; 118C4; # WARANG CITI CAPITAL LETTER YA
118A5; C; 118C5; # WARANG CITI CAPITAL LETTER YO
118A6; C; 118C6; # WARANG CITI CAPITAL LETTER II
118A7; C; 118C7; # WARANG CITI CAPITAL LETTER UU
118A8; C; 118C8; # WARANG CITI CAPITAL LETTER E
118A9; C; 118C9; # WARANG CITI CAPITAL LETTER O
118AA; C; 118CA; # WARANG CITI CAPITAL LETTER ANG
118AB; C; 118CB; # WARANG CITI CAPITAL LETTER GA
118AC; C; 118CC; # WARANG CITI CAPITAL LETTER KO
118AD; C; 118CD; # WARANG CITI CAPITAL LETTER ENY
118AE; C; 118CE; # WARANG CITI CAPITAL LETTER YUJ
118AF; C; 118CF; # WARANG CITI CAPITAL LETTER UC
118B0; C; 118D0; # WARANG CITI CAPITAL LETTER ENN
118B1; C; 118D1; # WARANG CITI CAPITAL LETTER ODD
118B2; C; 118D2; # WARANG CITI CAPITAL LETTER TTE
118B3; C; 118D3; # WARANG CITI CAPITAL LETTER NUNG
118B4; C; 118D4; # WARANG CITI CAPITAL LETTER DA
118B5; C; 118D5; # WARANG CITI CAPITAL LETTER AT
118B6; C; 118D6; # WARANG CITI CAPITAL LETTER AM
118B7; C; 118D7; # WARANG CITI CAPITAL LETTER BU
118B8; C; 118D8; # WARANG CITI CAPITAL LETTER PU
118B9; C; 118D9; # WARANG CITI CAPITAL LETTER HIYO
118BA; C; 118DA; # WARANG CITI CAPITAL LETTER HOLO
118BB; C; 118DB; # WARANG CITI CAPITAL LETTER HORR
118BC; C; 118DC; # WARANG CITI CAPITAL LETTER HAR
118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU
118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII
118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO
#
# EOF

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,8 @@
# GraphemeBreakProperty-6.3.0.txt
# Date: 2013-03-02, 16:07:40 GMT [MD]
# GraphemeBreakProperty-7.0.0.txt
# Date: 2014-02-19, 15:51:21 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2013 Unicode, Inc.
# Copyright (c) 1991-2014 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
@ -34,7 +34,7 @@
000E..001F ; Control # Cc [18] <control-000E>..<control-001F>
007F..009F ; Control # Cc [33] <control-007F>..<control-009F>
00AD ; Control # Cf SOFT HYPHEN
0600..0604 ; Control # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT
0600..0605 ; Control # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
061C ; Control # Cf ARABIC LETTER MARK
06DD ; Control # Cf ARABIC END OF AYAH
070F ; Control # Cf SYRIAC ABBREVIATION MARK
@ -52,6 +52,7 @@ FEFF ; Control # Cf ZERO WIDTH NO-BREAK SPACE
FFF0..FFF8 ; Control # Cn [9] <reserved-FFF0>..<reserved-FFF8>
FFF9..FFFB ; Control # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
110BD ; Control # Cf KAITHI NUMBER SIGN
1BCA0..1BCA3 ; Control # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
1D173..1D17A ; Control # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
E0000 ; Control # Cn <reserved-E0000>
E0001 ; Control # Cf LANGUAGE TAG
@ -60,7 +61,7 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG
E0080..E00FF ; Control # Cn [128] <reserved-E0080>..<reserved-E00FF>
E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
# Total code points: 6025
# Total code points: 6030
# ================================================
@ -88,8 +89,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
08E4..08FE ; Extend # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT
0900..0902 ; Extend # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
08E4..0902 ; Extend # Mn [31] ARABIC CURLY FATHA..DEVANAGARI SIGN ANUSVARA
093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE
093C ; Extend # Mn DEVANAGARI SIGN NUKTA
0941..0948 ; Extend # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
@ -131,11 +131,13 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0BC0 ; Extend # Mn TAMIL VOWEL SIGN II
0BCD ; Extend # Mn TAMIL SIGN VIRAMA
0BD7 ; Extend # Mc TAMIL AU LENGTH MARK
0C00 ; Extend # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE
0C3E..0C40 ; Extend # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
0C46..0C48 ; Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
0C4A..0C4D ; Extend # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
0C55..0C56 ; Extend # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
0C62..0C63 ; Extend # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
0C81 ; Extend # Mn KANNADA SIGN CANDRABINDU
0CBC ; Extend # Mn KANNADA SIGN NUKTA
0CBF ; Extend # Mn KANNADA VOWEL SIGN I
0CC2 ; Extend # Mc KANNADA VOWEL SIGN UU
@ -143,6 +145,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0D01 ; Extend # Mn MALAYALAM SIGN CANDRABINDU
0D3E ; Extend # Mc MALAYALAM VOWEL SIGN AA
0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
0D4D ; Extend # Mn MALAYALAM SIGN VIRAMA
@ -206,6 +209,8 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
1A65..1A6C ; Extend # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
1A73..1A7C ; Extend # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY
1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B34 ; Extend # Mn BALINESE SIGN REREKAN
1B36..1B3A ; Extend # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
@ -215,7 +220,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
1B80..1B81 ; Extend # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
1BA2..1BA5 ; Extend # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
1BA8..1BA9 ; Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
1BAB ; Extend # Mn SUNDANESE SIGN VIRAMA
1BAB..1BAD ; Extend # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA
1BE6 ; Extend # Mn BATAK SIGN TOMPI
1BE8..1BE9 ; Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE
1BED ; Extend # Mn BATAK VOWEL SIGN KARO O
@ -227,7 +232,8 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
1CE2..1CE8 ; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
1CED ; Extend # Mn VEDIC SIGN TIRYAK
1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE
1DC0..1DE6 ; Extend # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
1DC0..1DF5 ; Extend # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE
1DFC..1DFF ; Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
@ -258,11 +264,13 @@ A980..A982 ; Extend # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
A9B3 ; Extend # Mn JAVANESE SIGN CECAK TELU
A9B6..A9B9 ; Extend # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
A9BC ; Extend # Mn JAVANESE VOWEL SIGN PEPET
A9E5 ; Extend # Mn MYANMAR SIGN SHAN SAW
AA29..AA2E ; Extend # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
AA31..AA32 ; Extend # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
AA35..AA36 ; Extend # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
AA43 ; Extend # Mn CHAM CONSONANT SIGN FINAL NG
AA4C ; Extend # Mn CHAM CONSONANT SIGN FINAL M
AA7C ; Extend # Mn MYANMAR SIGN TAI LAING TONE-2
AAB0 ; Extend # Mn TAI VIET MAI KANG
AAB2..AAB4 ; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
AAB7..AAB8 ; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
@ -275,29 +283,61 @@ ABE8 ; Extend # Mn MEETEI MAYEK VOWEL SIGN UNAP
ABED ; Extend # Mn MEETEI MAYEK APUN IYEK
FB1E ; Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA
FE00..FE0F ; Extend # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
FE20..FE26 ; Extend # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
FE20..FE2D ; Extend # Mn [14] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON BELOW
FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
101FD ; Extend # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
102E0 ; Extend # Mn COPTIC EPACT THOUSANDS MARK
10376..1037A ; Extend # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII
10A01..10A03 ; Extend # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
10A05..10A06 ; Extend # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
10A0C..10A0F ; Extend # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
10A38..10A3A ; Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
10A3F ; Extend # Mn KHAROSHTHI VIRAMA
10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
11001 ; Extend # Mn BRAHMI SIGN ANUSVARA
11038..11046 ; Extend # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
11080..11081 ; Extend # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA
1107F..11081 ; Extend # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA
110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
11100..11102 ; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA
11127..1112B ; Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU
1112D..11134 ; Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA
11173 ; Extend # Mn MAHAJANI SIGN NUKTA
11180..11181 ; Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA
111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O
1122F..11231 ; Extend # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
11234 ; Extend # Mn KHOJKI SIGN ANUSVARA
11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
112DF ; Extend # Mn KHUDAWADI SIGN ANUSVARA
112E3..112EA ; Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA
11301 ; Extend # Mn GRANTHA SIGN CANDRABINDU
1133C ; Extend # Mn GRANTHA SIGN NUKTA
1133E ; Extend # Mc GRANTHA VOWEL SIGN AA
11340 ; Extend # Mn GRANTHA VOWEL SIGN II
11357 ; Extend # Mc GRANTHA AU LENGTH MARK
11366..1136C ; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
11370..11374 ; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
114B0 ; Extend # Mc TIRHUTA VOWEL SIGN AA
114B3..114B8 ; Extend # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL
114BA ; Extend # Mn TIRHUTA VOWEL SIGN SHORT E
114BD ; Extend # Mc TIRHUTA VOWEL SIGN SHORT O
114BF..114C0 ; Extend # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA
114C2..114C3 ; Extend # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA
115AF ; Extend # Mc SIDDHAM VOWEL SIGN AA
115B2..115B5 ; Extend # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR
115BC..115BD ; Extend # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA
115BF..115C0 ; Extend # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA
11633..1163A ; Extend # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI
1163D ; Extend # Mn MODI SIGN ANUSVARA
1163F..11640 ; Extend # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA
116AB ; Extend # Mn TAKRI SIGN ANUSVARA
116AD ; Extend # Mn TAKRI VOWEL SIGN AA
116B0..116B5 ; Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU
116B7 ; Extend # Mn TAKRI SIGN NUKTA
16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
1BC9D..1BC9E ; Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
1D165 ; Extend # Mc MUSICAL SYMBOL COMBINING STEM
1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D16E..1D172 ; Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
@ -305,9 +345,10 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
1D185..1D18B ; Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
1D1AA..1D1AD ; Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 1318
# Total code points: 1461
# ================================================
@ -386,7 +427,6 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
1BA1 ; SpacingMark # Mc SUNDANESE CONSONANT SIGN PAMINGKAL
1BA6..1BA7 ; SpacingMark # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG
1BAA ; SpacingMark # Mc SUNDANESE SIGN PAMAAEH
1BAC..1BAD ; SpacingMark # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA
1BE7 ; SpacingMark # Mc BATAK VOWEL SIGN E
1BEA..1BEC ; SpacingMark # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O
1BEE ; SpacingMark # Mc BATAK VOWEL SIGN U
@ -423,6 +463,27 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
11182 ; SpacingMark # Mc SHARADA SIGN VISARGA
111B3..111B5 ; SpacingMark # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II
111BF..111C0 ; SpacingMark # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA
1122C..1122E ; SpacingMark # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II
11232..11233 ; SpacingMark # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU
11235 ; SpacingMark # Mc KHOJKI SIGN VIRAMA
112E0..112E2 ; SpacingMark # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II
11302..11303 ; SpacingMark # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA
1133F ; SpacingMark # Mc GRANTHA VOWEL SIGN I
11341..11344 ; SpacingMark # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR
11347..11348 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI
1134B..1134D ; SpacingMark # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA
11362..11363 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
114B1..114B2 ; SpacingMark # Mc [2] TIRHUTA VOWEL SIGN I..TIRHUTA VOWEL SIGN II
114B9 ; SpacingMark # Mc TIRHUTA VOWEL SIGN E
114BB..114BC ; SpacingMark # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O
114BE ; SpacingMark # Mc TIRHUTA VOWEL SIGN AU
114C1 ; SpacingMark # Mc TIRHUTA SIGN VISARGA
115B0..115B1 ; SpacingMark # Mc [2] SIDDHAM VOWEL SIGN I..SIDDHAM VOWEL SIGN II
115B8..115BB ; SpacingMark # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU
115BE ; SpacingMark # Mc SIDDHAM SIGN VISARGA
11630..11632 ; SpacingMark # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II
1163B..1163C ; SpacingMark # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU
1163E ; SpacingMark # Mc MODI SIGN VISARGA
116AC ; SpacingMark # Mc TAKRI SIGN VISARGA
116AE..116AF ; SpacingMark # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II
116B6 ; SpacingMark # Mc TAKRI SIGN VIRAMA
@ -430,7 +491,7 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT
# Total code points: 290
# Total code points: 331
# ================================================

View File

@ -1,8 +1,8 @@
# Scripts-6.3.0.txt
# Date: 2013-07-05, 14:09:02 GMT [MD]
# Scripts-7.0.0.txt
# Date: 2014-05-15, 00:11:35 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2013 Unicode, Inc.
# Copyright (c) 1991-2014 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
@ -83,8 +83,10 @@
0385 ; Common # Sk GREEK DIALYTIKA TONOS
0387 ; Common # Po GREEK ANO TELEIA
0589 ; Common # Po ARMENIAN FULL STOP
0605 ; Common # Cf ARABIC NUMBER MARK ABOVE
060C ; Common # Po ARABIC COMMA
061B ; Common # Po ARABIC SEMICOLON
061C ; Common # Cf ARABIC LETTER MARK
061F ; Common # Po ARABIC QUESTION MARK
0640 ; Common # Lm ARABIC TATWEEL
0660..0669 ; Common # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
@ -146,7 +148,7 @@
208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS
208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS
20A0..20BA ; Common # Sc [27] EURO-CURRENCY SIGN..TURKISH LIRA SIGN
20A0..20BD ; Common # Sc [30] EURO-CURRENCY SIGN..RUBLE SIGN
2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
2102 ; Common # L& DOUBLE-STRUCK CAPITAL C
2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA
@ -215,7 +217,7 @@
239B..23B3 ; Common # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
23B4..23DB ; Common # So [40] TOP SQUARE BRACKET..FUSE
23DC..23E1 ; Common # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
23E2..23F3 ; Common # So [18] WHITE TRAPEZIUM..HOURGLASS WITH FLOWING SAND
23E2..23FA ; Common # So [25] WHITE TRAPEZIUM..BLACK CIRCLE FOR RECORD
2400..2426 ; Common # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
2440..244A ; Common # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
2460..249B ; Common # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
@ -229,8 +231,7 @@
25F8..25FF ; Common # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE
2600..266E ; Common # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN
266F ; Common # Sm MUSIC SHARP SIGN
2670..26FF ; Common # So [144] WEST SYRIAC CROSS..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
2701..2767 ; Common # So [103] UPPER BLADE SCISSORS..ROTATED FLORAL HEART BULLET
2670..2767 ; Common # So [248] WEST SYRIAC CROSS..ROTATED FLORAL HEART BULLET
2768 ; Common # Ps MEDIUM LEFT PARENTHESIS ORNAMENT
2769 ; Common # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT
276A ; Common # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
@ -298,7 +299,11 @@
2B30..2B44 ; Common # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET
2B45..2B46 ; Common # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW
2B47..2B4C ; Common # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
2B50..2B59 ; Common # So [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE
2B4D..2B73 ; Common # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR
2B76..2B95 ; Common # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
2B98..2BB9 ; Common # So [34] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..UP ARROWHEAD IN A RECTANGLE BOX
2BBD..2BC8 ; Common # So [12] BALLOT BOX WITH LIGHT X..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED
2BCA..2BD1 ; Common # So [8] TOP HALF BLACK CIRCLE..UNCERTAINTY SIGN
2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET
2E03 ; Common # Pf RIGHT SUBSTITUTION BRACKET
@ -332,6 +337,10 @@
2E2F ; Common # Lm VERTICAL TILDE
2E30..2E39 ; Common # Po [10] RING POINT..TOP HALF SECTION SIGN
2E3A..2E3B ; Common # Pd [2] TWO-EM DASH..THREE-EM DASH
2E3C..2E3F ; Common # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM
2E40 ; Common # Pd DOUBLE HYPHEN
2E41 ; Common # Po REVERSED COMMA
2E42 ; Common # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
3000 ; Common # Zs IDEOGRAPHIC SPACE
3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
@ -395,10 +404,11 @@ A830..A835 ; Common # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC
A836..A837 ; Common # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
A838 ; Common # Sc NORTH INDIC RUPEE MARK
A839 ; Common # So NORTH INDIC QUANTITY MARK
A92E ; Common # Po KAYAH LI SIGN CWI
A9CF ; Common # Lm JAVANESE PANGRANGKEP
FD3E ; Common # Ps ORNATE LEFT PARENTHESIS
FD3F ; Common # Pe ORNATE RIGHT PARENTHESIS
FDFD ; Common # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
AB5B ; Common # Sk MODIFIER BREVE WITH INVERTED BREVE
FD3E ; Common # Pe ORNATE LEFT PARENTHESIS
FD3F ; Common # Ps ORNATE RIGHT PARENTHESIS
FE10..FE16 ; Common # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK
FE17 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET
FE18 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET
@ -491,6 +501,8 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
10137..1013F ; Common # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
10190..1019B ; Common # So [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
101D0..101FC ; Common # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
102E1..102FB ; Common # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
1BCA0..1BCA3 ; Common # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
1D000..1D0F5 ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
1D100..1D126 ; Common # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
1D129..1D164 ; Common # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
@ -547,10 +559,10 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
1F000..1F02B ; Common # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK
1F030..1F093 ; Common # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
1F0A0..1F0AE ; Common # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES
1F0B1..1F0BE ; Common # So [14] PLAYING CARD ACE OF HEARTS..PLAYING CARD KING OF HEARTS
1F0B1..1F0BF ; Common # So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER
1F0C1..1F0CF ; Common # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER
1F0D1..1F0DF ; Common # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER
1F100..1F10A ; Common # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA
1F0D1..1F0F5 ; Common # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21
1F100..1F10C ; Common # No [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
1F110..1F12E ; Common # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
1F130..1F16B ; Common # So [60] SQUARED LATIN CAPITAL LETTER A..RAISED MD SIGN
1F170..1F19A ; Common # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS
@ -559,28 +571,29 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
1F210..1F23A ; Common # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6
1F240..1F248 ; Common # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
1F250..1F251 ; Common # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
1F300..1F320 ; Common # So [33] CYCLONE..SHOOTING STAR
1F330..1F335 ; Common # So [6] CHESTNUT..CACTUS
1F337..1F37C ; Common # So [70] TULIP..BABY BOTTLE
1F380..1F393 ; Common # So [20] RIBBON..GRADUATION CAP
1F3A0..1F3C4 ; Common # So [37] CAROUSEL HORSE..SURFER
1F3C6..1F3CA ; Common # So [5] TROPHY..SWIMMER
1F3E0..1F3F0 ; Common # So [17] HOUSE BUILDING..EUROPEAN CASTLE
1F400..1F43E ; Common # So [63] RAT..PAW PRINTS
1F440 ; Common # So EYES
1F442..1F4F7 ; Common # So [182] EAR..CAMERA
1F4F9..1F4FC ; Common # So [4] VIDEO CAMERA..VIDEOCASSETTE
1F500..1F53D ; Common # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE
1F540..1F543 ; Common # So [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS
1F550..1F567 ; Common # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY
1F5FB..1F640 ; Common # So [70] MOUNT FUJI..WEARY CAT FACE
1F645..1F64F ; Common # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS
1F680..1F6C5 ; Common # So [70] ROCKET..LEFT LUGGAGE
1F300..1F32C ; Common # So [45] CYCLONE..WIND BLOWING FACE
1F330..1F37D ; Common # So [78] CHESTNUT..FORK AND KNIFE WITH PLATE
1F380..1F3CE ; Common # So [79] RIBBON..RACING CAR
1F3D4..1F3F7 ; Common # So [36] SNOW CAPPED MOUNTAIN..LABEL
1F400..1F4FE ; Common # So [255] RAT..PORTABLE STEREO
1F500..1F54A ; Common # So [75] TWISTED RIGHTWARDS ARROWS..DOVE OF PEACE
1F550..1F579 ; Common # So [42] CLOCK FACE ONE OCLOCK..JOYSTICK
1F57B..1F5A3 ; Common # So [41] LEFT HAND TELEPHONE RECEIVER..BLACK DOWN POINTING BACKHAND INDEX
1F5A5..1F642 ; Common # So [158] DESKTOP COMPUTER..SLIGHTLY SMILING FACE
1F645..1F6CF ; Common # So [139] FACE WITH NO GOOD GESTURE..BED
1F6E0..1F6EC ; Common # So [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
1F6F0..1F6F3 ; Common # So [4] SATELLITE..PASSENGER SHIP
1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
1F780..1F7D4 ; Common # So [85] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..HEAVY TWELVE POINTED PINWHEEL STAR
1F800..1F80B ; Common # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
1F810..1F847 ; Common # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW
1F850..1F859 ; Common # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
1F860..1F887 ; Common # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
1F890..1F8AD ; Common # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
E0001 ; Common # Cf LANGUAGE TAG
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
# Total code points: 6418
# Total code points: 7129
# ================================================
@ -622,16 +635,20 @@ A722..A76F ; Latin # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN
A770 ; Latin # Lm MODIFIER LETTER US
A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
A790..A793 ; Latin # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR
A7A0..A7AA ; Latin # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK
A790..A7AD ; Latin # L& [30] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER L WITH BELT
A7B0..A7B1 ; Latin # L& [2] LATIN CAPITAL LETTER TURNED K..LATIN CAPITAL LETTER TURNED T
A7F7 ; Latin # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
A7F8..A7F9 ; Latin # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
A7FA ; Latin # L& LATIN LETTER SMALL CAPITAL TURNED M
A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M
AB30..AB5A ; Latin # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
AB5C..AB5F ; Latin # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
AB64 ; Latin # L& LATIN SMALL LETTER INVERTED ALPHA
FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
# Total code points: 1272
# Total code points: 1338
# ================================================
@ -640,6 +657,7 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
0376..0377 ; Greek # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
037A ; Greek # Lm GREEK YPOGEGRAMMENI
037B..037D ; Greek # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
037F ; Greek # L& GREEK CAPITAL LETTER YOT
0384 ; Greek # Sk GREEK TONOS
0386 ; Greek # L& GREEK CAPITAL LETTER ALPHA WITH TONOS
0388..038A ; Greek # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
@ -679,15 +697,18 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
1FF6..1FFC ; Greek # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
1FFD..1FFE ; Greek # Sk [2] GREEK OXIA..GREEK DASIA
2126 ; Greek # L& OHM SIGN
AB65 ; Greek # L& GREEK LETTER SMALL CAPITAL OMEGA
10140..10174 ; Greek # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS
10175..10178 ; Greek # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN
10179..10189 ; Greek # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN
1018A ; Greek # No GREEK ZERO SIGN
1018A..1018B ; Greek # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN
1018C ; Greek # So GREEK SINUSOID SIGN
101A0 ; Greek # So GREEK SYMBOL TAU RHO
1D200..1D241 ; Greek # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
1D242..1D244 ; Greek # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
1D245 ; Greek # So GREEK MUSICAL LEIMMA
# Total code points: 511
# Total code points: 516
# ================================================
@ -696,7 +717,7 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
0483..0484 ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PALATALIZATION
0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE
0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
048A..0527 ; Cyrillic # L& [158] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER SHHA WITH DESCENDER
048A..052F ; Cyrillic # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER
1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL
1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN
2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
@ -708,10 +729,11 @@ A673 ; Cyrillic # Po SLAVONIC ASTERISK
A674..A67D ; Cyrillic # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK
A67E ; Cyrillic # Po CYRILLIC KAVYKA
A67F ; Cyrillic # Lm CYRILLIC PAYEROK
A680..A697 ; Cyrillic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE
A680..A69B ; Cyrillic # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O
A69C..A69D ; Cyrillic # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN
A69F ; Cyrillic # Mn COMBINING CYRILLIC LETTER IOTIFIED E
# Total code points: 417
# Total code points: 431
# ================================================
@ -720,10 +742,11 @@ A69F ; Cyrillic # Mn COMBINING CYRILLIC LETTER IOTIFIED E
055A..055F ; Armenian # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
0561..0587 ; Armenian # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
058A ; Armenian # Pd ARMENIAN HYPHEN
058D..058E ; Armenian # So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN
058F ; Armenian # Sc ARMENIAN DRAM SIGN
FB13..FB17 ; Armenian # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
# Total code points: 91
# Total code points: 93
# ================================================
@ -761,7 +784,6 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
060D ; Arabic # Po ARABIC DATE SEPARATOR
060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
0610..061A ; Arabic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
061C ; Arabic # Cf ARABIC LETTER MARK
061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK
0620..063F ; Arabic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
@ -784,9 +806,8 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
06FD..06FE ; Arabic # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN
06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V
0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
08A0 ; Arabic # Lo ARABIC LETTER BEH WITH SMALL V BELOW
08A2..08AC ; Arabic # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH
08E4..08FE ; Arabic # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT
08A0..08B2 ; Arabic # Lo [19] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER ZAIN WITH INVERTED V ABOVE
08E4..08FF ; Arabic # Mn [28] ARABIC CURLY FATHA..ARABIC MARK SIDEWAYS NOON GHUNNA
FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
FBB2..FBC1 ; Arabic # Sk [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
@ -794,6 +815,7 @@ FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIA
FD92..FDC7 ; Arabic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
FDF0..FDFB ; Arabic # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
FDFC ; Arabic # Sc RIAL SIGN
FDFD ; Arabic # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
@ -832,7 +854,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
# Total code points: 1236
# Total code points: 1244
# ================================================
@ -875,17 +897,17 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
0966..096F ; Devanagari # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
0970 ; Devanagari # Po DEVANAGARI ABBREVIATION SIGN
0971 ; Devanagari # Lm DEVANAGARI SIGN HIGH SPACING DOT
0972..0977 ; Devanagari # Lo [6] DEVANAGARI LETTER CANDRA A..DEVANAGARI LETTER UUE
0979..097F ; Devanagari # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA
0972..097F ; Devanagari # Lo [14] DEVANAGARI LETTER CANDRA A..DEVANAGARI LETTER BBA
A8E0..A8F1 ; Devanagari # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A8F2..A8F7 ; Devanagari # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
A8F8..A8FA ; Devanagari # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET
A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
# Total code points: 151
# Total code points: 152
# ================================================
0980 ; Bengali # Lo BENGALI ANJI
0981 ; Bengali # Mn BENGALI SIGN CANDRABINDU
0982..0983 ; Bengali # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
0985..098C ; Bengali # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
@ -913,7 +935,7 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
09FA ; Bengali # So BENGALI ISSHAR
09FB ; Bengali # Sc BENGALI GANDA MARK
# Total code points: 92
# Total code points: 93
# ================================================
@ -1030,12 +1052,12 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
# ================================================
0C00 ; Telugu # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE
0C01..0C03 ; Telugu # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
0C05..0C0C ; Telugu # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L
0C0E..0C10 ; Telugu # Lo [3] TELUGU LETTER E..TELUGU LETTER AI
0C12..0C28 ; Telugu # Lo [23] TELUGU LETTER O..TELUGU LETTER NA
0C2A..0C33 ; Telugu # Lo [10] TELUGU LETTER PA..TELUGU LETTER LLA
0C35..0C39 ; Telugu # Lo [5] TELUGU LETTER VA..TELUGU LETTER HA
0C2A..0C39 ; Telugu # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA
0C3D ; Telugu # Lo TELUGU SIGN AVAGRAHA
0C3E..0C40 ; Telugu # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
0C41..0C44 ; Telugu # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
@ -1049,10 +1071,11 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
0C78..0C7E ; Telugu # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
0C7F ; Telugu # So TELUGU SIGN TUUMU
# Total code points: 93
# Total code points: 95
# ================================================
0C81 ; Kannada # Mn KANNADA SIGN CANDRABINDU
0C82..0C83 ; Kannada # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
0C85..0C8C ; Kannada # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
0C8E..0C90 ; Kannada # Lo [3] KANNADA LETTER E..KANNADA LETTER AI
@ -1075,10 +1098,11 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
0CF1..0CF2 ; Kannada # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
# Total code points: 86
# Total code points: 87
# ================================================
0D01 ; Malayalam # Mn MALAYALAM SIGN CANDRABINDU
0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
0D05..0D0C ; Malayalam # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
@ -1098,7 +1122,7 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
0D79 ; Malayalam # So MALAYALAM DATE MARK
0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
# Total code points: 98
# Total code points: 99
# ================================================
@ -1113,10 +1137,12 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
0DD2..0DD4 ; Sinhala # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
0DD6 ; Sinhala # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA
0DD8..0DDF ; Sinhala # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA
0DE6..0DEF ; Sinhala # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE
0DF2..0DF3 ; Sinhala # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
0DF4 ; Sinhala # Po SINHALA PUNCTUATION KUNDDALIYA
111E1..111F4 ; Sinhala # No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND
# Total code points: 80
# Total code points: 110
# ================================================
@ -1239,14 +1265,23 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
109A..109C ; Myanmar # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A
109D ; Myanmar # Mn MYANMAR VOWEL SIGN AITON AI
109E..109F ; Myanmar # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION
A9E0..A9E4 ; Myanmar # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA
A9E5 ; Myanmar # Mn MYANMAR SIGN SHAN SAW
A9E6 ; Myanmar # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION
A9E7..A9EF ; Myanmar # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA
A9F0..A9F9 ; Myanmar # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE
A9FA..A9FE ; Myanmar # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA
AA60..AA6F ; Myanmar # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA
AA70 ; Myanmar # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
AA71..AA76 ; Myanmar # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM
AA77..AA79 ; Myanmar # So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO
AA7A ; Myanmar # Lo MYANMAR LETTER AITON RA
AA7B ; Myanmar # Mc MYANMAR SIGN PAO KAREN TONE
AA7C ; Myanmar # Mn MYANMAR SIGN TAI LAING TONE-2
AA7D ; Myanmar # Mc MYANMAR SIGN TAI LAING TONE-5
AA7E..AA7F ; Myanmar # Lo [2] MYANMAR LETTER SHWE PALAUNG CHA..MYANMAR LETTER SHWE PALAUNG SHA
# Total code points: 188
# Total code points: 223
# ================================================
@ -1350,8 +1385,9 @@ AB28..AB2E ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
16A0..16EA ; Runic # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
16EE..16F0 ; Runic # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
16F1..16F8 ; Runic # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC
# Total code points: 78
# Total code points: 86
# ================================================
@ -1457,10 +1493,10 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
# ================================================
10300..1031E ; Old_Italic # Lo [31] OLD ITALIC LETTER A..OLD ITALIC LETTER UU
10300..1031F ; Old_Italic # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS
10320..10323 ; Old_Italic # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY
# Total code points: 35
# Total code points: 36
# ================================================
@ -1484,12 +1520,15 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
064B..0655 ; Inherited # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW
0670 ; Inherited # Mn ARABIC LETTER SUPERSCRIPT ALEF
0951..0952 ; Inherited # Mn [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA
1AB0..1ABD ; Inherited # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
1ABE ; Inherited # Me COMBINING PARENTHESES OVERLAY
1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
1CD4..1CE0 ; Inherited # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
1CED ; Inherited # Mn VEDIC SIGN TIRYAK
1CF4 ; Inherited # Mn VEDIC TONE CANDRA ABOVE
1DC0..1DE6 ; Inherited # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
1CF8..1CF9 ; Inherited # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
1DC0..1DF5 ; Inherited # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE
1DFC..1DFF ; Inherited # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
20D0..20DC ; Inherited # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
@ -1500,15 +1539,16 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
302A..302D ; Inherited # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
3099..309A ; Inherited # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
FE00..FE0F ; Inherited # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
FE20..FE26 ; Inherited # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
FE20..FE2D ; Inherited # Mn [14] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON BELOW
101FD ; Inherited # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
102E0 ; Inherited # Mn COPTIC EPACT THOUSANDS MARK
1D167..1D169 ; Inherited # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D17B..1D182 ; Inherited # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
1D185..1D18B ; Inherited # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 523
# Total code points: 563
# ================================================
@ -1542,7 +1582,7 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
# ================================================
1900..191C ; Limbu # Lo [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA
1900..191E ; Limbu # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA
1920..1922 ; Limbu # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
1923..1926 ; Limbu # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
1927..1928 ; Limbu # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
@ -1555,7 +1595,7 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
1944..1945 ; Limbu # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
1946..194F ; Limbu # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
# Total code points: 66
# Total code points: 68
# ================================================
@ -1730,11 +1770,11 @@ A828..A82B ; Syloti_Nagri # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI
# ================================================
12000..1236E ; Cuneiform # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM
12400..12462 ; Cuneiform # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
12470..12473 ; Cuneiform # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON
12000..12398 ; Cuneiform # Lo [921] CUNEIFORM SIGN A..CUNEIFORM SIGN UM TIMES ME
12400..1246E ; Cuneiform # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
12470..12474 ; Cuneiform # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON
# Total code points: 982
# Total code points: 1037
# ================================================
@ -1773,8 +1813,7 @@ A874..A877 ; Phags_Pa # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOU
1BA6..1BA7 ; Sundanese # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG
1BA8..1BA9 ; Sundanese # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
1BAA ; Sundanese # Mc SUNDANESE SIGN PAMAAEH
1BAB ; Sundanese # Mn SUNDANESE SIGN VIRAMA
1BAC..1BAD ; Sundanese # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA
1BAB..1BAD ; Sundanese # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA
1BAE..1BAF ; Sundanese # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA
1BB0..1BB9 ; Sundanese # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
1BBA..1BBF ; Sundanese # Lo [6] SUNDANESE AVAGRAHA..SUNDANESE LETTER FINAL M
@ -1831,9 +1870,9 @@ A8D0..A8D9 ; Saurashtra # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NI
A900..A909 ; Kayah_Li # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
A90A..A925 ; Kayah_Li # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO
A926..A92D ; Kayah_Li # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
A92E..A92F ; Kayah_Li # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA
A92F ; Kayah_Li # Po KAYAH LI SIGN SHYA
# Total code points: 48
# Total code points: 47
# ================================================
@ -2085,8 +2124,9 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
11047..1104D ; Brahmi # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS
11052..11065 ; Brahmi # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND
11066..1106F ; Brahmi # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE
1107F ; Brahmi # Mn BRAHMI NUMBER JOINER
# Total code points: 108
# Total code points: 109
# ================================================
@ -2141,9 +2181,11 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
111BF..111C0 ; Sharada # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA
111C1..111C4 ; Sharada # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM
111C5..111C8 ; Sharada # Po [4] SHARADA DANDA..SHARADA SEPARATOR
111CD ; Sharada # Po SHARADA SUTRA MARK
111D0..111D9 ; Sharada # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE
111DA ; Sharada # Lo SHARADA EKAM
# Total code points: 83
# Total code points: 85
# ================================================
@ -2166,4 +2208,244 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
# Total code points: 66
# ================================================
10530..10563 ; Caucasian_Albanian # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW
1056F ; Caucasian_Albanian # Po CAUCASIAN ALBANIAN CITATION MARK
# Total code points: 53
# ================================================
16AD0..16AED ; Bassa_Vah # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I
16AF0..16AF4 ; Bassa_Vah # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
16AF5 ; Bassa_Vah # Po BASSA VAH FULL STOP
# Total code points: 36
# ================================================
1BC00..1BC6A ; Duployan # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
1BC70..1BC7C ; Duployan # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
1BC80..1BC88 ; Duployan # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL
1BC90..1BC99 ; Duployan # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW
1BC9C ; Duployan # So DUPLOYAN SIGN O WITH CROSS
1BC9D..1BC9E ; Duployan # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
1BC9F ; Duployan # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
# Total code points: 143
# ================================================
10500..10527 ; Elbasan # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE
# Total code points: 40
# ================================================
11301 ; Grantha # Mn GRANTHA SIGN CANDRABINDU
11302..11303 ; Grantha # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA
11305..1130C ; Grantha # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L
1130F..11310 ; Grantha # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI
11313..11328 ; Grantha # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA
1132A..11330 ; Grantha # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA
11332..11333 ; Grantha # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA
11335..11339 ; Grantha # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA
1133C ; Grantha # Mn GRANTHA SIGN NUKTA
1133D ; Grantha # Lo GRANTHA SIGN AVAGRAHA
1133E..1133F ; Grantha # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I
11340 ; Grantha # Mn GRANTHA VOWEL SIGN II
11341..11344 ; Grantha # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR
11347..11348 ; Grantha # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI
1134B..1134D ; Grantha # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA
11357 ; Grantha # Mc GRANTHA AU LENGTH MARK
1135D..11361 ; Grantha # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL
11362..11363 ; Grantha # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
11366..1136C ; Grantha # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
11370..11374 ; Grantha # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
# Total code points: 83
# ================================================
16B00..16B2F ; Pahawh_Hmong # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU
16B30..16B36 ; Pahawh_Hmong # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
16B37..16B3B ; Pahawh_Hmong # Po [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM
16B3C..16B3F ; Pahawh_Hmong # So [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB
16B40..16B43 ; Pahawh_Hmong # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM
16B44 ; Pahawh_Hmong # Po PAHAWH HMONG SIGN XAUS
16B45 ; Pahawh_Hmong # So PAHAWH HMONG SIGN CIM TSOV ROG
16B50..16B59 ; Pahawh_Hmong # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE
16B5B..16B61 ; Pahawh_Hmong # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS
16B63..16B77 ; Pahawh_Hmong # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS
16B7D..16B8F ; Pahawh_Hmong # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ
# Total code points: 127
# ================================================
11200..11211 ; Khojki # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA
11213..1122B ; Khojki # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA
1122C..1122E ; Khojki # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II
1122F..11231 ; Khojki # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
11232..11233 ; Khojki # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU
11234 ; Khojki # Mn KHOJKI SIGN ANUSVARA
11235 ; Khojki # Mc KHOJKI SIGN VIRAMA
11236..11237 ; Khojki # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
11238..1123D ; Khojki # Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN
# Total code points: 61
# ================================================
10600..10736 ; Linear_A # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
10740..10755 ; Linear_A # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
10760..10767 ; Linear_A # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807
# Total code points: 341
# ================================================
11150..11172 ; Mahajani # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA
11173 ; Mahajani # Mn MAHAJANI SIGN NUKTA
11174..11175 ; Mahajani # Po [2] MAHAJANI ABBREVIATION SIGN..MAHAJANI SECTION MARK
11176 ; Mahajani # Lo MAHAJANI LIGATURE SHRI
# Total code points: 39
# ================================================
10AC0..10AC7 ; Manichaean # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW
10AC8 ; Manichaean # So MANICHAEAN SIGN UD
10AC9..10AE4 ; Manichaean # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW
10AE5..10AE6 ; Manichaean # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
10AEB..10AEF ; Manichaean # No [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED
10AF0..10AF6 ; Manichaean # Po [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER
# Total code points: 51
# ================================================
1E800..1E8C4 ; Mende_Kikakui # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON
1E8C7..1E8CF ; Mende_Kikakui # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE
1E8D0..1E8D6 ; Mende_Kikakui # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
# Total code points: 213
# ================================================
11600..1162F ; Modi # Lo [48] MODI LETTER A..MODI LETTER LLA
11630..11632 ; Modi # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II
11633..1163A ; Modi # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI
1163B..1163C ; Modi # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU
1163D ; Modi # Mn MODI SIGN ANUSVARA
1163E ; Modi # Mc MODI SIGN VISARGA
1163F..11640 ; Modi # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA
11641..11643 ; Modi # Po [3] MODI DANDA..MODI ABBREVIATION SIGN
11644 ; Modi # Lo MODI SIGN HUVA
11650..11659 ; Modi # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE
# Total code points: 79
# ================================================
16A40..16A5E ; Mro # Lo [31] MRO LETTER TA..MRO LETTER TEK
16A60..16A69 ; Mro # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE
16A6E..16A6F ; Mro # Po [2] MRO DANDA..MRO DOUBLE DANDA
# Total code points: 43
# ================================================
10A80..10A9C ; Old_North_Arabian # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH
10A9D..10A9F ; Old_North_Arabian # No [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY
# Total code points: 32
# ================================================
10880..1089E ; Nabataean # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW
108A7..108AF ; Nabataean # No [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED
# Total code points: 40
# ================================================
10860..10876 ; Palmyrene # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW
10877..10878 ; Palmyrene # So [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON
10879..1087F ; Palmyrene # No [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY
# Total code points: 32
# ================================================
11AC0..11AF8 ; Pau_Cin_Hau # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
# Total code points: 57
# ================================================
10350..10375 ; Old_Permic # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA
10376..1037A ; Old_Permic # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII
# Total code points: 43
# ================================================
10B80..10B91 ; Psalter_Pahlavi # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW
10B99..10B9C ; Psalter_Pahlavi # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT
10BA9..10BAF ; Psalter_Pahlavi # No [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED
# Total code points: 29
# ================================================
11580..115AE ; Siddham # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA
115AF..115B1 ; Siddham # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II
115B2..115B5 ; Siddham # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR
115B8..115BB ; Siddham # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU
115BC..115BD ; Siddham # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA
115BE ; Siddham # Mc SIDDHAM SIGN VISARGA
115BF..115C0 ; Siddham # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA
115C1..115C9 ; Siddham # Po [9] SIDDHAM SIGN SIDDHAM..SIDDHAM END OF TEXT MARK
# Total code points: 72
# ================================================
112B0..112DE ; Khudawadi # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA
112DF ; Khudawadi # Mn KHUDAWADI SIGN ANUSVARA
112E0..112E2 ; Khudawadi # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II
112E3..112EA ; Khudawadi # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA
112F0..112F9 ; Khudawadi # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE
# Total code points: 69
# ================================================
11480..114AF ; Tirhuta # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA
114B0..114B2 ; Tirhuta # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II
114B3..114B8 ; Tirhuta # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL
114B9 ; Tirhuta # Mc TIRHUTA VOWEL SIGN E
114BA ; Tirhuta # Mn TIRHUTA VOWEL SIGN SHORT E
114BB..114BE ; Tirhuta # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU
114BF..114C0 ; Tirhuta # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA
114C1 ; Tirhuta # Mc TIRHUTA SIGN VISARGA
114C2..114C3 ; Tirhuta # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA
114C4..114C5 ; Tirhuta # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG
114C6 ; Tirhuta # Po TIRHUTA ABBREVIATION SIGN
114C7 ; Tirhuta # Lo TIRHUTA OM
114D0..114D9 ; Tirhuta # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE
# Total code points: 82
# ================================================
118A0..118DF ; Warang_Citi # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO
118E0..118E9 ; Warang_Citi # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
118EA..118F2 ; Warang_Citi # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY
118FF ; Warang_Citi # Lo WARANG CITI OM
# Total code points: 84
# EOF

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@
* A program for testing the Unicode property table *
***************************************************/
/* Copyright (c) University of Cambridge 2008 */
/* Copyright (c) University of Cambridge 2008 - 2014 */
/* Compile thus:
gcc -DHAVE_CONFIG_H -DPCRE2_CODE_UNIT_WIDTH=8 -o ucptest \
@ -236,6 +236,30 @@ switch(script)
case ucp_Sora_Sompeng: scriptname = US"Sora Sompent"; break;
case ucp_Takri: scriptname = US"Takri"; break;
/* New for Unicode 7.0.0 */
case ucp_Bassa_Vah: scriptname = US"Bassa_Vah"; break;
case ucp_Caucasian_Albanian: scriptname = US"Caucasian_Albanian"; break;
case ucp_Duployan: scriptname = US"Duployan"; break;
case ucp_Elbasan: scriptname = US"Elbasan"; break;
case ucp_Grantha: scriptname = US"Grantha"; break;
case ucp_Khojki: scriptname = US"Khojki"; break;
case ucp_Khudawadi: scriptname = US"Khudawadi"; break;
case ucp_Linear_A: scriptname = US"Linear_A"; break;
case ucp_Mahajani: scriptname = US"Mahajani"; break;
case ucp_Manichaean: scriptname = US"Manichaean"; break;
case ucp_Mende_Kikakui: scriptname = US"Mende_Kikakui"; break;
case ucp_Modi: scriptname = US"Modi"; break;
case ucp_Mro: scriptname = US"Mro"; break;
case ucp_Nabataean: scriptname = US"Nabataean"; break;
case ucp_Old_North_Arabian: scriptname = US"Old_North_Arabian"; break;
case ucp_Old_Permic: scriptname = US"Old_Permic"; break;
case ucp_Pahawh_Hmong: scriptname = US"Pahawh_Hmong"; break;
case ucp_Palmyrene: scriptname = US"Palmyrene"; break;
case ucp_Psalter_Pahlavi: scriptname = US"Psalter_Pahlavi"; break;
case ucp_Pau_Cin_Hau: scriptname = US"Pau_Cin_Hau"; break;
case ucp_Siddham: scriptname = US"Siddham"; break;
case ucp_Tirhuta: scriptname = US"Tirhuta"; break;
case ucp_Warang_Citi: scriptname = US"Warang_Citi"; break;
}
printf("%04x %s: %s, %s, %s", c, typename, fulltypename, scriptname, graphbreak);

View File

@ -32,3 +32,5 @@ findprop 10b00 10b35 13000 1342e 10840 10855
findprop 11100 1113c 11680 116c0
findprop 0d 0a 0e 0711 1b04 1111 1169 11fe ae4c ad89
findprop 118a0 11ac7 16ad0

View File

@ -357,3 +357,8 @@ findprop 0d 0a 0e 0711 1b04 1111 1169 11fe ae4c ad89
11fe Letter: Other letter, Hangul, Hangul syllable type T
ae4c Letter: Other letter, Hangul, Hangul syllable type LV
ad89 Letter: Other letter, Hangul, Hangul syllable type LVT
findprop 118a0 11ac7 16ad0
118a0 Letter: Upper case letter, Warang_Citi, Other, 118c0
11ac7 Letter: Other letter, Pau_Cin_Hau, Other
16ad0 Letter: Other letter, Bassa_Vah, Other

View File

@ -127,7 +127,7 @@ ucp_gbXX values defined in pcre2_ucp.h. The 2-dimensional table is indexed by
the properties of two adjacent code points. The left property selects a word
from the table, and the right property selects a bit from that word like this:
ucp_gbtable[left-property] & (1 << right-property)
PRIV(ucp_gbtable)[left-property] & (1 << right-property)
The value is non-zero if a grapheme break is NOT permitted between the relevant
two code points. The breaking rules are as follows:
@ -155,7 +155,7 @@ are implementing).
7. Otherwise, break everywhere.
*/
const uint32_t PRIV(ucp_gbtable[]) = {
const uint32_t PRIV(ucp_gbtable)[] = {
(1<<ucp_gbLF), /* 0 CR */
0, /* 1 LF */
0, /* 2 Control */
@ -196,7 +196,7 @@ const int PRIV(ucp_typerange)[] = {
};
#endif /* SUPPORT_JIT */
/* The pcre_utt[] table below translates Unicode property names into type and
/* The PRIV(utt)[] table below translates Unicode property names into type and
code values. It is searched by binary chop, so must be in collating sequence of
name. Originally, the table contained pointers to the name strings in the first
field of each entry. However, that leads to a large number of relocations when
@ -219,6 +219,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0"
#define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
#define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0"
#define STRING_Bassa_Vah0 STR_B STR_a STR_s STR_s STR_a STR_UNDERSCORE STR_V STR_a STR_h "\0"
#define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0"
#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
@ -229,6 +230,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_C0 STR_C "\0"
#define STRING_Canadian_Aboriginal0 STR_C STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0"
#define STRING_Carian0 STR_C STR_a STR_r STR_i STR_a STR_n "\0"
#define STRING_Caucasian_Albanian0 STR_C STR_a STR_u STR_c STR_a STR_s STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_l STR_b STR_a STR_n STR_i STR_a STR_n "\0"
#define STRING_Cc0 STR_C STR_c "\0"
#define STRING_Cf0 STR_C STR_f "\0"
#define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0"
@ -244,11 +246,14 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
#define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0"
#define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"
#define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
#define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
#define STRING_Gothic0 STR_G STR_o STR_t STR_h STR_i STR_c "\0"
#define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0"
#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
@ -268,12 +273,15 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
#define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
#define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0"
#define STRING_Khojki0 STR_K STR_h STR_o STR_j STR_k STR_i "\0"
#define STRING_Khudawadi0 STR_K STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0"
#define STRING_L0 STR_L "\0"
#define STRING_L_AMPERSAND0 STR_L STR_AMPERSAND "\0"
#define STRING_Lao0 STR_L STR_a STR_o "\0"
#define STRING_Latin0 STR_L STR_a STR_t STR_i STR_n "\0"
#define STRING_Lepcha0 STR_L STR_e STR_p STR_c STR_h STR_a "\0"
#define STRING_Limbu0 STR_L STR_i STR_m STR_b STR_u "\0"
#define STRING_Linear_A0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_A "\0"
#define STRING_Linear_B0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_B "\0"
#define STRING_Lisu0 STR_L STR_i STR_s STR_u "\0"
#define STRING_Ll0 STR_L STR_l "\0"
@ -284,18 +292,24 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0"
#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
#define STRING_M0 STR_M "\0"
#define STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0"
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
#define STRING_Mc0 STR_M STR_c "\0"
#define STRING_Me0 STR_M STR_e "\0"
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
#define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0"
#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
#define STRING_Meroitic_Hieroglyphs0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
#define STRING_Miao0 STR_M STR_i STR_a STR_o "\0"
#define STRING_Mn0 STR_M STR_n "\0"
#define STRING_Modi0 STR_M STR_o STR_d STR_i "\0"
#define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
#define STRING_Mro0 STR_M STR_r STR_o "\0"
#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
#define STRING_N0 STR_N "\0"
#define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
#define STRING_Nd0 STR_N STR_d "\0"
#define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
#define STRING_Nko0 STR_N STR_k STR_o "\0"
@ -304,12 +318,17 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
#define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0"
#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
#define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0"
#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
#define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0"
#define STRING_P0 STR_P "\0"
#define STRING_Pahawh_Hmong0 STR_P STR_a STR_h STR_a STR_w STR_h STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0"
#define STRING_Palmyrene0 STR_P STR_a STR_l STR_m STR_y STR_r STR_e STR_n STR_e "\0"
#define STRING_Pau_Cin_Hau0 STR_P STR_a STR_u STR_UNDERSCORE STR_C STR_i STR_n STR_UNDERSCORE STR_H STR_a STR_u "\0"
#define STRING_Pc0 STR_P STR_c "\0"
#define STRING_Pd0 STR_P STR_d "\0"
#define STRING_Pe0 STR_P STR_e "\0"
@ -319,6 +338,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Pi0 STR_P STR_i "\0"
#define STRING_Po0 STR_P STR_o "\0"
#define STRING_Ps0 STR_P STR_s "\0"
#define STRING_Psalter_Pahlavi0 STR_P STR_s STR_a STR_l STR_t STR_e STR_r STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0"
#define STRING_Rejang0 STR_R STR_e STR_j STR_a STR_n STR_g "\0"
#define STRING_Runic0 STR_R STR_u STR_n STR_i STR_c "\0"
#define STRING_S0 STR_S "\0"
@ -327,6 +347,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Sc0 STR_S STR_c "\0"
#define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0"
#define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0"
#define STRING_Siddham0 STR_S STR_i STR_d STR_d STR_h STR_a STR_m "\0"
#define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0"
#define STRING_Sk0 STR_S STR_k "\0"
#define STRING_Sm0 STR_S STR_m "\0"
@ -347,8 +368,10 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Thai0 STR_T STR_h STR_a STR_i "\0"
#define STRING_Tibetan0 STR_T STR_i STR_b STR_e STR_t STR_a STR_n "\0"
#define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0"
#define STRING_Tirhuta0 STR_T STR_i STR_r STR_h STR_u STR_t STR_a "\0"
#define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
#define STRING_Vai0 STR_V STR_a STR_i "\0"
#define STRING_Warang_Citi0 STR_W STR_a STR_r STR_a STR_n STR_g STR_UNDERSCORE STR_C STR_i STR_t STR_i "\0"
#define STRING_Xan0 STR_X STR_a STR_n "\0"
#define STRING_Xps0 STR_X STR_p STR_s "\0"
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
@ -367,6 +390,7 @@ const char PRIV(utt_names)[] =
STRING_Avestan0
STRING_Balinese0
STRING_Bamum0
STRING_Bassa_Vah0
STRING_Batak0
STRING_Bengali0
STRING_Bopomofo0
@ -377,6 +401,7 @@ const char PRIV(utt_names)[] =
STRING_C0
STRING_Canadian_Aboriginal0
STRING_Carian0
STRING_Caucasian_Albanian0
STRING_Cc0
STRING_Cf0
STRING_Chakma0
@ -392,11 +417,14 @@ const char PRIV(utt_names)[] =
STRING_Cyrillic0
STRING_Deseret0
STRING_Devanagari0
STRING_Duployan0
STRING_Egyptian_Hieroglyphs0
STRING_Elbasan0
STRING_Ethiopic0
STRING_Georgian0
STRING_Glagolitic0
STRING_Gothic0
STRING_Grantha0
STRING_Greek0
STRING_Gujarati0
STRING_Gurmukhi0
@ -416,12 +444,15 @@ const char PRIV(utt_names)[] =
STRING_Kayah_Li0
STRING_Kharoshthi0
STRING_Khmer0
STRING_Khojki0
STRING_Khudawadi0
STRING_L0
STRING_L_AMPERSAND0
STRING_Lao0
STRING_Latin0
STRING_Lepcha0
STRING_Limbu0
STRING_Linear_A0
STRING_Linear_B0
STRING_Lisu0
STRING_Ll0
@ -432,18 +463,24 @@ const char PRIV(utt_names)[] =
STRING_Lycian0
STRING_Lydian0
STRING_M0
STRING_Mahajani0
STRING_Malayalam0
STRING_Mandaic0
STRING_Manichaean0
STRING_Mc0
STRING_Me0
STRING_Meetei_Mayek0
STRING_Mende_Kikakui0
STRING_Meroitic_Cursive0
STRING_Meroitic_Hieroglyphs0
STRING_Miao0
STRING_Mn0
STRING_Modi0
STRING_Mongolian0
STRING_Mro0
STRING_Myanmar0
STRING_N0
STRING_Nabataean0
STRING_Nd0
STRING_New_Tai_Lue0
STRING_Nko0
@ -452,12 +489,17 @@ const char PRIV(utt_names)[] =
STRING_Ogham0
STRING_Ol_Chiki0
STRING_Old_Italic0
STRING_Old_North_Arabian0
STRING_Old_Permic0
STRING_Old_Persian0
STRING_Old_South_Arabian0
STRING_Old_Turkic0
STRING_Oriya0
STRING_Osmanya0
STRING_P0
STRING_Pahawh_Hmong0
STRING_Palmyrene0
STRING_Pau_Cin_Hau0
STRING_Pc0
STRING_Pd0
STRING_Pe0
@ -467,6 +509,7 @@ const char PRIV(utt_names)[] =
STRING_Pi0
STRING_Po0
STRING_Ps0
STRING_Psalter_Pahlavi0
STRING_Rejang0
STRING_Runic0
STRING_S0
@ -475,6 +518,7 @@ const char PRIV(utt_names)[] =
STRING_Sc0
STRING_Sharada0
STRING_Shavian0
STRING_Siddham0
STRING_Sinhala0
STRING_Sk0
STRING_Sm0
@ -495,8 +539,10 @@ const char PRIV(utt_names)[] =
STRING_Thai0
STRING_Tibetan0
STRING_Tifinagh0
STRING_Tirhuta0
STRING_Ugaritic0
STRING_Vai0
STRING_Warang_Citi0
STRING_Xan0
STRING_Xps0
STRING_Xsp0
@ -515,146 +561,169 @@ const ucp_type_table PRIV(utt)[] = {
{ 20, PT_SC, ucp_Avestan },
{ 28, PT_SC, ucp_Balinese },
{ 37, PT_SC, ucp_Bamum },
{ 43, PT_SC, ucp_Batak },
{ 49, PT_SC, ucp_Bengali },
{ 57, PT_SC, ucp_Bopomofo },
{ 66, PT_SC, ucp_Brahmi },
{ 73, PT_SC, ucp_Braille },
{ 81, PT_SC, ucp_Buginese },
{ 90, PT_SC, ucp_Buhid },
{ 96, PT_GC, ucp_C },
{ 98, PT_SC, ucp_Canadian_Aboriginal },
{ 118, PT_SC, ucp_Carian },
{ 125, PT_PC, ucp_Cc },
{ 128, PT_PC, ucp_Cf },
{ 131, PT_SC, ucp_Chakma },
{ 138, PT_SC, ucp_Cham },
{ 143, PT_SC, ucp_Cherokee },
{ 152, PT_PC, ucp_Cn },
{ 155, PT_PC, ucp_Co },
{ 158, PT_SC, ucp_Common },
{ 165, PT_SC, ucp_Coptic },
{ 172, PT_PC, ucp_Cs },
{ 175, PT_SC, ucp_Cuneiform },
{ 185, PT_SC, ucp_Cypriot },
{ 193, PT_SC, ucp_Cyrillic },
{ 202, PT_SC, ucp_Deseret },
{ 210, PT_SC, ucp_Devanagari },
{ 221, PT_SC, ucp_Egyptian_Hieroglyphs },
{ 242, PT_SC, ucp_Ethiopic },
{ 251, PT_SC, ucp_Georgian },
{ 260, PT_SC, ucp_Glagolitic },
{ 271, PT_SC, ucp_Gothic },
{ 278, PT_SC, ucp_Greek },
{ 284, PT_SC, ucp_Gujarati },
{ 293, PT_SC, ucp_Gurmukhi },
{ 302, PT_SC, ucp_Han },
{ 306, PT_SC, ucp_Hangul },
{ 313, PT_SC, ucp_Hanunoo },
{ 321, PT_SC, ucp_Hebrew },
{ 328, PT_SC, ucp_Hiragana },
{ 337, PT_SC, ucp_Imperial_Aramaic },
{ 354, PT_SC, ucp_Inherited },
{ 364, PT_SC, ucp_Inscriptional_Pahlavi },
{ 386, PT_SC, ucp_Inscriptional_Parthian },
{ 409, PT_SC, ucp_Javanese },
{ 418, PT_SC, ucp_Kaithi },
{ 425, PT_SC, ucp_Kannada },
{ 433, PT_SC, ucp_Katakana },
{ 442, PT_SC, ucp_Kayah_Li },
{ 451, PT_SC, ucp_Kharoshthi },
{ 462, PT_SC, ucp_Khmer },
{ 468, PT_GC, ucp_L },
{ 470, PT_LAMP, 0 },
{ 473, PT_SC, ucp_Lao },
{ 477, PT_SC, ucp_Latin },
{ 483, PT_SC, ucp_Lepcha },
{ 490, PT_SC, ucp_Limbu },
{ 496, PT_SC, ucp_Linear_B },
{ 505, PT_SC, ucp_Lisu },
{ 510, PT_PC, ucp_Ll },
{ 513, PT_PC, ucp_Lm },
{ 516, PT_PC, ucp_Lo },
{ 519, PT_PC, ucp_Lt },
{ 522, PT_PC, ucp_Lu },
{ 525, PT_SC, ucp_Lycian },
{ 532, PT_SC, ucp_Lydian },
{ 539, PT_GC, ucp_M },
{ 541, PT_SC, ucp_Malayalam },
{ 551, PT_SC, ucp_Mandaic },
{ 559, PT_PC, ucp_Mc },
{ 562, PT_PC, ucp_Me },
{ 565, PT_SC, ucp_Meetei_Mayek },
{ 578, PT_SC, ucp_Meroitic_Cursive },
{ 595, PT_SC, ucp_Meroitic_Hieroglyphs },
{ 616, PT_SC, ucp_Miao },
{ 621, PT_PC, ucp_Mn },
{ 624, PT_SC, ucp_Mongolian },
{ 634, PT_SC, ucp_Myanmar },
{ 642, PT_GC, ucp_N },
{ 644, PT_PC, ucp_Nd },
{ 647, PT_SC, ucp_New_Tai_Lue },
{ 659, PT_SC, ucp_Nko },
{ 663, PT_PC, ucp_Nl },
{ 666, PT_PC, ucp_No },
{ 669, PT_SC, ucp_Ogham },
{ 675, PT_SC, ucp_Ol_Chiki },
{ 684, PT_SC, ucp_Old_Italic },
{ 695, PT_SC, ucp_Old_Persian },
{ 707, PT_SC, ucp_Old_South_Arabian },
{ 725, PT_SC, ucp_Old_Turkic },
{ 736, PT_SC, ucp_Oriya },
{ 742, PT_SC, ucp_Osmanya },
{ 750, PT_GC, ucp_P },
{ 752, PT_PC, ucp_Pc },
{ 755, PT_PC, ucp_Pd },
{ 758, PT_PC, ucp_Pe },
{ 761, PT_PC, ucp_Pf },
{ 764, PT_SC, ucp_Phags_Pa },
{ 773, PT_SC, ucp_Phoenician },
{ 784, PT_PC, ucp_Pi },
{ 787, PT_PC, ucp_Po },
{ 790, PT_PC, ucp_Ps },
{ 793, PT_SC, ucp_Rejang },
{ 800, PT_SC, ucp_Runic },
{ 806, PT_GC, ucp_S },
{ 808, PT_SC, ucp_Samaritan },
{ 818, PT_SC, ucp_Saurashtra },
{ 829, PT_PC, ucp_Sc },
{ 832, PT_SC, ucp_Sharada },
{ 840, PT_SC, ucp_Shavian },
{ 848, PT_SC, ucp_Sinhala },
{ 856, PT_PC, ucp_Sk },
{ 859, PT_PC, ucp_Sm },
{ 862, PT_PC, ucp_So },
{ 865, PT_SC, ucp_Sora_Sompeng },
{ 878, PT_SC, ucp_Sundanese },
{ 888, PT_SC, ucp_Syloti_Nagri },
{ 901, PT_SC, ucp_Syriac },
{ 908, PT_SC, ucp_Tagalog },
{ 916, PT_SC, ucp_Tagbanwa },
{ 925, PT_SC, ucp_Tai_Le },
{ 932, PT_SC, ucp_Tai_Tham },
{ 941, PT_SC, ucp_Tai_Viet },
{ 950, PT_SC, ucp_Takri },
{ 956, PT_SC, ucp_Tamil },
{ 962, PT_SC, ucp_Telugu },
{ 969, PT_SC, ucp_Thaana },
{ 976, PT_SC, ucp_Thai },
{ 981, PT_SC, ucp_Tibetan },
{ 989, PT_SC, ucp_Tifinagh },
{ 998, PT_SC, ucp_Ugaritic },
{ 1007, PT_SC, ucp_Vai },
{ 1011, PT_ALNUM, 0 },
{ 1015, PT_PXSPACE, 0 },
{ 1019, PT_SPACE, 0 },
{ 1023, PT_UCNC, 0 },
{ 1027, PT_WORD, 0 },
{ 1031, PT_SC, ucp_Yi },
{ 1034, PT_GC, ucp_Z },
{ 1036, PT_PC, ucp_Zl },
{ 1039, PT_PC, ucp_Zp },
{ 1042, PT_PC, ucp_Zs }
{ 43, PT_SC, ucp_Bassa_Vah },
{ 53, PT_SC, ucp_Batak },
{ 59, PT_SC, ucp_Bengali },
{ 67, PT_SC, ucp_Bopomofo },
{ 76, PT_SC, ucp_Brahmi },
{ 83, PT_SC, ucp_Braille },
{ 91, PT_SC, ucp_Buginese },
{ 100, PT_SC, ucp_Buhid },
{ 106, PT_GC, ucp_C },
{ 108, PT_SC, ucp_Canadian_Aboriginal },
{ 128, PT_SC, ucp_Carian },
{ 135, PT_SC, ucp_Caucasian_Albanian },
{ 154, PT_PC, ucp_Cc },
{ 157, PT_PC, ucp_Cf },
{ 160, PT_SC, ucp_Chakma },
{ 167, PT_SC, ucp_Cham },
{ 172, PT_SC, ucp_Cherokee },
{ 181, PT_PC, ucp_Cn },
{ 184, PT_PC, ucp_Co },
{ 187, PT_SC, ucp_Common },
{ 194, PT_SC, ucp_Coptic },
{ 201, PT_PC, ucp_Cs },
{ 204, PT_SC, ucp_Cuneiform },
{ 214, PT_SC, ucp_Cypriot },
{ 222, PT_SC, ucp_Cyrillic },
{ 231, PT_SC, ucp_Deseret },
{ 239, PT_SC, ucp_Devanagari },
{ 250, PT_SC, ucp_Duployan },
{ 259, PT_SC, ucp_Egyptian_Hieroglyphs },
{ 280, PT_SC, ucp_Elbasan },
{ 288, PT_SC, ucp_Ethiopic },
{ 297, PT_SC, ucp_Georgian },
{ 306, PT_SC, ucp_Glagolitic },
{ 317, PT_SC, ucp_Gothic },
{ 324, PT_SC, ucp_Grantha },
{ 332, PT_SC, ucp_Greek },
{ 338, PT_SC, ucp_Gujarati },
{ 347, PT_SC, ucp_Gurmukhi },
{ 356, PT_SC, ucp_Han },
{ 360, PT_SC, ucp_Hangul },
{ 367, PT_SC, ucp_Hanunoo },
{ 375, PT_SC, ucp_Hebrew },
{ 382, PT_SC, ucp_Hiragana },
{ 391, PT_SC, ucp_Imperial_Aramaic },
{ 408, PT_SC, ucp_Inherited },
{ 418, PT_SC, ucp_Inscriptional_Pahlavi },
{ 440, PT_SC, ucp_Inscriptional_Parthian },
{ 463, PT_SC, ucp_Javanese },
{ 472, PT_SC, ucp_Kaithi },
{ 479, PT_SC, ucp_Kannada },
{ 487, PT_SC, ucp_Katakana },
{ 496, PT_SC, ucp_Kayah_Li },
{ 505, PT_SC, ucp_Kharoshthi },
{ 516, PT_SC, ucp_Khmer },
{ 522, PT_SC, ucp_Khojki },
{ 529, PT_SC, ucp_Khudawadi },
{ 539, PT_GC, ucp_L },
{ 541, PT_LAMP, 0 },
{ 544, PT_SC, ucp_Lao },
{ 548, PT_SC, ucp_Latin },
{ 554, PT_SC, ucp_Lepcha },
{ 561, PT_SC, ucp_Limbu },
{ 567, PT_SC, ucp_Linear_A },
{ 576, PT_SC, ucp_Linear_B },
{ 585, PT_SC, ucp_Lisu },
{ 590, PT_PC, ucp_Ll },
{ 593, PT_PC, ucp_Lm },
{ 596, PT_PC, ucp_Lo },
{ 599, PT_PC, ucp_Lt },
{ 602, PT_PC, ucp_Lu },
{ 605, PT_SC, ucp_Lycian },
{ 612, PT_SC, ucp_Lydian },
{ 619, PT_GC, ucp_M },
{ 621, PT_SC, ucp_Mahajani },
{ 630, PT_SC, ucp_Malayalam },
{ 640, PT_SC, ucp_Mandaic },
{ 648, PT_SC, ucp_Manichaean },
{ 659, PT_PC, ucp_Mc },
{ 662, PT_PC, ucp_Me },
{ 665, PT_SC, ucp_Meetei_Mayek },
{ 678, PT_SC, ucp_Mende_Kikakui },
{ 692, PT_SC, ucp_Meroitic_Cursive },
{ 709, PT_SC, ucp_Meroitic_Hieroglyphs },
{ 730, PT_SC, ucp_Miao },
{ 735, PT_PC, ucp_Mn },
{ 738, PT_SC, ucp_Modi },
{ 743, PT_SC, ucp_Mongolian },
{ 753, PT_SC, ucp_Mro },
{ 757, PT_SC, ucp_Myanmar },
{ 765, PT_GC, ucp_N },
{ 767, PT_SC, ucp_Nabataean },
{ 777, PT_PC, ucp_Nd },
{ 780, PT_SC, ucp_New_Tai_Lue },
{ 792, PT_SC, ucp_Nko },
{ 796, PT_PC, ucp_Nl },
{ 799, PT_PC, ucp_No },
{ 802, PT_SC, ucp_Ogham },
{ 808, PT_SC, ucp_Ol_Chiki },
{ 817, PT_SC, ucp_Old_Italic },
{ 828, PT_SC, ucp_Old_North_Arabian },
{ 846, PT_SC, ucp_Old_Permic },
{ 857, PT_SC, ucp_Old_Persian },
{ 869, PT_SC, ucp_Old_South_Arabian },
{ 887, PT_SC, ucp_Old_Turkic },
{ 898, PT_SC, ucp_Oriya },
{ 904, PT_SC, ucp_Osmanya },
{ 912, PT_GC, ucp_P },
{ 914, PT_SC, ucp_Pahawh_Hmong },
{ 927, PT_SC, ucp_Palmyrene },
{ 937, PT_SC, ucp_Pau_Cin_Hau },
{ 949, PT_PC, ucp_Pc },
{ 952, PT_PC, ucp_Pd },
{ 955, PT_PC, ucp_Pe },
{ 958, PT_PC, ucp_Pf },
{ 961, PT_SC, ucp_Phags_Pa },
{ 970, PT_SC, ucp_Phoenician },
{ 981, PT_PC, ucp_Pi },
{ 984, PT_PC, ucp_Po },
{ 987, PT_PC, ucp_Ps },
{ 990, PT_SC, ucp_Psalter_Pahlavi },
{ 1006, PT_SC, ucp_Rejang },
{ 1013, PT_SC, ucp_Runic },
{ 1019, PT_GC, ucp_S },
{ 1021, PT_SC, ucp_Samaritan },
{ 1031, PT_SC, ucp_Saurashtra },
{ 1042, PT_PC, ucp_Sc },
{ 1045, PT_SC, ucp_Sharada },
{ 1053, PT_SC, ucp_Shavian },
{ 1061, PT_SC, ucp_Siddham },
{ 1069, PT_SC, ucp_Sinhala },
{ 1077, PT_PC, ucp_Sk },
{ 1080, PT_PC, ucp_Sm },
{ 1083, PT_PC, ucp_So },
{ 1086, PT_SC, ucp_Sora_Sompeng },
{ 1099, PT_SC, ucp_Sundanese },
{ 1109, PT_SC, ucp_Syloti_Nagri },
{ 1122, PT_SC, ucp_Syriac },
{ 1129, PT_SC, ucp_Tagalog },
{ 1137, PT_SC, ucp_Tagbanwa },
{ 1146, PT_SC, ucp_Tai_Le },
{ 1153, PT_SC, ucp_Tai_Tham },
{ 1162, PT_SC, ucp_Tai_Viet },
{ 1171, PT_SC, ucp_Takri },
{ 1177, PT_SC, ucp_Tamil },
{ 1183, PT_SC, ucp_Telugu },
{ 1190, PT_SC, ucp_Thaana },
{ 1197, PT_SC, ucp_Thai },
{ 1202, PT_SC, ucp_Tibetan },
{ 1210, PT_SC, ucp_Tifinagh },
{ 1219, PT_SC, ucp_Tirhuta },
{ 1227, PT_SC, ucp_Ugaritic },
{ 1236, PT_SC, ucp_Vai },
{ 1240, PT_SC, ucp_Warang_Citi },
{ 1252, PT_ALNUM, 0 },
{ 1256, PT_PXSPACE, 0 },
{ 1260, PT_SPACE, 0 },
{ 1264, PT_UCNC, 0 },
{ 1268, PT_WORD, 0 },
{ 1272, PT_SC, ucp_Yi },
{ 1275, PT_GC, ucp_Z },
{ 1277, PT_PC, ucp_Zl },
{ 1280, PT_PC, ucp_Zp },
{ 1283, PT_PC, ucp_Zs }
};
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);

File diff suppressed because it is too large Load Diff

View File

@ -229,9 +229,33 @@ enum {
ucp_Miao,
ucp_Sharada,
ucp_Sora_Sompeng,
ucp_Takri
ucp_Takri,
/* New for Unicode 7.0.0: */
ucp_Bassa_Vah,
ucp_Caucasian_Albanian,
ucp_Duployan,
ucp_Elbasan,
ucp_Grantha,
ucp_Khojki,
ucp_Khudawadi,
ucp_Linear_A,
ucp_Mahajani,
ucp_Manichaean,
ucp_Mende_Kikakui,
ucp_Modi,
ucp_Mro,
ucp_Nabataean,
ucp_Old_North_Arabian,
ucp_Old_Permic,
ucp_Pahawh_Hmong,
ucp_Palmyrene,
ucp_Psalter_Pahlavi,
ucp_Pau_Cin_Hau,
ucp_Siddham,
ucp_Tirhuta,
ucp_Warang_Citi
};
#endif
/* End of pcvre2_ucp.h */
/* End of pcre2_ucp.h */