Update for Unicode 7.0.0
This commit is contained in:
parent
1abd5a7f8d
commit
bf2bc83ed8
|
@ -21,6 +21,7 @@
|
|||
# Modified by PH 26-February-2013 to add the Xuc special category.
|
||||
# Comment modified by PH 13-May-2014 to update to PCRE2 file names.
|
||||
# Script updated to Python 3 by running it through the 2to3 converter.
|
||||
# Added script names for Unicode 7.0.0, 20-June-2014.
|
||||
|
||||
script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \
|
||||
'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \
|
||||
|
@ -41,7 +42,12 @@ script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Bugines
|
|||
# New for Unicode 6.0.0
|
||||
'Batak', 'Brahmi', 'Mandaic', \
|
||||
# New for Unicode 6.1.0
|
||||
'Chakma', 'Meroitic_Cursive', 'Meroitic_Hieroglyphs', 'Miao', 'Sharada', 'Sora_Sompeng', 'Takri'
|
||||
'Chakma', 'Meroitic_Cursive', 'Meroitic_Hieroglyphs', 'Miao', 'Sharada', 'Sora_Sompeng', 'Takri',
|
||||
# New for Unicode 7.0.0
|
||||
'Bassa_Vah', 'Caucasian_Albanian', 'Duployan', 'Elbasan', 'Grantha', 'Khojki', 'Khudawadi',
|
||||
'Linear_A', 'Mahajani', 'Manichaean', 'Mende_Kikakui', 'Modi', 'Mro', 'Nabataean',
|
||||
'Old_North_Arabian', 'Old_Permic', 'Pahawh_Hmong', 'Palmyrene', 'Psalter_Pahlavi',
|
||||
'Pau_Cin_Hau', 'Siddham', 'Tirhuta', 'Warang_Citi'
|
||||
]
|
||||
|
||||
category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
# removed completely in 2012.
|
||||
# Corrected size calculation
|
||||
# Add #ifndef SUPPORT_UCP to use dummy tables when no UCP support is needed.
|
||||
# Update for PCRE2: name changes and SUPPORT_UCP is abolished.
|
||||
# Update for PCRE2: name changes, and SUPPORT_UCP is abolished.
|
||||
#
|
||||
# Major modifications made to this script:
|
||||
# Added code to add a grapheme break property field to records.
|
||||
|
@ -119,6 +119,7 @@
|
|||
# 30-September-2012: Added RegionalIndicator break property from Unicode 6.2.0
|
||||
# 13-May-2014: Updated for PCRE2
|
||||
# 03-June-2014: Updated for Python 3
|
||||
# 20-June-2014: Updated for Unicode 7.0.0
|
||||
##############################################################################
|
||||
|
||||
|
||||
|
@ -310,7 +311,12 @@ script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Bugines
|
|||
# New for Unicode 6.0.0
|
||||
'Batak', 'Brahmi', 'Mandaic', \
|
||||
# New for Unicode 6.1.0
|
||||
'Chakma', 'Meroitic_Cursive', 'Meroitic_Hieroglyphs', 'Miao', 'Sharada', 'Sora_Sompeng', 'Takri'
|
||||
'Chakma', 'Meroitic_Cursive', 'Meroitic_Hieroglyphs', 'Miao', 'Sharada', 'Sora_Sompeng', 'Takri',
|
||||
# New for Unicode 7.0.0
|
||||
'Bassa_Vah', 'Caucasian_Albanian', 'Duployan', 'Elbasan', 'Grantha', 'Khojki', 'Khudawadi',
|
||||
'Linear_A', 'Mahajani', 'Manichaean', 'Mende_Kikakui', 'Modi', 'Mro', 'Nabataean',
|
||||
'Old_North_Arabian', 'Old_Permic', 'Pahawh_Hmong', 'Palmyrene', 'Psalter_Pahlavi',
|
||||
'Pau_Cin_Hau', 'Siddham', 'Tirhuta', 'Warang_Citi'
|
||||
]
|
||||
|
||||
category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
|
||||
|
@ -427,7 +433,7 @@ print("table names from _pcre2_xxx to xxxx, thereby avoiding name clashes")
|
|||
print("with the library. At present, just one of these tables is actually")
|
||||
print("needed. */")
|
||||
print()
|
||||
print("#ifndef PCRE2_INCLUDED")
|
||||
print("#ifndef PCRE2_PCRE2TEST")
|
||||
print()
|
||||
print("#ifdef HAVE_CONFIG_H")
|
||||
print("#include \"config.h\"")
|
||||
|
@ -435,7 +441,7 @@ print("#endif")
|
|||
print()
|
||||
print("#include \"pcre2_internal.h\"")
|
||||
print()
|
||||
print("#endif /* PCRE2_INCLUDED */")
|
||||
print("#endif /* PCRE2_PCRE2TEST */")
|
||||
print()
|
||||
print("/* Unicode character database. */")
|
||||
print("/* This file was autogenerated by the MultiStage2.py script. */")
|
||||
|
@ -476,7 +482,7 @@ print()
|
|||
|
||||
print("/* When #included in pcre2test, we don't need this large table. */")
|
||||
print()
|
||||
print("#ifndef PCRE2_INCLUDED")
|
||||
print("#ifndef PCRE2_PCRE2TEST")
|
||||
print()
|
||||
print_records(records, record_size)
|
||||
print_table(min_stage1, 'PRIV(ucd_stage1)')
|
||||
|
@ -486,7 +492,7 @@ print("#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h")
|
|||
print("#endif")
|
||||
print("#endif /* SUPPORT_UTF */")
|
||||
print()
|
||||
print("#endif /* PCRE2_INCLUDED */")
|
||||
print("#endif /* PCRE2_PCRE2TEST */")
|
||||
|
||||
"""
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# CaseFolding-6.3.0.txt
|
||||
# Date: 2012-12-20, 22:14:35 GMT [MD]
|
||||
# CaseFolding-7.0.0.txt
|
||||
# Date: 2014-04-09, 20:00:56 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2013 Unicode, Inc.
|
||||
# Copyright (c) 1991-2014 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
|
@ -25,7 +25,7 @@
|
|||
#
|
||||
# For information on case folding, including how to have case folding
|
||||
# preserve normalization formats, see Section 3.13 Default Case Algorithms in
|
||||
# The Unicode Standard, Version 5.0.
|
||||
# The Unicode Standard.
|
||||
#
|
||||
# ================================================================================
|
||||
# Format
|
||||
|
@ -58,8 +58,6 @@
|
|||
# All code points not explicitly listed for Case_Folding
|
||||
# have the value C for the status field, and the code point itself for the mapping field.
|
||||
|
||||
# @missing: 0000..10FFFF; C; <code point>
|
||||
|
||||
# =================================================================
|
||||
0041; C; 0061; # LATIN CAPITAL LETTER A
|
||||
0042; C; 0062; # LATIN CAPITAL LETTER B
|
||||
|
@ -298,6 +296,7 @@
|
|||
0370; C; 0371; # GREEK CAPITAL LETTER HETA
|
||||
0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI
|
||||
0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
|
||||
037F; C; 03F3; # GREEK CAPITAL LETTER YOT
|
||||
0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||
0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS
|
||||
0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS
|
||||
|
@ -505,6 +504,10 @@
|
|||
0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
|
||||
0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER
|
||||
0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER
|
||||
0528; C; 0529; # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK
|
||||
052A; C; 052B; # CYRILLIC CAPITAL LETTER DZZHE
|
||||
052C; C; 052D; # CYRILLIC CAPITAL LETTER DCHE
|
||||
052E; C; 052F; # CYRILLIC CAPITAL LETTER EL WITH DESCENDER
|
||||
0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
|
||||
0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
|
||||
0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
|
||||
|
@ -1088,6 +1091,8 @@ A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE
|
|||
A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE
|
||||
A694; C; A695; # CYRILLIC CAPITAL LETTER HWE
|
||||
A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE
|
||||
A698; C; A699; # CYRILLIC CAPITAL LETTER DOUBLE O
|
||||
A69A; C; A69B; # CYRILLIC CAPITAL LETTER CROSSED O
|
||||
A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
|
||||
A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
|
||||
A726; C; A727; # LATIN CAPITAL LETTER HENG
|
||||
|
@ -1138,12 +1143,22 @@ A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO
|
|||
A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H
|
||||
A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER
|
||||
A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR
|
||||
A796; C; A797; # LATIN CAPITAL LETTER B WITH FLOURISH
|
||||
A798; C; A799; # LATIN CAPITAL LETTER F WITH STROKE
|
||||
A79A; C; A79B; # LATIN CAPITAL LETTER VOLAPUK AE
|
||||
A79C; C; A79D; # LATIN CAPITAL LETTER VOLAPUK OE
|
||||
A79E; C; A79F; # LATIN CAPITAL LETTER VOLAPUK UE
|
||||
A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
|
||||
A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE
|
||||
A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE
|
||||
A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE
|
||||
A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
|
||||
A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK
|
||||
A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E
|
||||
A7AC; C; 0261; # LATIN CAPITAL LETTER SCRIPT G
|
||||
A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT
|
||||
A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K
|
||||
A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T
|
||||
FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
|
||||
FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
|
||||
FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
|
||||
|
@ -1222,5 +1237,37 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
|
|||
10425; C; 1044D; # DESERET CAPITAL LETTER ENG
|
||||
10426; C; 1044E; # DESERET CAPITAL LETTER OI
|
||||
10427; C; 1044F; # DESERET CAPITAL LETTER EW
|
||||
118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA
|
||||
118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A
|
||||
118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI
|
||||
118A3; C; 118C3; # WARANG CITI CAPITAL LETTER YU
|
||||
118A4; C; 118C4; # WARANG CITI CAPITAL LETTER YA
|
||||
118A5; C; 118C5; # WARANG CITI CAPITAL LETTER YO
|
||||
118A6; C; 118C6; # WARANG CITI CAPITAL LETTER II
|
||||
118A7; C; 118C7; # WARANG CITI CAPITAL LETTER UU
|
||||
118A8; C; 118C8; # WARANG CITI CAPITAL LETTER E
|
||||
118A9; C; 118C9; # WARANG CITI CAPITAL LETTER O
|
||||
118AA; C; 118CA; # WARANG CITI CAPITAL LETTER ANG
|
||||
118AB; C; 118CB; # WARANG CITI CAPITAL LETTER GA
|
||||
118AC; C; 118CC; # WARANG CITI CAPITAL LETTER KO
|
||||
118AD; C; 118CD; # WARANG CITI CAPITAL LETTER ENY
|
||||
118AE; C; 118CE; # WARANG CITI CAPITAL LETTER YUJ
|
||||
118AF; C; 118CF; # WARANG CITI CAPITAL LETTER UC
|
||||
118B0; C; 118D0; # WARANG CITI CAPITAL LETTER ENN
|
||||
118B1; C; 118D1; # WARANG CITI CAPITAL LETTER ODD
|
||||
118B2; C; 118D2; # WARANG CITI CAPITAL LETTER TTE
|
||||
118B3; C; 118D3; # WARANG CITI CAPITAL LETTER NUNG
|
||||
118B4; C; 118D4; # WARANG CITI CAPITAL LETTER DA
|
||||
118B5; C; 118D5; # WARANG CITI CAPITAL LETTER AT
|
||||
118B6; C; 118D6; # WARANG CITI CAPITAL LETTER AM
|
||||
118B7; C; 118D7; # WARANG CITI CAPITAL LETTER BU
|
||||
118B8; C; 118D8; # WARANG CITI CAPITAL LETTER PU
|
||||
118B9; C; 118D9; # WARANG CITI CAPITAL LETTER HIYO
|
||||
118BA; C; 118DA; # WARANG CITI CAPITAL LETTER HOLO
|
||||
118BB; C; 118DB; # WARANG CITI CAPITAL LETTER HORR
|
||||
118BC; C; 118DC; # WARANG CITI CAPITAL LETTER HAR
|
||||
118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU
|
||||
118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII
|
||||
118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO
|
||||
#
|
||||
# EOF
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,8 +1,8 @@
|
|||
# GraphemeBreakProperty-6.3.0.txt
|
||||
# Date: 2013-03-02, 16:07:40 GMT [MD]
|
||||
# GraphemeBreakProperty-7.0.0.txt
|
||||
# Date: 2014-02-19, 15:51:21 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2013 Unicode, Inc.
|
||||
# Copyright (c) 1991-2014 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
|
@ -34,7 +34,7 @@
|
|||
000E..001F ; Control # Cc [18] <control-000E>..<control-001F>
|
||||
007F..009F ; Control # Cc [33] <control-007F>..<control-009F>
|
||||
00AD ; Control # Cf SOFT HYPHEN
|
||||
0600..0604 ; Control # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT
|
||||
0600..0605 ; Control # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
|
||||
061C ; Control # Cf ARABIC LETTER MARK
|
||||
06DD ; Control # Cf ARABIC END OF AYAH
|
||||
070F ; Control # Cf SYRIAC ABBREVIATION MARK
|
||||
|
@ -52,6 +52,7 @@ FEFF ; Control # Cf ZERO WIDTH NO-BREAK SPACE
|
|||
FFF0..FFF8 ; Control # Cn [9] <reserved-FFF0>..<reserved-FFF8>
|
||||
FFF9..FFFB ; Control # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
|
||||
110BD ; Control # Cf KAITHI NUMBER SIGN
|
||||
1BCA0..1BCA3 ; Control # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
|
||||
1D173..1D17A ; Control # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
|
||||
E0000 ; Control # Cn <reserved-E0000>
|
||||
E0001 ; Control # Cf LANGUAGE TAG
|
||||
|
@ -60,7 +61,7 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG
|
|||
E0080..E00FF ; Control # Cn [128] <reserved-E0080>..<reserved-E00FF>
|
||||
E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
|
||||
|
||||
# Total code points: 6025
|
||||
# Total code points: 6030
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -88,8 +89,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
|
|||
0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
|
||||
0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
|
||||
0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
|
||||
08E4..08FE ; Extend # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT
|
||||
0900..0902 ; Extend # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
|
||||
08E4..0902 ; Extend # Mn [31] ARABIC CURLY FATHA..DEVANAGARI SIGN ANUSVARA
|
||||
093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE
|
||||
093C ; Extend # Mn DEVANAGARI SIGN NUKTA
|
||||
0941..0948 ; Extend # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
|
||||
|
@ -131,11 +131,13 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
|
|||
0BC0 ; Extend # Mn TAMIL VOWEL SIGN II
|
||||
0BCD ; Extend # Mn TAMIL SIGN VIRAMA
|
||||
0BD7 ; Extend # Mc TAMIL AU LENGTH MARK
|
||||
0C00 ; Extend # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE
|
||||
0C3E..0C40 ; Extend # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
|
||||
0C46..0C48 ; Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
|
||||
0C4A..0C4D ; Extend # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
|
||||
0C55..0C56 ; Extend # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
|
||||
0C62..0C63 ; Extend # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
|
||||
0C81 ; Extend # Mn KANNADA SIGN CANDRABINDU
|
||||
0CBC ; Extend # Mn KANNADA SIGN NUKTA
|
||||
0CBF ; Extend # Mn KANNADA VOWEL SIGN I
|
||||
0CC2 ; Extend # Mc KANNADA VOWEL SIGN UU
|
||||
|
@ -143,6 +145,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
|
|||
0CCC..0CCD ; Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
|
||||
0CD5..0CD6 ; Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
|
||||
0CE2..0CE3 ; Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
|
||||
0D01 ; Extend # Mn MALAYALAM SIGN CANDRABINDU
|
||||
0D3E ; Extend # Mc MALAYALAM VOWEL SIGN AA
|
||||
0D41..0D44 ; Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
|
||||
0D4D ; Extend # Mn MALAYALAM SIGN VIRAMA
|
||||
|
@ -206,6 +209,8 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
|
|||
1A65..1A6C ; Extend # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
|
||||
1A73..1A7C ; Extend # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
|
||||
1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
|
||||
1AB0..1ABD ; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
|
||||
1ABE ; Extend # Me COMBINING PARENTHESES OVERLAY
|
||||
1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
|
||||
1B34 ; Extend # Mn BALINESE SIGN REREKAN
|
||||
1B36..1B3A ; Extend # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
|
||||
|
@ -215,7 +220,7 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
|
|||
1B80..1B81 ; Extend # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
|
||||
1BA2..1BA5 ; Extend # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
|
||||
1BA8..1BA9 ; Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
|
||||
1BAB ; Extend # Mn SUNDANESE SIGN VIRAMA
|
||||
1BAB..1BAD ; Extend # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA
|
||||
1BE6 ; Extend # Mn BATAK SIGN TOMPI
|
||||
1BE8..1BE9 ; Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE
|
||||
1BED ; Extend # Mn BATAK VOWEL SIGN KARO O
|
||||
|
@ -227,7 +232,8 @@ E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
|
|||
1CE2..1CE8 ; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
|
||||
1CED ; Extend # Mn VEDIC SIGN TIRYAK
|
||||
1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE
|
||||
1DC0..1DE6 ; Extend # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
|
||||
1CF8..1CF9 ; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
|
||||
1DC0..1DF5 ; Extend # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE
|
||||
1DFC..1DFF ; Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
|
||||
20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
|
||||
|
@ -258,11 +264,13 @@ A980..A982 ; Extend # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
|
|||
A9B3 ; Extend # Mn JAVANESE SIGN CECAK TELU
|
||||
A9B6..A9B9 ; Extend # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
|
||||
A9BC ; Extend # Mn JAVANESE VOWEL SIGN PEPET
|
||||
A9E5 ; Extend # Mn MYANMAR SIGN SHAN SAW
|
||||
AA29..AA2E ; Extend # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
|
||||
AA31..AA32 ; Extend # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
|
||||
AA35..AA36 ; Extend # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
|
||||
AA43 ; Extend # Mn CHAM CONSONANT SIGN FINAL NG
|
||||
AA4C ; Extend # Mn CHAM CONSONANT SIGN FINAL M
|
||||
AA7C ; Extend # Mn MYANMAR SIGN TAI LAING TONE-2
|
||||
AAB0 ; Extend # Mn TAI VIET MAI KANG
|
||||
AAB2..AAB4 ; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
|
||||
AAB7..AAB8 ; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
|
||||
|
@ -275,29 +283,61 @@ ABE8 ; Extend # Mn MEETEI MAYEK VOWEL SIGN UNAP
|
|||
ABED ; Extend # Mn MEETEI MAYEK APUN IYEK
|
||||
FB1E ; Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA
|
||||
FE00..FE0F ; Extend # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
|
||||
FE20..FE26 ; Extend # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
|
||||
FE20..FE2D ; Extend # Mn [14] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON BELOW
|
||||
FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
101FD ; Extend # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
|
||||
102E0 ; Extend # Mn COPTIC EPACT THOUSANDS MARK
|
||||
10376..1037A ; Extend # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII
|
||||
10A01..10A03 ; Extend # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
|
||||
10A05..10A06 ; Extend # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
|
||||
10A0C..10A0F ; Extend # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
|
||||
10A38..10A3A ; Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
|
||||
10A3F ; Extend # Mn KHAROSHTHI VIRAMA
|
||||
10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
|
||||
11001 ; Extend # Mn BRAHMI SIGN ANUSVARA
|
||||
11038..11046 ; Extend # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA
|
||||
11080..11081 ; Extend # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA
|
||||
1107F..11081 ; Extend # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA
|
||||
110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
|
||||
110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
|
||||
11100..11102 ; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA
|
||||
11127..1112B ; Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU
|
||||
1112D..11134 ; Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA
|
||||
11173 ; Extend # Mn MAHAJANI SIGN NUKTA
|
||||
11180..11181 ; Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA
|
||||
111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O
|
||||
1122F..11231 ; Extend # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
|
||||
11234 ; Extend # Mn KHOJKI SIGN ANUSVARA
|
||||
11236..11237 ; Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
|
||||
112DF ; Extend # Mn KHUDAWADI SIGN ANUSVARA
|
||||
112E3..112EA ; Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA
|
||||
11301 ; Extend # Mn GRANTHA SIGN CANDRABINDU
|
||||
1133C ; Extend # Mn GRANTHA SIGN NUKTA
|
||||
1133E ; Extend # Mc GRANTHA VOWEL SIGN AA
|
||||
11340 ; Extend # Mn GRANTHA VOWEL SIGN II
|
||||
11357 ; Extend # Mc GRANTHA AU LENGTH MARK
|
||||
11366..1136C ; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
|
||||
11370..11374 ; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
|
||||
114B0 ; Extend # Mc TIRHUTA VOWEL SIGN AA
|
||||
114B3..114B8 ; Extend # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL
|
||||
114BA ; Extend # Mn TIRHUTA VOWEL SIGN SHORT E
|
||||
114BD ; Extend # Mc TIRHUTA VOWEL SIGN SHORT O
|
||||
114BF..114C0 ; Extend # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA
|
||||
114C2..114C3 ; Extend # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA
|
||||
115AF ; Extend # Mc SIDDHAM VOWEL SIGN AA
|
||||
115B2..115B5 ; Extend # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR
|
||||
115BC..115BD ; Extend # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA
|
||||
115BF..115C0 ; Extend # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA
|
||||
11633..1163A ; Extend # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI
|
||||
1163D ; Extend # Mn MODI SIGN ANUSVARA
|
||||
1163F..11640 ; Extend # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA
|
||||
116AB ; Extend # Mn TAKRI SIGN ANUSVARA
|
||||
116AD ; Extend # Mn TAKRI VOWEL SIGN AA
|
||||
116B0..116B5 ; Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU
|
||||
116B7 ; Extend # Mn TAKRI SIGN NUKTA
|
||||
16AF0..16AF4 ; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
|
||||
16B30..16B36 ; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
|
||||
16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
|
||||
1BC9D..1BC9E ; Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
|
||||
1D165 ; Extend # Mc MUSICAL SYMBOL COMBINING STEM
|
||||
1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
|
||||
1D16E..1D172 ; Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
|
||||
|
@ -305,9 +345,10 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
|
|||
1D185..1D18B ; Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
|
||||
1D1AA..1D1AD ; Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
|
||||
1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
|
||||
1E8D0..1E8D6 ; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
|
||||
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
||||
|
||||
# Total code points: 1318
|
||||
# Total code points: 1461
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -386,7 +427,6 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
|||
1BA1 ; SpacingMark # Mc SUNDANESE CONSONANT SIGN PAMINGKAL
|
||||
1BA6..1BA7 ; SpacingMark # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG
|
||||
1BAA ; SpacingMark # Mc SUNDANESE SIGN PAMAAEH
|
||||
1BAC..1BAD ; SpacingMark # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA
|
||||
1BE7 ; SpacingMark # Mc BATAK VOWEL SIGN E
|
||||
1BEA..1BEC ; SpacingMark # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O
|
||||
1BEE ; SpacingMark # Mc BATAK VOWEL SIGN U
|
||||
|
@ -423,6 +463,27 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
|
|||
11182 ; SpacingMark # Mc SHARADA SIGN VISARGA
|
||||
111B3..111B5 ; SpacingMark # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II
|
||||
111BF..111C0 ; SpacingMark # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA
|
||||
1122C..1122E ; SpacingMark # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II
|
||||
11232..11233 ; SpacingMark # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU
|
||||
11235 ; SpacingMark # Mc KHOJKI SIGN VIRAMA
|
||||
112E0..112E2 ; SpacingMark # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II
|
||||
11302..11303 ; SpacingMark # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA
|
||||
1133F ; SpacingMark # Mc GRANTHA VOWEL SIGN I
|
||||
11341..11344 ; SpacingMark # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR
|
||||
11347..11348 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI
|
||||
1134B..1134D ; SpacingMark # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA
|
||||
11362..11363 ; SpacingMark # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
|
||||
114B1..114B2 ; SpacingMark # Mc [2] TIRHUTA VOWEL SIGN I..TIRHUTA VOWEL SIGN II
|
||||
114B9 ; SpacingMark # Mc TIRHUTA VOWEL SIGN E
|
||||
114BB..114BC ; SpacingMark # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O
|
||||
114BE ; SpacingMark # Mc TIRHUTA VOWEL SIGN AU
|
||||
114C1 ; SpacingMark # Mc TIRHUTA SIGN VISARGA
|
||||
115B0..115B1 ; SpacingMark # Mc [2] SIDDHAM VOWEL SIGN I..SIDDHAM VOWEL SIGN II
|
||||
115B8..115BB ; SpacingMark # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU
|
||||
115BE ; SpacingMark # Mc SIDDHAM SIGN VISARGA
|
||||
11630..11632 ; SpacingMark # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II
|
||||
1163B..1163C ; SpacingMark # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU
|
||||
1163E ; SpacingMark # Mc MODI SIGN VISARGA
|
||||
116AC ; SpacingMark # Mc TAKRI SIGN VISARGA
|
||||
116AE..116AF ; SpacingMark # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II
|
||||
116B6 ; SpacingMark # Mc TAKRI SIGN VIRAMA
|
||||
|
@ -430,7 +491,7 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK
|
|||
1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
|
||||
1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT
|
||||
|
||||
# Total code points: 290
|
||||
# Total code points: 331
|
||||
|
||||
# ================================================
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# Scripts-6.3.0.txt
|
||||
# Date: 2013-07-05, 14:09:02 GMT [MD]
|
||||
# Scripts-7.0.0.txt
|
||||
# Date: 2014-05-15, 00:11:35 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2013 Unicode, Inc.
|
||||
# Copyright (c) 1991-2014 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
|
||||
|
@ -83,8 +83,10 @@
|
|||
0385 ; Common # Sk GREEK DIALYTIKA TONOS
|
||||
0387 ; Common # Po GREEK ANO TELEIA
|
||||
0589 ; Common # Po ARMENIAN FULL STOP
|
||||
0605 ; Common # Cf ARABIC NUMBER MARK ABOVE
|
||||
060C ; Common # Po ARABIC COMMA
|
||||
061B ; Common # Po ARABIC SEMICOLON
|
||||
061C ; Common # Cf ARABIC LETTER MARK
|
||||
061F ; Common # Po ARABIC QUESTION MARK
|
||||
0640 ; Common # Lm ARABIC TATWEEL
|
||||
0660..0669 ; Common # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
|
||||
|
@ -146,7 +148,7 @@
|
|||
208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
|
||||
208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS
|
||||
208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS
|
||||
20A0..20BA ; Common # Sc [27] EURO-CURRENCY SIGN..TURKISH LIRA SIGN
|
||||
20A0..20BD ; Common # Sc [30] EURO-CURRENCY SIGN..RUBLE SIGN
|
||||
2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
|
||||
2102 ; Common # L& DOUBLE-STRUCK CAPITAL C
|
||||
2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA
|
||||
|
@ -215,7 +217,7 @@
|
|||
239B..23B3 ; Common # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
|
||||
23B4..23DB ; Common # So [40] TOP SQUARE BRACKET..FUSE
|
||||
23DC..23E1 ; Common # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
|
||||
23E2..23F3 ; Common # So [18] WHITE TRAPEZIUM..HOURGLASS WITH FLOWING SAND
|
||||
23E2..23FA ; Common # So [25] WHITE TRAPEZIUM..BLACK CIRCLE FOR RECORD
|
||||
2400..2426 ; Common # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
|
||||
2440..244A ; Common # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
|
||||
2460..249B ; Common # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
|
||||
|
@ -229,8 +231,7 @@
|
|||
25F8..25FF ; Common # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE
|
||||
2600..266E ; Common # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN
|
||||
266F ; Common # Sm MUSIC SHARP SIGN
|
||||
2670..26FF ; Common # So [144] WEST SYRIAC CROSS..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
|
||||
2701..2767 ; Common # So [103] UPPER BLADE SCISSORS..ROTATED FLORAL HEART BULLET
|
||||
2670..2767 ; Common # So [248] WEST SYRIAC CROSS..ROTATED FLORAL HEART BULLET
|
||||
2768 ; Common # Ps MEDIUM LEFT PARENTHESIS ORNAMENT
|
||||
2769 ; Common # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT
|
||||
276A ; Common # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
|
||||
|
@ -298,7 +299,11 @@
|
|||
2B30..2B44 ; Common # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET
|
||||
2B45..2B46 ; Common # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW
|
||||
2B47..2B4C ; Common # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
|
||||
2B50..2B59 ; Common # So [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE
|
||||
2B4D..2B73 ; Common # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR
|
||||
2B76..2B95 ; Common # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
|
||||
2B98..2BB9 ; Common # So [34] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..UP ARROWHEAD IN A RECTANGLE BOX
|
||||
2BBD..2BC8 ; Common # So [12] BALLOT BOX WITH LIGHT X..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED
|
||||
2BCA..2BD1 ; Common # So [8] TOP HALF BLACK CIRCLE..UNCERTAINTY SIGN
|
||||
2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
|
||||
2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET
|
||||
2E03 ; Common # Pf RIGHT SUBSTITUTION BRACKET
|
||||
|
@ -332,6 +337,10 @@
|
|||
2E2F ; Common # Lm VERTICAL TILDE
|
||||
2E30..2E39 ; Common # Po [10] RING POINT..TOP HALF SECTION SIGN
|
||||
2E3A..2E3B ; Common # Pd [2] TWO-EM DASH..THREE-EM DASH
|
||||
2E3C..2E3F ; Common # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM
|
||||
2E40 ; Common # Pd DOUBLE HYPHEN
|
||||
2E41 ; Common # Po REVERSED COMMA
|
||||
2E42 ; Common # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
|
||||
2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
|
||||
3000 ; Common # Zs IDEOGRAPHIC SPACE
|
||||
3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
|
||||
|
@ -395,10 +404,11 @@ A830..A835 ; Common # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC
|
|||
A836..A837 ; Common # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
|
||||
A838 ; Common # Sc NORTH INDIC RUPEE MARK
|
||||
A839 ; Common # So NORTH INDIC QUANTITY MARK
|
||||
A92E ; Common # Po KAYAH LI SIGN CWI
|
||||
A9CF ; Common # Lm JAVANESE PANGRANGKEP
|
||||
FD3E ; Common # Ps ORNATE LEFT PARENTHESIS
|
||||
FD3F ; Common # Pe ORNATE RIGHT PARENTHESIS
|
||||
FDFD ; Common # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
|
||||
AB5B ; Common # Sk MODIFIER BREVE WITH INVERTED BREVE
|
||||
FD3E ; Common # Pe ORNATE LEFT PARENTHESIS
|
||||
FD3F ; Common # Ps ORNATE RIGHT PARENTHESIS
|
||||
FE10..FE16 ; Common # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK
|
||||
FE17 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET
|
||||
FE18 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET
|
||||
|
@ -491,6 +501,8 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
|
|||
10137..1013F ; Common # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
|
||||
10190..1019B ; Common # So [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
|
||||
101D0..101FC ; Common # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
|
||||
102E1..102FB ; Common # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
|
||||
1BCA0..1BCA3 ; Common # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
|
||||
1D000..1D0F5 ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
|
||||
1D100..1D126 ; Common # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
|
||||
1D129..1D164 ; Common # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
|
||||
|
@ -547,10 +559,10 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
|
|||
1F000..1F02B ; Common # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK
|
||||
1F030..1F093 ; Common # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
|
||||
1F0A0..1F0AE ; Common # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES
|
||||
1F0B1..1F0BE ; Common # So [14] PLAYING CARD ACE OF HEARTS..PLAYING CARD KING OF HEARTS
|
||||
1F0B1..1F0BF ; Common # So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER
|
||||
1F0C1..1F0CF ; Common # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER
|
||||
1F0D1..1F0DF ; Common # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER
|
||||
1F100..1F10A ; Common # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA
|
||||
1F0D1..1F0F5 ; Common # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21
|
||||
1F100..1F10C ; Common # No [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
|
||||
1F110..1F12E ; Common # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
|
||||
1F130..1F16B ; Common # So [60] SQUARED LATIN CAPITAL LETTER A..RAISED MD SIGN
|
||||
1F170..1F19A ; Common # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS
|
||||
|
@ -559,28 +571,29 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR
|
|||
1F210..1F23A ; Common # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6
|
||||
1F240..1F248 ; Common # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
|
||||
1F250..1F251 ; Common # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
|
||||
1F300..1F320 ; Common # So [33] CYCLONE..SHOOTING STAR
|
||||
1F330..1F335 ; Common # So [6] CHESTNUT..CACTUS
|
||||
1F337..1F37C ; Common # So [70] TULIP..BABY BOTTLE
|
||||
1F380..1F393 ; Common # So [20] RIBBON..GRADUATION CAP
|
||||
1F3A0..1F3C4 ; Common # So [37] CAROUSEL HORSE..SURFER
|
||||
1F3C6..1F3CA ; Common # So [5] TROPHY..SWIMMER
|
||||
1F3E0..1F3F0 ; Common # So [17] HOUSE BUILDING..EUROPEAN CASTLE
|
||||
1F400..1F43E ; Common # So [63] RAT..PAW PRINTS
|
||||
1F440 ; Common # So EYES
|
||||
1F442..1F4F7 ; Common # So [182] EAR..CAMERA
|
||||
1F4F9..1F4FC ; Common # So [4] VIDEO CAMERA..VIDEOCASSETTE
|
||||
1F500..1F53D ; Common # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE
|
||||
1F540..1F543 ; Common # So [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS
|
||||
1F550..1F567 ; Common # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY
|
||||
1F5FB..1F640 ; Common # So [70] MOUNT FUJI..WEARY CAT FACE
|
||||
1F645..1F64F ; Common # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS
|
||||
1F680..1F6C5 ; Common # So [70] ROCKET..LEFT LUGGAGE
|
||||
1F300..1F32C ; Common # So [45] CYCLONE..WIND BLOWING FACE
|
||||
1F330..1F37D ; Common # So [78] CHESTNUT..FORK AND KNIFE WITH PLATE
|
||||
1F380..1F3CE ; Common # So [79] RIBBON..RACING CAR
|
||||
1F3D4..1F3F7 ; Common # So [36] SNOW CAPPED MOUNTAIN..LABEL
|
||||
1F400..1F4FE ; Common # So [255] RAT..PORTABLE STEREO
|
||||
1F500..1F54A ; Common # So [75] TWISTED RIGHTWARDS ARROWS..DOVE OF PEACE
|
||||
1F550..1F579 ; Common # So [42] CLOCK FACE ONE OCLOCK..JOYSTICK
|
||||
1F57B..1F5A3 ; Common # So [41] LEFT HAND TELEPHONE RECEIVER..BLACK DOWN POINTING BACKHAND INDEX
|
||||
1F5A5..1F642 ; Common # So [158] DESKTOP COMPUTER..SLIGHTLY SMILING FACE
|
||||
1F645..1F6CF ; Common # So [139] FACE WITH NO GOOD GESTURE..BED
|
||||
1F6E0..1F6EC ; Common # So [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
|
||||
1F6F0..1F6F3 ; Common # So [4] SATELLITE..PASSENGER SHIP
|
||||
1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
|
||||
1F780..1F7D4 ; Common # So [85] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..HEAVY TWELVE POINTED PINWHEEL STAR
|
||||
1F800..1F80B ; Common # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
|
||||
1F810..1F847 ; Common # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW
|
||||
1F850..1F859 ; Common # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
|
||||
1F860..1F887 ; Common # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
|
||||
1F890..1F8AD ; Common # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
|
||||
E0001 ; Common # Cf LANGUAGE TAG
|
||||
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
|
||||
|
||||
# Total code points: 6418
|
||||
# Total code points: 7129
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -622,16 +635,20 @@ A722..A76F ; Latin # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN
|
|||
A770 ; Latin # Lm MODIFIER LETTER US
|
||||
A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
|
||||
A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
|
||||
A790..A793 ; Latin # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR
|
||||
A7A0..A7AA ; Latin # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK
|
||||
A790..A7AD ; Latin # L& [30] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER L WITH BELT
|
||||
A7B0..A7B1 ; Latin # L& [2] LATIN CAPITAL LETTER TURNED K..LATIN CAPITAL LETTER TURNED T
|
||||
A7F7 ; Latin # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
|
||||
A7F8..A7F9 ; Latin # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
|
||||
A7FA ; Latin # L& LATIN LETTER SMALL CAPITAL TURNED M
|
||||
A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M
|
||||
AB30..AB5A ; Latin # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
|
||||
AB5C..AB5F ; Latin # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
|
||||
AB64 ; Latin # L& LATIN SMALL LETTER INVERTED ALPHA
|
||||
FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
|
||||
FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
|
||||
|
||||
# Total code points: 1272
|
||||
# Total code points: 1338
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -640,6 +657,7 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
|
|||
0376..0377 ; Greek # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
|
||||
037A ; Greek # Lm GREEK YPOGEGRAMMENI
|
||||
037B..037D ; Greek # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
|
||||
037F ; Greek # L& GREEK CAPITAL LETTER YOT
|
||||
0384 ; Greek # Sk GREEK TONOS
|
||||
0386 ; Greek # L& GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||
0388..038A ; Greek # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||
|
@ -679,15 +697,18 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
|
|||
1FF6..1FFC ; Greek # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
|
||||
1FFD..1FFE ; Greek # Sk [2] GREEK OXIA..GREEK DASIA
|
||||
2126 ; Greek # L& OHM SIGN
|
||||
AB65 ; Greek # L& GREEK LETTER SMALL CAPITAL OMEGA
|
||||
10140..10174 ; Greek # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS
|
||||
10175..10178 ; Greek # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN
|
||||
10179..10189 ; Greek # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN
|
||||
1018A ; Greek # No GREEK ZERO SIGN
|
||||
1018A..1018B ; Greek # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN
|
||||
1018C ; Greek # So GREEK SINUSOID SIGN
|
||||
101A0 ; Greek # So GREEK SYMBOL TAU RHO
|
||||
1D200..1D241 ; Greek # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
|
||||
1D242..1D244 ; Greek # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
|
||||
1D245 ; Greek # So GREEK MUSICAL LEIMMA
|
||||
|
||||
# Total code points: 511
|
||||
# Total code points: 516
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -696,7 +717,7 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
|
|||
0483..0484 ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PALATALIZATION
|
||||
0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE
|
||||
0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
|
||||
048A..0527 ; Cyrillic # L& [158] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER SHHA WITH DESCENDER
|
||||
048A..052F ; Cyrillic # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER
|
||||
1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL
|
||||
1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN
|
||||
2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
|
||||
|
@ -708,10 +729,11 @@ A673 ; Cyrillic # Po SLAVONIC ASTERISK
|
|||
A674..A67D ; Cyrillic # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK
|
||||
A67E ; Cyrillic # Po CYRILLIC KAVYKA
|
||||
A67F ; Cyrillic # Lm CYRILLIC PAYEROK
|
||||
A680..A697 ; Cyrillic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE
|
||||
A680..A69B ; Cyrillic # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O
|
||||
A69C..A69D ; Cyrillic # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN
|
||||
A69F ; Cyrillic # Mn COMBINING CYRILLIC LETTER IOTIFIED E
|
||||
|
||||
# Total code points: 417
|
||||
# Total code points: 431
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -720,10 +742,11 @@ A69F ; Cyrillic # Mn COMBINING CYRILLIC LETTER IOTIFIED E
|
|||
055A..055F ; Armenian # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
|
||||
0561..0587 ; Armenian # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
|
||||
058A ; Armenian # Pd ARMENIAN HYPHEN
|
||||
058D..058E ; Armenian # So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN
|
||||
058F ; Armenian # Sc ARMENIAN DRAM SIGN
|
||||
FB13..FB17 ; Armenian # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
|
||||
|
||||
# Total code points: 91
|
||||
# Total code points: 93
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -761,7 +784,6 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
|
|||
060D ; Arabic # Po ARABIC DATE SEPARATOR
|
||||
060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
|
||||
0610..061A ; Arabic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
|
||||
061C ; Arabic # Cf ARABIC LETTER MARK
|
||||
061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK
|
||||
0620..063F ; Arabic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
|
||||
0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
|
||||
|
@ -784,9 +806,8 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
|
|||
06FD..06FE ; Arabic # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN
|
||||
06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V
|
||||
0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
|
||||
08A0 ; Arabic # Lo ARABIC LETTER BEH WITH SMALL V BELOW
|
||||
08A2..08AC ; Arabic # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH
|
||||
08E4..08FE ; Arabic # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT
|
||||
08A0..08B2 ; Arabic # Lo [19] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER ZAIN WITH INVERTED V ABOVE
|
||||
08E4..08FF ; Arabic # Mn [28] ARABIC CURLY FATHA..ARABIC MARK SIDEWAYS NOON GHUNNA
|
||||
FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
|
||||
FBB2..FBC1 ; Arabic # Sk [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW
|
||||
FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
|
||||
|
@ -794,6 +815,7 @@ FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIA
|
|||
FD92..FDC7 ; Arabic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
|
||||
FDF0..FDFB ; Arabic # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
|
||||
FDFC ; Arabic # Sc RIAL SIGN
|
||||
FDFD ; Arabic # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
|
||||
FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
|
||||
FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
|
||||
10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
|
||||
|
@ -832,7 +854,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
|
|||
1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
|
||||
1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
|
||||
|
||||
# Total code points: 1236
|
||||
# Total code points: 1244
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -875,17 +897,17 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
|
|||
0966..096F ; Devanagari # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
|
||||
0970 ; Devanagari # Po DEVANAGARI ABBREVIATION SIGN
|
||||
0971 ; Devanagari # Lm DEVANAGARI SIGN HIGH SPACING DOT
|
||||
0972..0977 ; Devanagari # Lo [6] DEVANAGARI LETTER CANDRA A..DEVANAGARI LETTER UUE
|
||||
0979..097F ; Devanagari # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA
|
||||
0972..097F ; Devanagari # Lo [14] DEVANAGARI LETTER CANDRA A..DEVANAGARI LETTER BBA
|
||||
A8E0..A8F1 ; Devanagari # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
|
||||
A8F2..A8F7 ; Devanagari # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
|
||||
A8F8..A8FA ; Devanagari # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET
|
||||
A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
|
||||
|
||||
# Total code points: 151
|
||||
# Total code points: 152
|
||||
|
||||
# ================================================
|
||||
|
||||
0980 ; Bengali # Lo BENGALI ANJI
|
||||
0981 ; Bengali # Mn BENGALI SIGN CANDRABINDU
|
||||
0982..0983 ; Bengali # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
|
||||
0985..098C ; Bengali # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
|
||||
|
@ -913,7 +935,7 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
|
|||
09FA ; Bengali # So BENGALI ISSHAR
|
||||
09FB ; Bengali # Sc BENGALI GANDA MARK
|
||||
|
||||
# Total code points: 92
|
||||
# Total code points: 93
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1030,12 +1052,12 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
|
|||
|
||||
# ================================================
|
||||
|
||||
0C00 ; Telugu # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE
|
||||
0C01..0C03 ; Telugu # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
|
||||
0C05..0C0C ; Telugu # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L
|
||||
0C0E..0C10 ; Telugu # Lo [3] TELUGU LETTER E..TELUGU LETTER AI
|
||||
0C12..0C28 ; Telugu # Lo [23] TELUGU LETTER O..TELUGU LETTER NA
|
||||
0C2A..0C33 ; Telugu # Lo [10] TELUGU LETTER PA..TELUGU LETTER LLA
|
||||
0C35..0C39 ; Telugu # Lo [5] TELUGU LETTER VA..TELUGU LETTER HA
|
||||
0C2A..0C39 ; Telugu # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA
|
||||
0C3D ; Telugu # Lo TELUGU SIGN AVAGRAHA
|
||||
0C3E..0C40 ; Telugu # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
|
||||
0C41..0C44 ; Telugu # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
|
||||
|
@ -1049,10 +1071,11 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
|
|||
0C78..0C7E ; Telugu # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
|
||||
0C7F ; Telugu # So TELUGU SIGN TUUMU
|
||||
|
||||
# Total code points: 93
|
||||
# Total code points: 95
|
||||
|
||||
# ================================================
|
||||
|
||||
0C81 ; Kannada # Mn KANNADA SIGN CANDRABINDU
|
||||
0C82..0C83 ; Kannada # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
|
||||
0C85..0C8C ; Kannada # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
|
||||
0C8E..0C90 ; Kannada # Lo [3] KANNADA LETTER E..KANNADA LETTER AI
|
||||
|
@ -1075,10 +1098,11 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
|
|||
0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
|
||||
0CF1..0CF2 ; Kannada # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
|
||||
|
||||
# Total code points: 86
|
||||
# Total code points: 87
|
||||
|
||||
# ================================================
|
||||
|
||||
0D01 ; Malayalam # Mn MALAYALAM SIGN CANDRABINDU
|
||||
0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
|
||||
0D05..0D0C ; Malayalam # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
|
||||
0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
|
||||
|
@ -1098,7 +1122,7 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
|
|||
0D79 ; Malayalam # So MALAYALAM DATE MARK
|
||||
0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
|
||||
|
||||
# Total code points: 98
|
||||
# Total code points: 99
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1113,10 +1137,12 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
|
|||
0DD2..0DD4 ; Sinhala # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
|
||||
0DD6 ; Sinhala # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA
|
||||
0DD8..0DDF ; Sinhala # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA
|
||||
0DE6..0DEF ; Sinhala # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE
|
||||
0DF2..0DF3 ; Sinhala # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
|
||||
0DF4 ; Sinhala # Po SINHALA PUNCTUATION KUNDDALIYA
|
||||
111E1..111F4 ; Sinhala # No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND
|
||||
|
||||
# Total code points: 80
|
||||
# Total code points: 110
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1239,14 +1265,23 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE
|
|||
109A..109C ; Myanmar # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A
|
||||
109D ; Myanmar # Mn MYANMAR VOWEL SIGN AITON AI
|
||||
109E..109F ; Myanmar # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION
|
||||
A9E0..A9E4 ; Myanmar # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA
|
||||
A9E5 ; Myanmar # Mn MYANMAR SIGN SHAN SAW
|
||||
A9E6 ; Myanmar # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION
|
||||
A9E7..A9EF ; Myanmar # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA
|
||||
A9F0..A9F9 ; Myanmar # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE
|
||||
A9FA..A9FE ; Myanmar # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA
|
||||
AA60..AA6F ; Myanmar # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA
|
||||
AA70 ; Myanmar # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
|
||||
AA71..AA76 ; Myanmar # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM
|
||||
AA77..AA79 ; Myanmar # So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO
|
||||
AA7A ; Myanmar # Lo MYANMAR LETTER AITON RA
|
||||
AA7B ; Myanmar # Mc MYANMAR SIGN PAO KAREN TONE
|
||||
AA7C ; Myanmar # Mn MYANMAR SIGN TAI LAING TONE-2
|
||||
AA7D ; Myanmar # Mc MYANMAR SIGN TAI LAING TONE-5
|
||||
AA7E..AA7F ; Myanmar # Lo [2] MYANMAR LETTER SHWE PALAUNG CHA..MYANMAR LETTER SHWE PALAUNG SHA
|
||||
|
||||
# Total code points: 188
|
||||
# Total code points: 223
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1350,8 +1385,9 @@ AB28..AB2E ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
|
|||
|
||||
16A0..16EA ; Runic # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
|
||||
16EE..16F0 ; Runic # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
|
||||
16F1..16F8 ; Runic # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC
|
||||
|
||||
# Total code points: 78
|
||||
# Total code points: 86
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1457,10 +1493,10 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
|
|||
|
||||
# ================================================
|
||||
|
||||
10300..1031E ; Old_Italic # Lo [31] OLD ITALIC LETTER A..OLD ITALIC LETTER UU
|
||||
10300..1031F ; Old_Italic # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS
|
||||
10320..10323 ; Old_Italic # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY
|
||||
|
||||
# Total code points: 35
|
||||
# Total code points: 36
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1484,12 +1520,15 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
|
|||
064B..0655 ; Inherited # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW
|
||||
0670 ; Inherited # Mn ARABIC LETTER SUPERSCRIPT ALEF
|
||||
0951..0952 ; Inherited # Mn [2] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI STRESS SIGN ANUDATTA
|
||||
1AB0..1ABD ; Inherited # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
|
||||
1ABE ; Inherited # Me COMBINING PARENTHESES OVERLAY
|
||||
1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
|
||||
1CD4..1CE0 ; Inherited # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
|
||||
1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
|
||||
1CED ; Inherited # Mn VEDIC SIGN TIRYAK
|
||||
1CF4 ; Inherited # Mn VEDIC TONE CANDRA ABOVE
|
||||
1DC0..1DE6 ; Inherited # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
|
||||
1CF8..1CF9 ; Inherited # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
|
||||
1DC0..1DF5 ; Inherited # Mn [54] COMBINING DOTTED GRAVE ACCENT..COMBINING UP TACK ABOVE
|
||||
1DFC..1DFF ; Inherited # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
|
||||
200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
|
||||
20D0..20DC ; Inherited # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
|
||||
|
@ -1500,15 +1539,16 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE
|
|||
302A..302D ; Inherited # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
|
||||
3099..309A ; Inherited # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
FE00..FE0F ; Inherited # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
|
||||
FE20..FE26 ; Inherited # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
|
||||
FE20..FE2D ; Inherited # Mn [14] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON BELOW
|
||||
101FD ; Inherited # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
|
||||
102E0 ; Inherited # Mn COPTIC EPACT THOUSANDS MARK
|
||||
1D167..1D169 ; Inherited # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
|
||||
1D17B..1D182 ; Inherited # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
|
||||
1D185..1D18B ; Inherited # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
|
||||
1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
|
||||
E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
|
||||
|
||||
# Total code points: 523
|
||||
# Total code points: 563
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1542,7 +1582,7 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
|
|||
|
||||
# ================================================
|
||||
|
||||
1900..191C ; Limbu # Lo [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA
|
||||
1900..191E ; Limbu # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA
|
||||
1920..1922 ; Limbu # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
|
||||
1923..1926 ; Limbu # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
|
||||
1927..1928 ; Limbu # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
|
||||
|
@ -1555,7 +1595,7 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2
|
|||
1944..1945 ; Limbu # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
|
||||
1946..194F ; Limbu # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
|
||||
|
||||
# Total code points: 66
|
||||
# Total code points: 68
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1730,11 +1770,11 @@ A828..A82B ; Syloti_Nagri # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI
|
|||
|
||||
# ================================================
|
||||
|
||||
12000..1236E ; Cuneiform # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM
|
||||
12400..12462 ; Cuneiform # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
|
||||
12470..12473 ; Cuneiform # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON
|
||||
12000..12398 ; Cuneiform # Lo [921] CUNEIFORM SIGN A..CUNEIFORM SIGN UM TIMES ME
|
||||
12400..1246E ; Cuneiform # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
|
||||
12470..12474 ; Cuneiform # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON
|
||||
|
||||
# Total code points: 982
|
||||
# Total code points: 1037
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -1773,8 +1813,7 @@ A874..A877 ; Phags_Pa # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOU
|
|||
1BA6..1BA7 ; Sundanese # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG
|
||||
1BA8..1BA9 ; Sundanese # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
|
||||
1BAA ; Sundanese # Mc SUNDANESE SIGN PAMAAEH
|
||||
1BAB ; Sundanese # Mn SUNDANESE SIGN VIRAMA
|
||||
1BAC..1BAD ; Sundanese # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA
|
||||
1BAB..1BAD ; Sundanese # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA
|
||||
1BAE..1BAF ; Sundanese # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA
|
||||
1BB0..1BB9 ; Sundanese # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
|
||||
1BBA..1BBF ; Sundanese # Lo [6] SUNDANESE AVAGRAHA..SUNDANESE LETTER FINAL M
|
||||
|
@ -1831,9 +1870,9 @@ A8D0..A8D9 ; Saurashtra # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NI
|
|||
A900..A909 ; Kayah_Li # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
|
||||
A90A..A925 ; Kayah_Li # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO
|
||||
A926..A92D ; Kayah_Li # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
|
||||
A92E..A92F ; Kayah_Li # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA
|
||||
A92F ; Kayah_Li # Po KAYAH LI SIGN SHYA
|
||||
|
||||
# Total code points: 48
|
||||
# Total code points: 47
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -2085,8 +2124,9 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
|||
11047..1104D ; Brahmi # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS
|
||||
11052..11065 ; Brahmi # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND
|
||||
11066..1106F ; Brahmi # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE
|
||||
1107F ; Brahmi # Mn BRAHMI NUMBER JOINER
|
||||
|
||||
# Total code points: 108
|
||||
# Total code points: 109
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -2141,9 +2181,11 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
|||
111BF..111C0 ; Sharada # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA
|
||||
111C1..111C4 ; Sharada # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM
|
||||
111C5..111C8 ; Sharada # Po [4] SHARADA DANDA..SHARADA SEPARATOR
|
||||
111CD ; Sharada # Po SHARADA SUTRA MARK
|
||||
111D0..111D9 ; Sharada # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE
|
||||
111DA ; Sharada # Lo SHARADA EKAM
|
||||
|
||||
# Total code points: 83
|
||||
# Total code points: 85
|
||||
|
||||
# ================================================
|
||||
|
||||
|
@ -2166,4 +2208,244 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
|
|||
|
||||
# Total code points: 66
|
||||
|
||||
# ================================================
|
||||
|
||||
10530..10563 ; Caucasian_Albanian # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW
|
||||
1056F ; Caucasian_Albanian # Po CAUCASIAN ALBANIAN CITATION MARK
|
||||
|
||||
# Total code points: 53
|
||||
|
||||
# ================================================
|
||||
|
||||
16AD0..16AED ; Bassa_Vah # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I
|
||||
16AF0..16AF4 ; Bassa_Vah # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
|
||||
16AF5 ; Bassa_Vah # Po BASSA VAH FULL STOP
|
||||
|
||||
# Total code points: 36
|
||||
|
||||
# ================================================
|
||||
|
||||
1BC00..1BC6A ; Duployan # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
|
||||
1BC70..1BC7C ; Duployan # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
|
||||
1BC80..1BC88 ; Duployan # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL
|
||||
1BC90..1BC99 ; Duployan # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW
|
||||
1BC9C ; Duployan # So DUPLOYAN SIGN O WITH CROSS
|
||||
1BC9D..1BC9E ; Duployan # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
|
||||
1BC9F ; Duployan # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
|
||||
|
||||
# Total code points: 143
|
||||
|
||||
# ================================================
|
||||
|
||||
10500..10527 ; Elbasan # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE
|
||||
|
||||
# Total code points: 40
|
||||
|
||||
# ================================================
|
||||
|
||||
11301 ; Grantha # Mn GRANTHA SIGN CANDRABINDU
|
||||
11302..11303 ; Grantha # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA
|
||||
11305..1130C ; Grantha # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L
|
||||
1130F..11310 ; Grantha # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI
|
||||
11313..11328 ; Grantha # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA
|
||||
1132A..11330 ; Grantha # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA
|
||||
11332..11333 ; Grantha # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA
|
||||
11335..11339 ; Grantha # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA
|
||||
1133C ; Grantha # Mn GRANTHA SIGN NUKTA
|
||||
1133D ; Grantha # Lo GRANTHA SIGN AVAGRAHA
|
||||
1133E..1133F ; Grantha # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I
|
||||
11340 ; Grantha # Mn GRANTHA VOWEL SIGN II
|
||||
11341..11344 ; Grantha # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR
|
||||
11347..11348 ; Grantha # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI
|
||||
1134B..1134D ; Grantha # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA
|
||||
11357 ; Grantha # Mc GRANTHA AU LENGTH MARK
|
||||
1135D..11361 ; Grantha # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL
|
||||
11362..11363 ; Grantha # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
|
||||
11366..1136C ; Grantha # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
|
||||
11370..11374 ; Grantha # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
|
||||
|
||||
# Total code points: 83
|
||||
|
||||
# ================================================
|
||||
|
||||
16B00..16B2F ; Pahawh_Hmong # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU
|
||||
16B30..16B36 ; Pahawh_Hmong # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
|
||||
16B37..16B3B ; Pahawh_Hmong # Po [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM
|
||||
16B3C..16B3F ; Pahawh_Hmong # So [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB
|
||||
16B40..16B43 ; Pahawh_Hmong # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM
|
||||
16B44 ; Pahawh_Hmong # Po PAHAWH HMONG SIGN XAUS
|
||||
16B45 ; Pahawh_Hmong # So PAHAWH HMONG SIGN CIM TSOV ROG
|
||||
16B50..16B59 ; Pahawh_Hmong # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE
|
||||
16B5B..16B61 ; Pahawh_Hmong # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS
|
||||
16B63..16B77 ; Pahawh_Hmong # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS
|
||||
16B7D..16B8F ; Pahawh_Hmong # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ
|
||||
|
||||
# Total code points: 127
|
||||
|
||||
# ================================================
|
||||
|
||||
11200..11211 ; Khojki # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA
|
||||
11213..1122B ; Khojki # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA
|
||||
1122C..1122E ; Khojki # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II
|
||||
1122F..11231 ; Khojki # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
|
||||
11232..11233 ; Khojki # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU
|
||||
11234 ; Khojki # Mn KHOJKI SIGN ANUSVARA
|
||||
11235 ; Khojki # Mc KHOJKI SIGN VIRAMA
|
||||
11236..11237 ; Khojki # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
|
||||
11238..1123D ; Khojki # Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN
|
||||
|
||||
# Total code points: 61
|
||||
|
||||
# ================================================
|
||||
|
||||
10600..10736 ; Linear_A # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
|
||||
10740..10755 ; Linear_A # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
|
||||
10760..10767 ; Linear_A # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807
|
||||
|
||||
# Total code points: 341
|
||||
|
||||
# ================================================
|
||||
|
||||
11150..11172 ; Mahajani # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA
|
||||
11173 ; Mahajani # Mn MAHAJANI SIGN NUKTA
|
||||
11174..11175 ; Mahajani # Po [2] MAHAJANI ABBREVIATION SIGN..MAHAJANI SECTION MARK
|
||||
11176 ; Mahajani # Lo MAHAJANI LIGATURE SHRI
|
||||
|
||||
# Total code points: 39
|
||||
|
||||
# ================================================
|
||||
|
||||
10AC0..10AC7 ; Manichaean # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW
|
||||
10AC8 ; Manichaean # So MANICHAEAN SIGN UD
|
||||
10AC9..10AE4 ; Manichaean # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW
|
||||
10AE5..10AE6 ; Manichaean # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
|
||||
10AEB..10AEF ; Manichaean # No [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED
|
||||
10AF0..10AF6 ; Manichaean # Po [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER
|
||||
|
||||
# Total code points: 51
|
||||
|
||||
# ================================================
|
||||
|
||||
1E800..1E8C4 ; Mende_Kikakui # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON
|
||||
1E8C7..1E8CF ; Mende_Kikakui # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE
|
||||
1E8D0..1E8D6 ; Mende_Kikakui # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
|
||||
|
||||
# Total code points: 213
|
||||
|
||||
# ================================================
|
||||
|
||||
11600..1162F ; Modi # Lo [48] MODI LETTER A..MODI LETTER LLA
|
||||
11630..11632 ; Modi # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II
|
||||
11633..1163A ; Modi # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI
|
||||
1163B..1163C ; Modi # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU
|
||||
1163D ; Modi # Mn MODI SIGN ANUSVARA
|
||||
1163E ; Modi # Mc MODI SIGN VISARGA
|
||||
1163F..11640 ; Modi # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA
|
||||
11641..11643 ; Modi # Po [3] MODI DANDA..MODI ABBREVIATION SIGN
|
||||
11644 ; Modi # Lo MODI SIGN HUVA
|
||||
11650..11659 ; Modi # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE
|
||||
|
||||
# Total code points: 79
|
||||
|
||||
# ================================================
|
||||
|
||||
16A40..16A5E ; Mro # Lo [31] MRO LETTER TA..MRO LETTER TEK
|
||||
16A60..16A69 ; Mro # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE
|
||||
16A6E..16A6F ; Mro # Po [2] MRO DANDA..MRO DOUBLE DANDA
|
||||
|
||||
# Total code points: 43
|
||||
|
||||
# ================================================
|
||||
|
||||
10A80..10A9C ; Old_North_Arabian # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH
|
||||
10A9D..10A9F ; Old_North_Arabian # No [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY
|
||||
|
||||
# Total code points: 32
|
||||
|
||||
# ================================================
|
||||
|
||||
10880..1089E ; Nabataean # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW
|
||||
108A7..108AF ; Nabataean # No [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED
|
||||
|
||||
# Total code points: 40
|
||||
|
||||
# ================================================
|
||||
|
||||
10860..10876 ; Palmyrene # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW
|
||||
10877..10878 ; Palmyrene # So [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON
|
||||
10879..1087F ; Palmyrene # No [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY
|
||||
|
||||
# Total code points: 32
|
||||
|
||||
# ================================================
|
||||
|
||||
11AC0..11AF8 ; Pau_Cin_Hau # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
|
||||
|
||||
# Total code points: 57
|
||||
|
||||
# ================================================
|
||||
|
||||
10350..10375 ; Old_Permic # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA
|
||||
10376..1037A ; Old_Permic # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII
|
||||
|
||||
# Total code points: 43
|
||||
|
||||
# ================================================
|
||||
|
||||
10B80..10B91 ; Psalter_Pahlavi # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW
|
||||
10B99..10B9C ; Psalter_Pahlavi # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT
|
||||
10BA9..10BAF ; Psalter_Pahlavi # No [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED
|
||||
|
||||
# Total code points: 29
|
||||
|
||||
# ================================================
|
||||
|
||||
11580..115AE ; Siddham # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA
|
||||
115AF..115B1 ; Siddham # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II
|
||||
115B2..115B5 ; Siddham # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR
|
||||
115B8..115BB ; Siddham # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU
|
||||
115BC..115BD ; Siddham # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA
|
||||
115BE ; Siddham # Mc SIDDHAM SIGN VISARGA
|
||||
115BF..115C0 ; Siddham # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA
|
||||
115C1..115C9 ; Siddham # Po [9] SIDDHAM SIGN SIDDHAM..SIDDHAM END OF TEXT MARK
|
||||
|
||||
# Total code points: 72
|
||||
|
||||
# ================================================
|
||||
|
||||
112B0..112DE ; Khudawadi # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA
|
||||
112DF ; Khudawadi # Mn KHUDAWADI SIGN ANUSVARA
|
||||
112E0..112E2 ; Khudawadi # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II
|
||||
112E3..112EA ; Khudawadi # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA
|
||||
112F0..112F9 ; Khudawadi # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE
|
||||
|
||||
# Total code points: 69
|
||||
|
||||
# ================================================
|
||||
|
||||
11480..114AF ; Tirhuta # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA
|
||||
114B0..114B2 ; Tirhuta # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II
|
||||
114B3..114B8 ; Tirhuta # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL
|
||||
114B9 ; Tirhuta # Mc TIRHUTA VOWEL SIGN E
|
||||
114BA ; Tirhuta # Mn TIRHUTA VOWEL SIGN SHORT E
|
||||
114BB..114BE ; Tirhuta # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU
|
||||
114BF..114C0 ; Tirhuta # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA
|
||||
114C1 ; Tirhuta # Mc TIRHUTA SIGN VISARGA
|
||||
114C2..114C3 ; Tirhuta # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA
|
||||
114C4..114C5 ; Tirhuta # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG
|
||||
114C6 ; Tirhuta # Po TIRHUTA ABBREVIATION SIGN
|
||||
114C7 ; Tirhuta # Lo TIRHUTA OM
|
||||
114D0..114D9 ; Tirhuta # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE
|
||||
|
||||
# Total code points: 82
|
||||
|
||||
# ================================================
|
||||
|
||||
118A0..118DF ; Warang_Citi # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO
|
||||
118E0..118E9 ; Warang_Citi # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
|
||||
118EA..118F2 ; Warang_Citi # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY
|
||||
118FF ; Warang_Citi # Lo WARANG CITI OM
|
||||
|
||||
# Total code points: 84
|
||||
|
||||
# EOF
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -2,7 +2,7 @@
|
|||
* A program for testing the Unicode property table *
|
||||
***************************************************/
|
||||
|
||||
/* Copyright (c) University of Cambridge 2008 */
|
||||
/* Copyright (c) University of Cambridge 2008 - 2014 */
|
||||
|
||||
/* Compile thus:
|
||||
gcc -DHAVE_CONFIG_H -DPCRE2_CODE_UNIT_WIDTH=8 -o ucptest \
|
||||
|
@ -236,6 +236,30 @@ switch(script)
|
|||
case ucp_Sora_Sompeng: scriptname = US"Sora Sompent"; break;
|
||||
case ucp_Takri: scriptname = US"Takri"; break;
|
||||
|
||||
/* New for Unicode 7.0.0 */
|
||||
case ucp_Bassa_Vah: scriptname = US"Bassa_Vah"; break;
|
||||
case ucp_Caucasian_Albanian: scriptname = US"Caucasian_Albanian"; break;
|
||||
case ucp_Duployan: scriptname = US"Duployan"; break;
|
||||
case ucp_Elbasan: scriptname = US"Elbasan"; break;
|
||||
case ucp_Grantha: scriptname = US"Grantha"; break;
|
||||
case ucp_Khojki: scriptname = US"Khojki"; break;
|
||||
case ucp_Khudawadi: scriptname = US"Khudawadi"; break;
|
||||
case ucp_Linear_A: scriptname = US"Linear_A"; break;
|
||||
case ucp_Mahajani: scriptname = US"Mahajani"; break;
|
||||
case ucp_Manichaean: scriptname = US"Manichaean"; break;
|
||||
case ucp_Mende_Kikakui: scriptname = US"Mende_Kikakui"; break;
|
||||
case ucp_Modi: scriptname = US"Modi"; break;
|
||||
case ucp_Mro: scriptname = US"Mro"; break;
|
||||
case ucp_Nabataean: scriptname = US"Nabataean"; break;
|
||||
case ucp_Old_North_Arabian: scriptname = US"Old_North_Arabian"; break;
|
||||
case ucp_Old_Permic: scriptname = US"Old_Permic"; break;
|
||||
case ucp_Pahawh_Hmong: scriptname = US"Pahawh_Hmong"; break;
|
||||
case ucp_Palmyrene: scriptname = US"Palmyrene"; break;
|
||||
case ucp_Psalter_Pahlavi: scriptname = US"Psalter_Pahlavi"; break;
|
||||
case ucp_Pau_Cin_Hau: scriptname = US"Pau_Cin_Hau"; break;
|
||||
case ucp_Siddham: scriptname = US"Siddham"; break;
|
||||
case ucp_Tirhuta: scriptname = US"Tirhuta"; break;
|
||||
case ucp_Warang_Citi: scriptname = US"Warang_Citi"; break;
|
||||
}
|
||||
|
||||
printf("%04x %s: %s, %s, %s", c, typename, fulltypename, scriptname, graphbreak);
|
||||
|
|
|
@ -32,3 +32,5 @@ findprop 10b00 10b35 13000 1342e 10840 10855
|
|||
findprop 11100 1113c 11680 116c0
|
||||
|
||||
findprop 0d 0a 0e 0711 1b04 1111 1169 11fe ae4c ad89
|
||||
|
||||
findprop 118a0 11ac7 16ad0
|
||||
|
|
|
@ -357,3 +357,8 @@ findprop 0d 0a 0e 0711 1b04 1111 1169 11fe ae4c ad89
|
|||
11fe Letter: Other letter, Hangul, Hangul syllable type T
|
||||
ae4c Letter: Other letter, Hangul, Hangul syllable type LV
|
||||
ad89 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
|
||||
findprop 118a0 11ac7 16ad0
|
||||
118a0 Letter: Upper case letter, Warang_Citi, Other, 118c0
|
||||
11ac7 Letter: Other letter, Pau_Cin_Hau, Other
|
||||
16ad0 Letter: Other letter, Bassa_Vah, Other
|
||||
|
|
|
@ -127,7 +127,7 @@ ucp_gbXX values defined in pcre2_ucp.h. The 2-dimensional table is indexed by
|
|||
the properties of two adjacent code points. The left property selects a word
|
||||
from the table, and the right property selects a bit from that word like this:
|
||||
|
||||
ucp_gbtable[left-property] & (1 << right-property)
|
||||
PRIV(ucp_gbtable)[left-property] & (1 << right-property)
|
||||
|
||||
The value is non-zero if a grapheme break is NOT permitted between the relevant
|
||||
two code points. The breaking rules are as follows:
|
||||
|
@ -155,7 +155,7 @@ are implementing).
|
|||
7. Otherwise, break everywhere.
|
||||
*/
|
||||
|
||||
const uint32_t PRIV(ucp_gbtable[]) = {
|
||||
const uint32_t PRIV(ucp_gbtable)[] = {
|
||||
(1<<ucp_gbLF), /* 0 CR */
|
||||
0, /* 1 LF */
|
||||
0, /* 2 Control */
|
||||
|
@ -196,7 +196,7 @@ const int PRIV(ucp_typerange)[] = {
|
|||
};
|
||||
#endif /* SUPPORT_JIT */
|
||||
|
||||
/* The pcre_utt[] table below translates Unicode property names into type and
|
||||
/* The PRIV(utt)[] table below translates Unicode property names into type and
|
||||
code values. It is searched by binary chop, so must be in collating sequence of
|
||||
name. Originally, the table contained pointers to the name strings in the first
|
||||
field of each entry. However, that leads to a large number of relocations when
|
||||
|
@ -219,6 +219,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0"
|
||||
#define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
|
||||
#define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0"
|
||||
#define STRING_Bassa_Vah0 STR_B STR_a STR_s STR_s STR_a STR_UNDERSCORE STR_V STR_a STR_h "\0"
|
||||
#define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0"
|
||||
#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
|
||||
#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
|
||||
|
@ -229,6 +230,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_C0 STR_C "\0"
|
||||
#define STRING_Canadian_Aboriginal0 STR_C STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0"
|
||||
#define STRING_Carian0 STR_C STR_a STR_r STR_i STR_a STR_n "\0"
|
||||
#define STRING_Caucasian_Albanian0 STR_C STR_a STR_u STR_c STR_a STR_s STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_l STR_b STR_a STR_n STR_i STR_a STR_n "\0"
|
||||
#define STRING_Cc0 STR_C STR_c "\0"
|
||||
#define STRING_Cf0 STR_C STR_f "\0"
|
||||
#define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0"
|
||||
|
@ -244,11 +246,14 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
|
||||
#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
|
||||
#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
|
||||
#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
|
||||
#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
#define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0"
|
||||
#define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"
|
||||
#define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
|
||||
#define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
|
||||
#define STRING_Gothic0 STR_G STR_o STR_t STR_h STR_i STR_c "\0"
|
||||
#define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0"
|
||||
#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
|
||||
#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
|
||||
#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
|
||||
|
@ -268,12 +273,15 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
|
||||
#define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
|
||||
#define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0"
|
||||
#define STRING_Khojki0 STR_K STR_h STR_o STR_j STR_k STR_i "\0"
|
||||
#define STRING_Khudawadi0 STR_K STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0"
|
||||
#define STRING_L0 STR_L "\0"
|
||||
#define STRING_L_AMPERSAND0 STR_L STR_AMPERSAND "\0"
|
||||
#define STRING_Lao0 STR_L STR_a STR_o "\0"
|
||||
#define STRING_Latin0 STR_L STR_a STR_t STR_i STR_n "\0"
|
||||
#define STRING_Lepcha0 STR_L STR_e STR_p STR_c STR_h STR_a "\0"
|
||||
#define STRING_Limbu0 STR_L STR_i STR_m STR_b STR_u "\0"
|
||||
#define STRING_Linear_A0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_A "\0"
|
||||
#define STRING_Linear_B0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_B "\0"
|
||||
#define STRING_Lisu0 STR_L STR_i STR_s STR_u "\0"
|
||||
#define STRING_Ll0 STR_L STR_l "\0"
|
||||
|
@ -284,18 +292,24 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0"
|
||||
#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
|
||||
#define STRING_M0 STR_M "\0"
|
||||
#define STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0"
|
||||
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
|
||||
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
|
||||
#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
|
||||
#define STRING_Mc0 STR_M STR_c "\0"
|
||||
#define STRING_Me0 STR_M STR_e "\0"
|
||||
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
|
||||
#define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0"
|
||||
#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
|
||||
#define STRING_Meroitic_Hieroglyphs0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
|
||||
#define STRING_Miao0 STR_M STR_i STR_a STR_o "\0"
|
||||
#define STRING_Mn0 STR_M STR_n "\0"
|
||||
#define STRING_Modi0 STR_M STR_o STR_d STR_i "\0"
|
||||
#define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
|
||||
#define STRING_Mro0 STR_M STR_r STR_o "\0"
|
||||
#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
|
||||
#define STRING_N0 STR_N "\0"
|
||||
#define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
|
||||
#define STRING_Nd0 STR_N STR_d "\0"
|
||||
#define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
|
||||
#define STRING_Nko0 STR_N STR_k STR_o "\0"
|
||||
|
@ -304,12 +318,17 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
|
||||
#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
|
||||
#define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0"
|
||||
#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0"
|
||||
#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
|
||||
#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
|
||||
#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
|
||||
#define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0"
|
||||
#define STRING_P0 STR_P "\0"
|
||||
#define STRING_Pahawh_Hmong0 STR_P STR_a STR_h STR_a STR_w STR_h STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0"
|
||||
#define STRING_Palmyrene0 STR_P STR_a STR_l STR_m STR_y STR_r STR_e STR_n STR_e "\0"
|
||||
#define STRING_Pau_Cin_Hau0 STR_P STR_a STR_u STR_UNDERSCORE STR_C STR_i STR_n STR_UNDERSCORE STR_H STR_a STR_u "\0"
|
||||
#define STRING_Pc0 STR_P STR_c "\0"
|
||||
#define STRING_Pd0 STR_P STR_d "\0"
|
||||
#define STRING_Pe0 STR_P STR_e "\0"
|
||||
|
@ -319,6 +338,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Pi0 STR_P STR_i "\0"
|
||||
#define STRING_Po0 STR_P STR_o "\0"
|
||||
#define STRING_Ps0 STR_P STR_s "\0"
|
||||
#define STRING_Psalter_Pahlavi0 STR_P STR_s STR_a STR_l STR_t STR_e STR_r STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0"
|
||||
#define STRING_Rejang0 STR_R STR_e STR_j STR_a STR_n STR_g "\0"
|
||||
#define STRING_Runic0 STR_R STR_u STR_n STR_i STR_c "\0"
|
||||
#define STRING_S0 STR_S "\0"
|
||||
|
@ -327,6 +347,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Sc0 STR_S STR_c "\0"
|
||||
#define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0"
|
||||
#define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0"
|
||||
#define STRING_Siddham0 STR_S STR_i STR_d STR_d STR_h STR_a STR_m "\0"
|
||||
#define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0"
|
||||
#define STRING_Sk0 STR_S STR_k "\0"
|
||||
#define STRING_Sm0 STR_S STR_m "\0"
|
||||
|
@ -347,8 +368,10 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
|
|||
#define STRING_Thai0 STR_T STR_h STR_a STR_i "\0"
|
||||
#define STRING_Tibetan0 STR_T STR_i STR_b STR_e STR_t STR_a STR_n "\0"
|
||||
#define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0"
|
||||
#define STRING_Tirhuta0 STR_T STR_i STR_r STR_h STR_u STR_t STR_a "\0"
|
||||
#define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
|
||||
#define STRING_Vai0 STR_V STR_a STR_i "\0"
|
||||
#define STRING_Warang_Citi0 STR_W STR_a STR_r STR_a STR_n STR_g STR_UNDERSCORE STR_C STR_i STR_t STR_i "\0"
|
||||
#define STRING_Xan0 STR_X STR_a STR_n "\0"
|
||||
#define STRING_Xps0 STR_X STR_p STR_s "\0"
|
||||
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
|
||||
|
@ -367,6 +390,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Avestan0
|
||||
STRING_Balinese0
|
||||
STRING_Bamum0
|
||||
STRING_Bassa_Vah0
|
||||
STRING_Batak0
|
||||
STRING_Bengali0
|
||||
STRING_Bopomofo0
|
||||
|
@ -377,6 +401,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_C0
|
||||
STRING_Canadian_Aboriginal0
|
||||
STRING_Carian0
|
||||
STRING_Caucasian_Albanian0
|
||||
STRING_Cc0
|
||||
STRING_Cf0
|
||||
STRING_Chakma0
|
||||
|
@ -392,11 +417,14 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Cyrillic0
|
||||
STRING_Deseret0
|
||||
STRING_Devanagari0
|
||||
STRING_Duployan0
|
||||
STRING_Egyptian_Hieroglyphs0
|
||||
STRING_Elbasan0
|
||||
STRING_Ethiopic0
|
||||
STRING_Georgian0
|
||||
STRING_Glagolitic0
|
||||
STRING_Gothic0
|
||||
STRING_Grantha0
|
||||
STRING_Greek0
|
||||
STRING_Gujarati0
|
||||
STRING_Gurmukhi0
|
||||
|
@ -416,12 +444,15 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Kayah_Li0
|
||||
STRING_Kharoshthi0
|
||||
STRING_Khmer0
|
||||
STRING_Khojki0
|
||||
STRING_Khudawadi0
|
||||
STRING_L0
|
||||
STRING_L_AMPERSAND0
|
||||
STRING_Lao0
|
||||
STRING_Latin0
|
||||
STRING_Lepcha0
|
||||
STRING_Limbu0
|
||||
STRING_Linear_A0
|
||||
STRING_Linear_B0
|
||||
STRING_Lisu0
|
||||
STRING_Ll0
|
||||
|
@ -432,18 +463,24 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Lycian0
|
||||
STRING_Lydian0
|
||||
STRING_M0
|
||||
STRING_Mahajani0
|
||||
STRING_Malayalam0
|
||||
STRING_Mandaic0
|
||||
STRING_Manichaean0
|
||||
STRING_Mc0
|
||||
STRING_Me0
|
||||
STRING_Meetei_Mayek0
|
||||
STRING_Mende_Kikakui0
|
||||
STRING_Meroitic_Cursive0
|
||||
STRING_Meroitic_Hieroglyphs0
|
||||
STRING_Miao0
|
||||
STRING_Mn0
|
||||
STRING_Modi0
|
||||
STRING_Mongolian0
|
||||
STRING_Mro0
|
||||
STRING_Myanmar0
|
||||
STRING_N0
|
||||
STRING_Nabataean0
|
||||
STRING_Nd0
|
||||
STRING_New_Tai_Lue0
|
||||
STRING_Nko0
|
||||
|
@ -452,12 +489,17 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Ogham0
|
||||
STRING_Ol_Chiki0
|
||||
STRING_Old_Italic0
|
||||
STRING_Old_North_Arabian0
|
||||
STRING_Old_Permic0
|
||||
STRING_Old_Persian0
|
||||
STRING_Old_South_Arabian0
|
||||
STRING_Old_Turkic0
|
||||
STRING_Oriya0
|
||||
STRING_Osmanya0
|
||||
STRING_P0
|
||||
STRING_Pahawh_Hmong0
|
||||
STRING_Palmyrene0
|
||||
STRING_Pau_Cin_Hau0
|
||||
STRING_Pc0
|
||||
STRING_Pd0
|
||||
STRING_Pe0
|
||||
|
@ -467,6 +509,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Pi0
|
||||
STRING_Po0
|
||||
STRING_Ps0
|
||||
STRING_Psalter_Pahlavi0
|
||||
STRING_Rejang0
|
||||
STRING_Runic0
|
||||
STRING_S0
|
||||
|
@ -475,6 +518,7 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Sc0
|
||||
STRING_Sharada0
|
||||
STRING_Shavian0
|
||||
STRING_Siddham0
|
||||
STRING_Sinhala0
|
||||
STRING_Sk0
|
||||
STRING_Sm0
|
||||
|
@ -495,8 +539,10 @@ const char PRIV(utt_names)[] =
|
|||
STRING_Thai0
|
||||
STRING_Tibetan0
|
||||
STRING_Tifinagh0
|
||||
STRING_Tirhuta0
|
||||
STRING_Ugaritic0
|
||||
STRING_Vai0
|
||||
STRING_Warang_Citi0
|
||||
STRING_Xan0
|
||||
STRING_Xps0
|
||||
STRING_Xsp0
|
||||
|
@ -515,146 +561,169 @@ const ucp_type_table PRIV(utt)[] = {
|
|||
{ 20, PT_SC, ucp_Avestan },
|
||||
{ 28, PT_SC, ucp_Balinese },
|
||||
{ 37, PT_SC, ucp_Bamum },
|
||||
{ 43, PT_SC, ucp_Batak },
|
||||
{ 49, PT_SC, ucp_Bengali },
|
||||
{ 57, PT_SC, ucp_Bopomofo },
|
||||
{ 66, PT_SC, ucp_Brahmi },
|
||||
{ 73, PT_SC, ucp_Braille },
|
||||
{ 81, PT_SC, ucp_Buginese },
|
||||
{ 90, PT_SC, ucp_Buhid },
|
||||
{ 96, PT_GC, ucp_C },
|
||||
{ 98, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 118, PT_SC, ucp_Carian },
|
||||
{ 125, PT_PC, ucp_Cc },
|
||||
{ 128, PT_PC, ucp_Cf },
|
||||
{ 131, PT_SC, ucp_Chakma },
|
||||
{ 138, PT_SC, ucp_Cham },
|
||||
{ 143, PT_SC, ucp_Cherokee },
|
||||
{ 152, PT_PC, ucp_Cn },
|
||||
{ 155, PT_PC, ucp_Co },
|
||||
{ 158, PT_SC, ucp_Common },
|
||||
{ 165, PT_SC, ucp_Coptic },
|
||||
{ 172, PT_PC, ucp_Cs },
|
||||
{ 175, PT_SC, ucp_Cuneiform },
|
||||
{ 185, PT_SC, ucp_Cypriot },
|
||||
{ 193, PT_SC, ucp_Cyrillic },
|
||||
{ 202, PT_SC, ucp_Deseret },
|
||||
{ 210, PT_SC, ucp_Devanagari },
|
||||
{ 221, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 242, PT_SC, ucp_Ethiopic },
|
||||
{ 251, PT_SC, ucp_Georgian },
|
||||
{ 260, PT_SC, ucp_Glagolitic },
|
||||
{ 271, PT_SC, ucp_Gothic },
|
||||
{ 278, PT_SC, ucp_Greek },
|
||||
{ 284, PT_SC, ucp_Gujarati },
|
||||
{ 293, PT_SC, ucp_Gurmukhi },
|
||||
{ 302, PT_SC, ucp_Han },
|
||||
{ 306, PT_SC, ucp_Hangul },
|
||||
{ 313, PT_SC, ucp_Hanunoo },
|
||||
{ 321, PT_SC, ucp_Hebrew },
|
||||
{ 328, PT_SC, ucp_Hiragana },
|
||||
{ 337, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 354, PT_SC, ucp_Inherited },
|
||||
{ 364, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 386, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 409, PT_SC, ucp_Javanese },
|
||||
{ 418, PT_SC, ucp_Kaithi },
|
||||
{ 425, PT_SC, ucp_Kannada },
|
||||
{ 433, PT_SC, ucp_Katakana },
|
||||
{ 442, PT_SC, ucp_Kayah_Li },
|
||||
{ 451, PT_SC, ucp_Kharoshthi },
|
||||
{ 462, PT_SC, ucp_Khmer },
|
||||
{ 468, PT_GC, ucp_L },
|
||||
{ 470, PT_LAMP, 0 },
|
||||
{ 473, PT_SC, ucp_Lao },
|
||||
{ 477, PT_SC, ucp_Latin },
|
||||
{ 483, PT_SC, ucp_Lepcha },
|
||||
{ 490, PT_SC, ucp_Limbu },
|
||||
{ 496, PT_SC, ucp_Linear_B },
|
||||
{ 505, PT_SC, ucp_Lisu },
|
||||
{ 510, PT_PC, ucp_Ll },
|
||||
{ 513, PT_PC, ucp_Lm },
|
||||
{ 516, PT_PC, ucp_Lo },
|
||||
{ 519, PT_PC, ucp_Lt },
|
||||
{ 522, PT_PC, ucp_Lu },
|
||||
{ 525, PT_SC, ucp_Lycian },
|
||||
{ 532, PT_SC, ucp_Lydian },
|
||||
{ 539, PT_GC, ucp_M },
|
||||
{ 541, PT_SC, ucp_Malayalam },
|
||||
{ 551, PT_SC, ucp_Mandaic },
|
||||
{ 559, PT_PC, ucp_Mc },
|
||||
{ 562, PT_PC, ucp_Me },
|
||||
{ 565, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 578, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 595, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 616, PT_SC, ucp_Miao },
|
||||
{ 621, PT_PC, ucp_Mn },
|
||||
{ 624, PT_SC, ucp_Mongolian },
|
||||
{ 634, PT_SC, ucp_Myanmar },
|
||||
{ 642, PT_GC, ucp_N },
|
||||
{ 644, PT_PC, ucp_Nd },
|
||||
{ 647, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 659, PT_SC, ucp_Nko },
|
||||
{ 663, PT_PC, ucp_Nl },
|
||||
{ 666, PT_PC, ucp_No },
|
||||
{ 669, PT_SC, ucp_Ogham },
|
||||
{ 675, PT_SC, ucp_Ol_Chiki },
|
||||
{ 684, PT_SC, ucp_Old_Italic },
|
||||
{ 695, PT_SC, ucp_Old_Persian },
|
||||
{ 707, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 725, PT_SC, ucp_Old_Turkic },
|
||||
{ 736, PT_SC, ucp_Oriya },
|
||||
{ 742, PT_SC, ucp_Osmanya },
|
||||
{ 750, PT_GC, ucp_P },
|
||||
{ 752, PT_PC, ucp_Pc },
|
||||
{ 755, PT_PC, ucp_Pd },
|
||||
{ 758, PT_PC, ucp_Pe },
|
||||
{ 761, PT_PC, ucp_Pf },
|
||||
{ 764, PT_SC, ucp_Phags_Pa },
|
||||
{ 773, PT_SC, ucp_Phoenician },
|
||||
{ 784, PT_PC, ucp_Pi },
|
||||
{ 787, PT_PC, ucp_Po },
|
||||
{ 790, PT_PC, ucp_Ps },
|
||||
{ 793, PT_SC, ucp_Rejang },
|
||||
{ 800, PT_SC, ucp_Runic },
|
||||
{ 806, PT_GC, ucp_S },
|
||||
{ 808, PT_SC, ucp_Samaritan },
|
||||
{ 818, PT_SC, ucp_Saurashtra },
|
||||
{ 829, PT_PC, ucp_Sc },
|
||||
{ 832, PT_SC, ucp_Sharada },
|
||||
{ 840, PT_SC, ucp_Shavian },
|
||||
{ 848, PT_SC, ucp_Sinhala },
|
||||
{ 856, PT_PC, ucp_Sk },
|
||||
{ 859, PT_PC, ucp_Sm },
|
||||
{ 862, PT_PC, ucp_So },
|
||||
{ 865, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 878, PT_SC, ucp_Sundanese },
|
||||
{ 888, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 901, PT_SC, ucp_Syriac },
|
||||
{ 908, PT_SC, ucp_Tagalog },
|
||||
{ 916, PT_SC, ucp_Tagbanwa },
|
||||
{ 925, PT_SC, ucp_Tai_Le },
|
||||
{ 932, PT_SC, ucp_Tai_Tham },
|
||||
{ 941, PT_SC, ucp_Tai_Viet },
|
||||
{ 950, PT_SC, ucp_Takri },
|
||||
{ 956, PT_SC, ucp_Tamil },
|
||||
{ 962, PT_SC, ucp_Telugu },
|
||||
{ 969, PT_SC, ucp_Thaana },
|
||||
{ 976, PT_SC, ucp_Thai },
|
||||
{ 981, PT_SC, ucp_Tibetan },
|
||||
{ 989, PT_SC, ucp_Tifinagh },
|
||||
{ 998, PT_SC, ucp_Ugaritic },
|
||||
{ 1007, PT_SC, ucp_Vai },
|
||||
{ 1011, PT_ALNUM, 0 },
|
||||
{ 1015, PT_PXSPACE, 0 },
|
||||
{ 1019, PT_SPACE, 0 },
|
||||
{ 1023, PT_UCNC, 0 },
|
||||
{ 1027, PT_WORD, 0 },
|
||||
{ 1031, PT_SC, ucp_Yi },
|
||||
{ 1034, PT_GC, ucp_Z },
|
||||
{ 1036, PT_PC, ucp_Zl },
|
||||
{ 1039, PT_PC, ucp_Zp },
|
||||
{ 1042, PT_PC, ucp_Zs }
|
||||
{ 43, PT_SC, ucp_Bassa_Vah },
|
||||
{ 53, PT_SC, ucp_Batak },
|
||||
{ 59, PT_SC, ucp_Bengali },
|
||||
{ 67, PT_SC, ucp_Bopomofo },
|
||||
{ 76, PT_SC, ucp_Brahmi },
|
||||
{ 83, PT_SC, ucp_Braille },
|
||||
{ 91, PT_SC, ucp_Buginese },
|
||||
{ 100, PT_SC, ucp_Buhid },
|
||||
{ 106, PT_GC, ucp_C },
|
||||
{ 108, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 128, PT_SC, ucp_Carian },
|
||||
{ 135, PT_SC, ucp_Caucasian_Albanian },
|
||||
{ 154, PT_PC, ucp_Cc },
|
||||
{ 157, PT_PC, ucp_Cf },
|
||||
{ 160, PT_SC, ucp_Chakma },
|
||||
{ 167, PT_SC, ucp_Cham },
|
||||
{ 172, PT_SC, ucp_Cherokee },
|
||||
{ 181, PT_PC, ucp_Cn },
|
||||
{ 184, PT_PC, ucp_Co },
|
||||
{ 187, PT_SC, ucp_Common },
|
||||
{ 194, PT_SC, ucp_Coptic },
|
||||
{ 201, PT_PC, ucp_Cs },
|
||||
{ 204, PT_SC, ucp_Cuneiform },
|
||||
{ 214, PT_SC, ucp_Cypriot },
|
||||
{ 222, PT_SC, ucp_Cyrillic },
|
||||
{ 231, PT_SC, ucp_Deseret },
|
||||
{ 239, PT_SC, ucp_Devanagari },
|
||||
{ 250, PT_SC, ucp_Duployan },
|
||||
{ 259, PT_SC, ucp_Egyptian_Hieroglyphs },
|
||||
{ 280, PT_SC, ucp_Elbasan },
|
||||
{ 288, PT_SC, ucp_Ethiopic },
|
||||
{ 297, PT_SC, ucp_Georgian },
|
||||
{ 306, PT_SC, ucp_Glagolitic },
|
||||
{ 317, PT_SC, ucp_Gothic },
|
||||
{ 324, PT_SC, ucp_Grantha },
|
||||
{ 332, PT_SC, ucp_Greek },
|
||||
{ 338, PT_SC, ucp_Gujarati },
|
||||
{ 347, PT_SC, ucp_Gurmukhi },
|
||||
{ 356, PT_SC, ucp_Han },
|
||||
{ 360, PT_SC, ucp_Hangul },
|
||||
{ 367, PT_SC, ucp_Hanunoo },
|
||||
{ 375, PT_SC, ucp_Hebrew },
|
||||
{ 382, PT_SC, ucp_Hiragana },
|
||||
{ 391, PT_SC, ucp_Imperial_Aramaic },
|
||||
{ 408, PT_SC, ucp_Inherited },
|
||||
{ 418, PT_SC, ucp_Inscriptional_Pahlavi },
|
||||
{ 440, PT_SC, ucp_Inscriptional_Parthian },
|
||||
{ 463, PT_SC, ucp_Javanese },
|
||||
{ 472, PT_SC, ucp_Kaithi },
|
||||
{ 479, PT_SC, ucp_Kannada },
|
||||
{ 487, PT_SC, ucp_Katakana },
|
||||
{ 496, PT_SC, ucp_Kayah_Li },
|
||||
{ 505, PT_SC, ucp_Kharoshthi },
|
||||
{ 516, PT_SC, ucp_Khmer },
|
||||
{ 522, PT_SC, ucp_Khojki },
|
||||
{ 529, PT_SC, ucp_Khudawadi },
|
||||
{ 539, PT_GC, ucp_L },
|
||||
{ 541, PT_LAMP, 0 },
|
||||
{ 544, PT_SC, ucp_Lao },
|
||||
{ 548, PT_SC, ucp_Latin },
|
||||
{ 554, PT_SC, ucp_Lepcha },
|
||||
{ 561, PT_SC, ucp_Limbu },
|
||||
{ 567, PT_SC, ucp_Linear_A },
|
||||
{ 576, PT_SC, ucp_Linear_B },
|
||||
{ 585, PT_SC, ucp_Lisu },
|
||||
{ 590, PT_PC, ucp_Ll },
|
||||
{ 593, PT_PC, ucp_Lm },
|
||||
{ 596, PT_PC, ucp_Lo },
|
||||
{ 599, PT_PC, ucp_Lt },
|
||||
{ 602, PT_PC, ucp_Lu },
|
||||
{ 605, PT_SC, ucp_Lycian },
|
||||
{ 612, PT_SC, ucp_Lydian },
|
||||
{ 619, PT_GC, ucp_M },
|
||||
{ 621, PT_SC, ucp_Mahajani },
|
||||
{ 630, PT_SC, ucp_Malayalam },
|
||||
{ 640, PT_SC, ucp_Mandaic },
|
||||
{ 648, PT_SC, ucp_Manichaean },
|
||||
{ 659, PT_PC, ucp_Mc },
|
||||
{ 662, PT_PC, ucp_Me },
|
||||
{ 665, PT_SC, ucp_Meetei_Mayek },
|
||||
{ 678, PT_SC, ucp_Mende_Kikakui },
|
||||
{ 692, PT_SC, ucp_Meroitic_Cursive },
|
||||
{ 709, PT_SC, ucp_Meroitic_Hieroglyphs },
|
||||
{ 730, PT_SC, ucp_Miao },
|
||||
{ 735, PT_PC, ucp_Mn },
|
||||
{ 738, PT_SC, ucp_Modi },
|
||||
{ 743, PT_SC, ucp_Mongolian },
|
||||
{ 753, PT_SC, ucp_Mro },
|
||||
{ 757, PT_SC, ucp_Myanmar },
|
||||
{ 765, PT_GC, ucp_N },
|
||||
{ 767, PT_SC, ucp_Nabataean },
|
||||
{ 777, PT_PC, ucp_Nd },
|
||||
{ 780, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 792, PT_SC, ucp_Nko },
|
||||
{ 796, PT_PC, ucp_Nl },
|
||||
{ 799, PT_PC, ucp_No },
|
||||
{ 802, PT_SC, ucp_Ogham },
|
||||
{ 808, PT_SC, ucp_Ol_Chiki },
|
||||
{ 817, PT_SC, ucp_Old_Italic },
|
||||
{ 828, PT_SC, ucp_Old_North_Arabian },
|
||||
{ 846, PT_SC, ucp_Old_Permic },
|
||||
{ 857, PT_SC, ucp_Old_Persian },
|
||||
{ 869, PT_SC, ucp_Old_South_Arabian },
|
||||
{ 887, PT_SC, ucp_Old_Turkic },
|
||||
{ 898, PT_SC, ucp_Oriya },
|
||||
{ 904, PT_SC, ucp_Osmanya },
|
||||
{ 912, PT_GC, ucp_P },
|
||||
{ 914, PT_SC, ucp_Pahawh_Hmong },
|
||||
{ 927, PT_SC, ucp_Palmyrene },
|
||||
{ 937, PT_SC, ucp_Pau_Cin_Hau },
|
||||
{ 949, PT_PC, ucp_Pc },
|
||||
{ 952, PT_PC, ucp_Pd },
|
||||
{ 955, PT_PC, ucp_Pe },
|
||||
{ 958, PT_PC, ucp_Pf },
|
||||
{ 961, PT_SC, ucp_Phags_Pa },
|
||||
{ 970, PT_SC, ucp_Phoenician },
|
||||
{ 981, PT_PC, ucp_Pi },
|
||||
{ 984, PT_PC, ucp_Po },
|
||||
{ 987, PT_PC, ucp_Ps },
|
||||
{ 990, PT_SC, ucp_Psalter_Pahlavi },
|
||||
{ 1006, PT_SC, ucp_Rejang },
|
||||
{ 1013, PT_SC, ucp_Runic },
|
||||
{ 1019, PT_GC, ucp_S },
|
||||
{ 1021, PT_SC, ucp_Samaritan },
|
||||
{ 1031, PT_SC, ucp_Saurashtra },
|
||||
{ 1042, PT_PC, ucp_Sc },
|
||||
{ 1045, PT_SC, ucp_Sharada },
|
||||
{ 1053, PT_SC, ucp_Shavian },
|
||||
{ 1061, PT_SC, ucp_Siddham },
|
||||
{ 1069, PT_SC, ucp_Sinhala },
|
||||
{ 1077, PT_PC, ucp_Sk },
|
||||
{ 1080, PT_PC, ucp_Sm },
|
||||
{ 1083, PT_PC, ucp_So },
|
||||
{ 1086, PT_SC, ucp_Sora_Sompeng },
|
||||
{ 1099, PT_SC, ucp_Sundanese },
|
||||
{ 1109, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 1122, PT_SC, ucp_Syriac },
|
||||
{ 1129, PT_SC, ucp_Tagalog },
|
||||
{ 1137, PT_SC, ucp_Tagbanwa },
|
||||
{ 1146, PT_SC, ucp_Tai_Le },
|
||||
{ 1153, PT_SC, ucp_Tai_Tham },
|
||||
{ 1162, PT_SC, ucp_Tai_Viet },
|
||||
{ 1171, PT_SC, ucp_Takri },
|
||||
{ 1177, PT_SC, ucp_Tamil },
|
||||
{ 1183, PT_SC, ucp_Telugu },
|
||||
{ 1190, PT_SC, ucp_Thaana },
|
||||
{ 1197, PT_SC, ucp_Thai },
|
||||
{ 1202, PT_SC, ucp_Tibetan },
|
||||
{ 1210, PT_SC, ucp_Tifinagh },
|
||||
{ 1219, PT_SC, ucp_Tirhuta },
|
||||
{ 1227, PT_SC, ucp_Ugaritic },
|
||||
{ 1236, PT_SC, ucp_Vai },
|
||||
{ 1240, PT_SC, ucp_Warang_Citi },
|
||||
{ 1252, PT_ALNUM, 0 },
|
||||
{ 1256, PT_PXSPACE, 0 },
|
||||
{ 1260, PT_SPACE, 0 },
|
||||
{ 1264, PT_UCNC, 0 },
|
||||
{ 1268, PT_WORD, 0 },
|
||||
{ 1272, PT_SC, ucp_Yi },
|
||||
{ 1275, PT_GC, ucp_Z },
|
||||
{ 1277, PT_PC, ucp_Zl },
|
||||
{ 1280, PT_PC, ucp_Zp },
|
||||
{ 1283, PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
|
||||
|
|
5117
src/pcre2_ucd.c
5117
src/pcre2_ucd.c
File diff suppressed because it is too large
Load Diff
|
@ -229,9 +229,33 @@ enum {
|
|||
ucp_Miao,
|
||||
ucp_Sharada,
|
||||
ucp_Sora_Sompeng,
|
||||
ucp_Takri
|
||||
ucp_Takri,
|
||||
/* New for Unicode 7.0.0: */
|
||||
ucp_Bassa_Vah,
|
||||
ucp_Caucasian_Albanian,
|
||||
ucp_Duployan,
|
||||
ucp_Elbasan,
|
||||
ucp_Grantha,
|
||||
ucp_Khojki,
|
||||
ucp_Khudawadi,
|
||||
ucp_Linear_A,
|
||||
ucp_Mahajani,
|
||||
ucp_Manichaean,
|
||||
ucp_Mende_Kikakui,
|
||||
ucp_Modi,
|
||||
ucp_Mro,
|
||||
ucp_Nabataean,
|
||||
ucp_Old_North_Arabian,
|
||||
ucp_Old_Permic,
|
||||
ucp_Pahawh_Hmong,
|
||||
ucp_Palmyrene,
|
||||
ucp_Psalter_Pahlavi,
|
||||
ucp_Pau_Cin_Hau,
|
||||
ucp_Siddham,
|
||||
ucp_Tirhuta,
|
||||
ucp_Warang_Citi
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
/* End of pcvre2_ucp.h */
|
||||
/* End of pcre2_ucp.h */
|
||||
|
|
Loading…
Reference in New Issue