Add bidi class and control information to Unicode property data
This commit is contained in:
parent
ba3d0edcbd
commit
823d4ac956
|
@ -15,15 +15,16 @@
|
|||
#
|
||||
# [python3] ./MultiStage2.py >../src/pcre2_ucd.c
|
||||
#
|
||||
# It requires six Unicode data tables: DerivedGeneralCategory.txt,
|
||||
# GraphemeBreakProperty.txt, Scripts.txt, ScriptExtensions.txt,
|
||||
# CaseFolding.txt, and emoji-data.txt. These must be in the
|
||||
# maint/Unicode.tables subdirectory.
|
||||
# It requires eight Unicode data tables: DerivedBidiClass.txt,
|
||||
# DerivedGeneralCategory.txt, GraphemeBreakProperty.txt, PropList.txt,
|
||||
# Scripts.txt, ScriptExtensions.txt, CaseFolding.txt, and emoji-data.txt. These
|
||||
# must be in the maint/Unicode.tables subdirectory.
|
||||
#
|
||||
# DerivedGeneralCategory.txt is found in the "extracted" subdirectory of the
|
||||
# Unicode database (UCD) on the Unicode web site; GraphemeBreakProperty.txt is
|
||||
# in the "auxiliary" subdirectory. Scripts.txt, ScriptExtensions.txt, and
|
||||
# CaseFolding.txt are directly in the UCD directory.
|
||||
# DerivedBidiClass.txt and DerivedGeneralCategory.txt are in the "extracted"
|
||||
# subdirectory of the Unicode database (UCD) on the Unicode web site;
|
||||
# GraphemeBreakProperty.txt is in the "auxiliary" subdirectory. PropList.txt,
|
||||
# Scripts.txt, ScriptExtensions.txt, and CaseFolding.txt are directly in the
|
||||
# UCD directory.
|
||||
#
|
||||
# The emoji-data.txt file is found in the "emoji" subdirectory even though it
|
||||
# is technically part of a different (but coordinated) standard as shown
|
||||
|
@ -69,6 +70,10 @@
|
|||
# Added code to add a Script Extensions field to records. This has increased
|
||||
# their size from 8 to 12 bytes, only 10 of which are currently used.
|
||||
#
|
||||
# Added code to add a bidi class field to records by scanning the
|
||||
# DerivedBidiClass.txt and PropList.txt files. This uses one of the two spare
|
||||
# bytes, so now 11 out of 12 are in use.
|
||||
#
|
||||
# 01-March-2010: Updated list of scripts for Unicode 5.2.0
|
||||
# 30-April-2011: Updated list of scripts for Unicode 6.0.0
|
||||
# July-2012: Updated list of scripts for Unicode 6.1.0
|
||||
|
@ -93,6 +98,8 @@
|
|||
# 27-July-2019: Updated for Unicode 12.1.0
|
||||
# 10-March-2020: Updated for Unicode 13.0.0
|
||||
# PCRE2-10.39: Updated for Unicode 14.0.0
|
||||
# 05-December-2021: Added code to scan DerivedBidiClass.txt for bidi class,
|
||||
# and also PropList.txt for the Bidi_Control property
|
||||
# ----------------------------------------------------------------------------
|
||||
#
|
||||
#
|
||||
|
@ -100,14 +107,15 @@
|
|||
# pcre2_internal.h. They look up Unicode character properties using short
|
||||
# sequences of code that contain no branches, which makes for greater speed.
|
||||
#
|
||||
# Conceptually, there is a table of records (of type ucd_record), containing a
|
||||
# script number, script extension value, character type, grapheme break type,
|
||||
# offset to caseless matching set, offset to the character's other case, for
|
||||
# every Unicode character. However, a real table covering all Unicode
|
||||
# characters would be far too big. It can be efficiently compressed by
|
||||
# observing that many characters have the same record, and many blocks of
|
||||
# characters (taking 128 characters in a block) have the same set of records as
|
||||
# other blocks. This leads to a 2-stage lookup process.
|
||||
# Conceptually, there is a table of records (of type ucd_record), one for each
|
||||
# Unicode character. Each record contains the script number, script extension
|
||||
# value, character type, grapheme break type, offset to caseless matching set,
|
||||
# offset to the character's other case, and the bidi class/control. However, a
|
||||
# real table covering all Unicode characters would be far too big. It can be
|
||||
# efficiently compressed by observing that many characters have the same
|
||||
# record, and many blocks of characters (taking 128 characters in a block) have
|
||||
# the same set of records as other blocks. This leads to a 2-stage lookup
|
||||
# process.
|
||||
#
|
||||
# This script constructs six tables. The ucd_caseless_sets table contains
|
||||
# lists of characters that all match each other caselessly. Each list is
|
||||
|
@ -136,19 +144,20 @@
|
|||
# the offset of a character within its own block, and the result is the index
|
||||
# number of the required record in the ucd_records vector.
|
||||
#
|
||||
# The following examples are correct for the Unicode 11.0.0 database. Future
|
||||
# The following examples are correct for the Unicode 14.0.0 database. Future
|
||||
# updates may change the actual lookup values.
|
||||
#
|
||||
# Example: lowercase "a" (U+0061) is in block 0
|
||||
# lookup 0 in stage1 table yields 0
|
||||
# lookup 97 (0x61) in the first table in stage2 yields 17
|
||||
# record 17 is { 34, 5, 12, 0, -32, 34, 0 }
|
||||
# lookup 97 (0x61) in the first table in stage2 yields 22
|
||||
# record 22 is { 34, 5, 12, 0, -32, 34, 2, 0 }
|
||||
# 34 = ucp_Latin => Latin script
|
||||
# 5 = ucp_Ll => Lower case letter
|
||||
# 12 = ucp_gbOther => Grapheme break property "Other"
|
||||
# 0 => Not part of a caseless set
|
||||
# -32 (-0x20) => Other case is U+0041
|
||||
# 34 = ucp_Latin => No special Script Extension property
|
||||
# 2 = ucp_bidiL => Bidi class left-to-right
|
||||
# 0 => Dummy value, unused at present
|
||||
#
|
||||
# Almost all lowercase latin characters resolve to the same record. One or two
|
||||
|
@ -156,34 +165,36 @@
|
|||
# example, k, K and the Kelvin symbol are such a set).
|
||||
#
|
||||
# Example: hiragana letter A (U+3042) is in block 96 (0x60)
|
||||
# lookup 96 in stage1 table yields 90
|
||||
# lookup 66 (0x42) in table 90 in stage2 yields 564
|
||||
# record 564 is { 27, 7, 12, 0, 0, 27, 0 }
|
||||
# lookup 96 in stage1 table yields 91
|
||||
# lookup 66 (0x42) in table 91 in stage2 yields 613
|
||||
# record 613 is { 27, 7, 12, 0, 0, 27, 2, 0 }
|
||||
# 27 = ucp_Hiragana => Hiragana script
|
||||
# 7 = ucp_Lo => Other letter
|
||||
# 12 = ucp_gbOther => Grapheme break property "Other"
|
||||
# 0 => Not part of a caseless set
|
||||
# 0 => No other case
|
||||
# 27 = ucp_Hiragana => No special Script Extension property
|
||||
# 2 = ucp_bidiL => Bidi class left-to-right
|
||||
# 0 => Dummy value, unused at present
|
||||
#
|
||||
# Example: vedic tone karshana (U+1CD0) is in block 57 (0x39)
|
||||
# lookup 57 in stage1 table yields 55
|
||||
# lookup 80 (0x50) in table 55 in stage2 yields 458
|
||||
# record 458 is { 28, 12, 3, 0, 0, -101, 0 }
|
||||
# lookup 80 (0x50) in table 55 in stage2 yields 485
|
||||
# record 485 is { 28, 12, 3, 0, 0, -122, 19, 0 }
|
||||
# 28 = ucp_Inherited => Script inherited from predecessor
|
||||
# 12 = ucp_Mn => Non-spacing mark
|
||||
# 3 = ucp_gbExtend => Grapheme break property "Extend"
|
||||
# 0 => Not part of a caseless set
|
||||
# 0 => No other case
|
||||
# -101 => Script Extension list offset = 101
|
||||
# -122 => Script Extension list offset = 122
|
||||
# 19 = ucp_bidiNSM => Bidi class non-spacing mark
|
||||
# 0 => Dummy value, unused at present
|
||||
#
|
||||
# At offset 122 in the ucd_script_sets vector we find the list 3, 15, 107, 29,
|
||||
# and terminator 0. This means that this character is expected to be used with
|
||||
# any of those scripts, which are Bengali, Devanagari, Grantha, and Kannada.
|
||||
#
|
||||
# Philip Hazel, 03 July 2008
|
||||
# Philip Hazel, last updated 05 December 2021.
|
||||
##############################################################################
|
||||
|
||||
|
||||
|
@ -195,17 +206,21 @@ MAX_UNICODE = 0x110000
|
|||
NOTACHAR = 0xffffffff
|
||||
|
||||
|
||||
# Parse a line of Scripts.txt, GraphemeBreakProperty.txt or DerivedGeneralCategory.txt
|
||||
# Parse a line of Scripts.txt, GraphemeBreakProperty.txt,
|
||||
# DerivedBidiClass.txt or DerivedGeneralCategory.txt
|
||||
|
||||
def make_get_names(enum):
|
||||
return lambda chardata: enum.index(chardata[1])
|
||||
|
||||
# Parse a line of CaseFolding.txt
|
||||
|
||||
def get_other_case(chardata):
|
||||
if chardata[1] == 'C' or chardata[1] == 'S':
|
||||
return int(chardata[2], 16) - int(chardata[0], 16)
|
||||
return 0
|
||||
|
||||
# Parse a line of ScriptExtensions.txt
|
||||
|
||||
def get_script_extension(chardata):
|
||||
this_script_list = list(chardata[1].split(' '))
|
||||
if len(this_script_list) == 1:
|
||||
|
@ -233,6 +248,7 @@ def get_script_extension(chardata):
|
|||
return -return_value
|
||||
|
||||
# Read the whole table in memory, setting/checking the Unicode version
|
||||
|
||||
def read_table(file_name, get_value, default_value):
|
||||
global unicode_version
|
||||
|
||||
|
@ -489,6 +505,14 @@ break_property_names = ['CR', 'LF', 'Control', 'Extend', 'Prepend',
|
|||
'SpacingMark', 'L', 'V', 'T', 'LV', 'LVT', 'Regional_Indicator', 'Other',
|
||||
'ZWJ', 'Extended_Pictographic' ]
|
||||
|
||||
# BIDI class property names in the DerivedBidiClass.txt file
|
||||
|
||||
bidiclass_names = ['AL', 'AN', 'B', 'BN', 'CS', 'EN', 'ES', 'ET', 'FSI', 'L',
|
||||
'LRE', 'LRI', 'LRO', 'NSM', 'ON', 'PDF', 'PDI', 'R', 'RLE', 'RLI', 'RLO',
|
||||
'S', 'WS' ]
|
||||
|
||||
# Create the various tables
|
||||
|
||||
test_record_size()
|
||||
unicode_version = ""
|
||||
|
||||
|
@ -496,6 +520,28 @@ script = read_table('Unicode.tables/Scripts.txt', make_get_names(script_names),
|
|||
category = read_table('Unicode.tables/DerivedGeneralCategory.txt', make_get_names(category_names), category_names.index('Cn'))
|
||||
break_props = read_table('Unicode.tables/GraphemeBreakProperty.txt', make_get_names(break_property_names), break_property_names.index('Other'))
|
||||
other_case = read_table('Unicode.tables/CaseFolding.txt', get_other_case, 0)
|
||||
bidi_class = read_table('Unicode.tables/DerivedBidiClass.txt', make_get_names(bidiclass_names), bidiclass_names.index('L'))
|
||||
|
||||
# The Bidi_Control property is a Y/N value, so needs only one bit. We scan the
|
||||
# PropList.txt file and set the 0x80 bit in the bidi_class table.
|
||||
|
||||
file = open('Unicode.tables/PropList.txt', 'r', encoding='utf-8')
|
||||
for line in file:
|
||||
line = re.sub(r'#.*', '', line)
|
||||
chardata = list(map(str.strip, line.split(';')))
|
||||
if len(chardata) <= 1:
|
||||
continue
|
||||
if chardata[1] != "Bidi_Control":
|
||||
continue
|
||||
m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', chardata[0])
|
||||
char = int(m.group(1), 16)
|
||||
if m.group(3) is None:
|
||||
last = char
|
||||
else:
|
||||
last = int(m.group(3), 16)
|
||||
for i in range(char, last + 1):
|
||||
bidi_class[i] |= 0x80;
|
||||
file.close()
|
||||
|
||||
# The grapheme breaking rules were changed for Unicode 11.0.0 (June 2018). Now
|
||||
# we need to find the Extended_Pictographic property for emoji characters. This
|
||||
|
@ -509,10 +555,8 @@ for line in file:
|
|||
chardata = list(map(str.strip, line.split(';')))
|
||||
if len(chardata) <= 1:
|
||||
continue
|
||||
|
||||
if chardata[1] != "Extended_Pictographic":
|
||||
continue
|
||||
|
||||
m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', chardata[0])
|
||||
char = int(m.group(1), 16)
|
||||
if m.group(3) is None:
|
||||
|
@ -542,12 +586,13 @@ for i in range(0, MAX_UNICODE):
|
|||
if scriptx[i] == script_abbrevs_default:
|
||||
scriptx[i] = script[i]
|
||||
|
||||
# With the addition of the new Script Extensions field, we need some padding
|
||||
# to get the Unicode records up to 12 bytes (multiple of 4). Set a value
|
||||
# greater than 255 to make the field 16 bits.
|
||||
# With the addition of the Script Extensions field, we needed some padding to
|
||||
# get the Unicode records up to 12 bytes (multiple of 4). Originally this was a
|
||||
# 16-bit field and padding_dummy[0] was set to 256 to ensure this, but 8 bits
|
||||
# are now used for the bidi class, so zero will do.
|
||||
|
||||
padding_dummy = [0] * MAX_UNICODE
|
||||
padding_dummy[0] = 256
|
||||
padding_dummy[0] = 0
|
||||
|
||||
# This block of code was added by PH in September 2012. I am not a Python
|
||||
# programmer, so the style is probably dreadful, but it does the job. It scans
|
||||
|
@ -622,7 +667,7 @@ for s in sets:
|
|||
# Combine the tables
|
||||
|
||||
table, records = combine_tables(script, category, break_props,
|
||||
caseless_offsets, other_case, scriptx, padding_dummy)
|
||||
caseless_offsets, other_case, scriptx, bidi_class, padding_dummy)
|
||||
|
||||
record_size, record_struct = get_record_size_struct(list(records.keys()))
|
||||
|
||||
|
@ -673,7 +718,7 @@ print("a totally empty module because some compilers barf at that.")
|
|||
print("Instead, just supply some small dummy tables. */")
|
||||
print()
|
||||
print("#ifndef SUPPORT_UNICODE")
|
||||
print("const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0,0,0 }};")
|
||||
print("const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0,0,0,0 }};")
|
||||
print("const uint16_t PRIV(ucd_stage1)[] = {0};")
|
||||
print("const uint16_t PRIV(ucd_stage2)[] = {0};")
|
||||
print("const uint32_t PRIV(ucd_caseless_sets)[] = {0};")
|
||||
|
@ -693,6 +738,7 @@ print(" ucp_gbOther, /* grapheme break property */")
|
|||
print(" 0, /* case set */")
|
||||
print(" 0, /* other case */")
|
||||
print(" ucp_Unknown, /* script extension */")
|
||||
print(" ucp_bidiL, /* bidi class */")
|
||||
print(" 0, /* dummy filler */")
|
||||
print(" }};")
|
||||
print("#endif")
|
||||
|
@ -775,8 +821,9 @@ print("\n};\n")
|
|||
print("/* These are the main two-stage UCD tables. The fields in each record are:")
|
||||
print("script (8 bits), character type (8 bits), grapheme break property (8 bits),")
|
||||
print("offset to multichar other cases or zero (8 bits), offset to other case")
|
||||
print("or zero (32 bits, signed), script extension (16 bits, signed), and a dummy")
|
||||
print("16-bit field to make the whole thing a multiple of 4 bytes. */\n")
|
||||
print("or zero (32 bits, signed), script extension (16 bits, signed), bidi class")
|
||||
print("(8 bits), and a dummy 8-bit field to make the whole thing a multiple")
|
||||
print("of 4 bytes. */\n")
|
||||
|
||||
print_records(records, record_size)
|
||||
print_table(min_stage1, 'PRIV(ucd_stage1)')
|
||||
|
|
|
@ -23,7 +23,7 @@ GenerateUtt.py A Python script to generate part of the pcre2_tables.c file
|
|||
ManyConfigTests A shell script that runs "configure, make, test" a number of
|
||||
times with different configuration settings.
|
||||
|
||||
MultiStage2.py A Python script that generates the file pcre2_ucd.c from six
|
||||
MultiStage2.py A Python script that generates the file pcre2_ucd.c from eight
|
||||
Unicode data files, which are themselves downloaded from the
|
||||
Unicode web site. Run this script in the "maint" directory.
|
||||
The generated file is written to stdout. It contains the
|
||||
|
@ -41,7 +41,8 @@ README This file.
|
|||
Unicode.tables The files in this directory were downloaded from the Unicode
|
||||
web site. They contain information about Unicode characters
|
||||
and scripts. The ones used by the MultiStage2.py script are
|
||||
CaseFolding.txt, DerivedGeneralCategory.txt, Scripts.txt,
|
||||
CaseFolding.txt, DerivedBidiClass.txt,
|
||||
DerivedGeneralCategory.txt, PropList.txt, Scripts.txt,
|
||||
ScriptExtensions.txt, GraphemeBreakProperty.txt, and
|
||||
emoji-data.txt. I've kept UnicodeData.txt (which is no longer
|
||||
used by the script) because it is useful occasionally for
|
||||
|
@ -439,4 +440,4 @@ years.
|
|||
Philip Hazel
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
Last updated: 26 August 2021
|
||||
Last updated: 05 December 2021
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
248
maint/ucptest.c
248
maint/ucptest.c
|
@ -2,7 +2,7 @@
|
|||
* A program for testing the Unicode property table *
|
||||
***************************************************/
|
||||
|
||||
/* Copyright (c) University of Cambridge 2008-2020 */
|
||||
/* Copyright (c) University of Cambridge 2008-2021 */
|
||||
|
||||
/* Compile thus:
|
||||
|
||||
|
@ -19,33 +19,35 @@ I wrote it to help with debugging PCRE, and have added things that I found
|
|||
useful, in a rather haphazard way. The code has never been seriously tidied or
|
||||
checked for robustness, but it shouldn't now give compiler warnings.
|
||||
|
||||
There is only one option: "-s". If given, it applies only to the "findprop"
|
||||
command. It causes the UTF-8 sequence of bytes that encode the character to be
|
||||
output between angle brackets at the end of the line. On a UTF-8 terminal, this
|
||||
There is only one option: "-s". If given, it applies only to the "findprop"
|
||||
command. It causes the UTF-8 sequence of bytes that encode the character to be
|
||||
output between angle brackets at the end of the line. On a UTF-8 terminal, this
|
||||
will show the appropriate graphic for the code point.
|
||||
|
||||
If the command has arguments, they are concatenated into a buffer, separated by
|
||||
spaces. If the first argument starts "U+" or consists entirely of hexadecimal
|
||||
digits, "findprop" is inserted at the start. The buffer is then processed as a
|
||||
single line file, after which the program exits. If there are no arguments, the
|
||||
program reads commands line by line on stdin and writes output to stdout. The
|
||||
program reads commands line by line on stdin and writes output to stdout. The
|
||||
return code is always zero.
|
||||
|
||||
There are three commands:
|
||||
|
||||
"findprop" must be followed by a space-separated list of Unicode code points as
|
||||
hex numbers, either without any prefix or starting with "U+". The output is one
|
||||
line per character, giving its Unicode properties followed by its other case or
|
||||
line per character, giving its Unicode properties followed by its other case or
|
||||
cases if one or more exist, followed by its Script Extension list if it is not
|
||||
just the same as the base script. This list is in square brackets. The
|
||||
properties are:
|
||||
|
||||
Bidi control shown as '*' if true
|
||||
Bidi class e.g. NSM (most common is L)
|
||||
General type e.g. Letter
|
||||
Specific type e.g. Upper case letter
|
||||
Script e.g. Medefaidrin
|
||||
Grapheme break type e.g. Extend (most common is Other)
|
||||
|
||||
"find" must be followed by a list of property names and their values. The
|
||||
"find" must be followed by a list of property names and their values. The
|
||||
values are case-sensitive. This finds characters that have those properties. If
|
||||
multiple properties are listed, they must all be matched. Currently supported:
|
||||
|
||||
|
@ -56,6 +58,8 @@ multiple properties are listed, they must all be matched. Currently supported:
|
|||
scripts must be present.
|
||||
type <abbrev> The character's specific type (e.g. Lu or Nd) must match.
|
||||
gbreak <name> The grapheme break property must match.
|
||||
bidi <class> The character's bidi class must match.
|
||||
bidi_control The character must be a bidi control character
|
||||
|
||||
If a <name> or <abbrev> is preceded by !, the value must NOT be present. For
|
||||
Script Extensions, there may be a mixture of positive and negative
|
||||
|
@ -63,10 +67,10 @@ requirements. All must be satisfied.
|
|||
|
||||
Sequences of two or more characters are shown as ranges, for example
|
||||
U+0041..U+004A. No more than 100 lines are output. If there are more
|
||||
characters, the list ends with ...
|
||||
characters, the list ends with ...
|
||||
|
||||
"list" must be followed by a property name (script, type, or gbreak). The
|
||||
defined values for that property are listed. */
|
||||
"list" must be followed by one of property names script, type, gbreak or bidi.
|
||||
The defined values for that property are listed. */
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
|
@ -145,7 +149,7 @@ static const unsigned char *type_names[] = {
|
|||
US"So", US"Other symbol",
|
||||
US"Zl", US"Line separator",
|
||||
US"Zp", US"Paragraph separator",
|
||||
US"Zs", US"Space separator"
|
||||
US"Zs", US"Space separator"
|
||||
};
|
||||
|
||||
static const unsigned char *gb_names[] = {
|
||||
|
@ -166,6 +170,31 @@ static const unsigned char *gb_names[] = {
|
|||
US"Extended_Pictographic", US""
|
||||
};
|
||||
|
||||
static const unsigned char *bd_names[] = {
|
||||
US"AL", US"Arabic letter",
|
||||
US"AN", US"Arabic number",
|
||||
US"B", US"Paragraph separator",
|
||||
US"BN", US"Boundary neutral",
|
||||
US"CS", US"Common separator",
|
||||
US"EN", US"European number",
|
||||
US"ES", US"European separator",
|
||||
US"ET", US"European terminator",
|
||||
US"FSI", US"First string isolate",
|
||||
US"L", US"Left-to-right",
|
||||
US"LRE", US"Left-to-right embedding",
|
||||
US"LRI", US"Left-to-right isolate",
|
||||
US"LRO", US"Left-to-right override",
|
||||
US"NSM", US"Non-spacing mark",
|
||||
US"ON", US"Other neutral",
|
||||
US"PDF", US"Pop directional format",
|
||||
US"PDI", US"Pop directional isolate",
|
||||
US"R", US"Right-to-left",
|
||||
US"RLE", US"Right-to-left embedding",
|
||||
US"RLI", US"Right-to-left isolate",
|
||||
US"RLO", US"Right-to-left override",
|
||||
US"S", US"Segment separator",
|
||||
US"WS", US"White space"
|
||||
};
|
||||
|
||||
static const unsigned int utf8_table1[] = {
|
||||
0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
|
||||
|
@ -235,14 +264,14 @@ const ucp_type_table *u;
|
|||
|
||||
for (i = 0; i < PRIV(utt_size); i++)
|
||||
{
|
||||
u = PRIV(utt) + i;
|
||||
u = PRIV(utt) + i;
|
||||
if (u->type == PT_SC && u->value == script) break;
|
||||
}
|
||||
if (i < PRIV(utt_size))
|
||||
return PRIV(utt_names) + u->name_offset;
|
||||
|
||||
|
||||
return "??";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
|
@ -257,12 +286,15 @@ int fulltype = UCD_CHARTYPE(c);
|
|||
int script = UCD_SCRIPT(c);
|
||||
int scriptx = UCD_SCRIPTX(c);
|
||||
int gbprop = UCD_GRAPHBREAK(c);
|
||||
int bidi = UCD_BIDICLASS(c);
|
||||
int bidicontrol = UCD_BIDICONTROL(c);
|
||||
unsigned int othercase = UCD_OTHERCASE(c);
|
||||
int caseset = UCD_CASESET(c);
|
||||
|
||||
const unsigned char *fulltypename = US"??";
|
||||
const unsigned char *typename = US"??";
|
||||
const unsigned char *graphbreak = US"??";
|
||||
const unsigned char *bidiclass = US"??";
|
||||
const unsigned char *scriptname = CUS get_scriptname(script);
|
||||
|
||||
switch (type)
|
||||
|
@ -332,7 +364,37 @@ switch(gbprop)
|
|||
default: graphbreak = US"Unknown"; break;
|
||||
}
|
||||
|
||||
printf("U+%04X %s: %s, %s, %s", c, typename, fulltypename, scriptname, graphbreak);
|
||||
switch(bidi)
|
||||
{
|
||||
case ucp_bidiAL: bidiclass = US"AL "; break;
|
||||
case ucp_bidiFSI: bidiclass = US"FSI"; break;
|
||||
case ucp_bidiL: bidiclass = US"L "; break;
|
||||
case ucp_bidiLRE: bidiclass = US"LRE"; break;
|
||||
case ucp_bidiLRI: bidiclass = US"LRI"; break;
|
||||
case ucp_bidiLRO: bidiclass = US"LRO"; break;
|
||||
case ucp_bidiPDF: bidiclass = US"PDF"; break;
|
||||
case ucp_bidiPDI: bidiclass = US"PDI"; break;
|
||||
case ucp_bidiR: bidiclass = US"R "; break;
|
||||
case ucp_bidiRLE: bidiclass = US"RLE"; break;
|
||||
case ucp_bidiRLI: bidiclass = US"RLI"; break;
|
||||
case ucp_bidiRLO: bidiclass = US"RLO"; break;
|
||||
case ucp_bidiAN: bidiclass = US"AN "; break;
|
||||
case ucp_bidiB: bidiclass = US"B "; break;
|
||||
case ucp_bidiBN: bidiclass = US"BN "; break;
|
||||
case ucp_bidiCS: bidiclass = US"CS "; break;
|
||||
case ucp_bidiEN: bidiclass = US"EN "; break;
|
||||
case ucp_bidiES: bidiclass = US"ES "; break;
|
||||
case ucp_bidiET: bidiclass = US"ET "; break;
|
||||
case ucp_bidiNSM: bidiclass = US"NSM"; break;
|
||||
case ucp_bidiON: bidiclass = US"ON "; break;
|
||||
case ucp_bidiS: bidiclass = US"S "; break;
|
||||
case ucp_bidiWS: bidiclass = US"WS "; break;
|
||||
default: bidiclass = US"???"; break;
|
||||
}
|
||||
|
||||
printf("U+%04X %c%s %s: %s, %s, %s", c, bidicontrol? '*':' ', bidiclass,
|
||||
typename, fulltypename, scriptname, graphbreak);
|
||||
|
||||
if (is_just_one && othercase != c)
|
||||
{
|
||||
printf(", U+%04X", othercase);
|
||||
|
@ -341,9 +403,9 @@ if (is_just_one && othercase != c)
|
|||
const uint32_t *p = PRIV(ucd_caseless_sets) + caseset - 1;
|
||||
while (*(++p) < NOTACHAR)
|
||||
{
|
||||
unsigned int d = *p;
|
||||
unsigned int d = *p;
|
||||
if (d != othercase && d != c) printf(", U+%04X", d);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -364,13 +426,13 @@ if (scriptx != script)
|
|||
}
|
||||
printf("]");
|
||||
}
|
||||
|
||||
|
||||
if (show_character && is_just_one)
|
||||
{
|
||||
unsigned char buffer[8];
|
||||
size_t len = ord2utf8(c, buffer);
|
||||
printf(", >%.*s<", (int)len, buffer);
|
||||
}
|
||||
printf(", >%.*s<", (int)len, buffer);
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
}
|
||||
|
@ -394,9 +456,12 @@ uint32_t i, c;
|
|||
int script = -1;
|
||||
int type = -1;
|
||||
int gbreak = -1;
|
||||
int bidiclass = -1;
|
||||
BOOL bidicontrol = FALSE;
|
||||
BOOL script_not = FALSE;
|
||||
BOOL type_not = FALSE;
|
||||
BOOL gbreak_not = FALSE;
|
||||
BOOL bidiclass_not = FALSE;
|
||||
BOOL hadrange = FALSE;
|
||||
const ucd_record *ucd, *next_ucd;
|
||||
const char *pad = " ";
|
||||
|
@ -405,10 +470,12 @@ while (*s != 0)
|
|||
{
|
||||
unsigned int offset = 0;
|
||||
BOOL scriptx_not = FALSE;
|
||||
char *value_start;
|
||||
|
||||
for (t = name; *s != 0 && !isspace(*s); s++) *t++ = *s;
|
||||
*t = 0;
|
||||
while (isspace(*s)) s++;
|
||||
value_start = s;
|
||||
|
||||
for (t = value; *s != 0 && !isspace(*s); s++) *t++ = *s;
|
||||
*t = 0;
|
||||
|
@ -426,11 +493,11 @@ while (*s != 0)
|
|||
|
||||
for (i = 0; i < PRIV(utt_size); i++)
|
||||
{
|
||||
const ucp_type_table *u = PRIV(utt) + i;
|
||||
if (u->type == PT_SC && strcmp(CS(value + offset),
|
||||
const ucp_type_table *u = PRIV(utt) + i;
|
||||
if (u->type == PT_SC && strcmp(CS(value + offset),
|
||||
PRIV(utt_names) + u->name_offset) == 0)
|
||||
{
|
||||
c = u->value;
|
||||
c = u->value;
|
||||
if (name[6] == 'x')
|
||||
{
|
||||
scriptx_list[scriptx_count++] = scriptx_not? (-c):c;
|
||||
|
@ -516,6 +583,45 @@ while (*s != 0)
|
|||
}
|
||||
}
|
||||
|
||||
else if (strcmp(CS name, "bidi") == 0 ||
|
||||
strcmp(CS name, "bidiclass") == 0 ||
|
||||
strcmp(CS name, "bidi_class") == 0 )
|
||||
{
|
||||
if (bidiclass >= 0)
|
||||
{
|
||||
printf("** Only 1 bidi class value allowed\n");
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (value[0] == '!')
|
||||
{
|
||||
bidiclass_not = TRUE;
|
||||
offset = 1;
|
||||
}
|
||||
for (i = 0; i < sizeof(bd_names)/sizeof(char *); i += 2)
|
||||
{
|
||||
if (strcmp(CS (value + offset), CS bd_names[i]) == 0)
|
||||
{
|
||||
bidiclass = i/2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i >= sizeof(bd_names)/sizeof(char *))
|
||||
{
|
||||
printf("** Unrecognized bidi class name \"%s\"\n", value);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
else if (strcmp(CS name, "bidi_control") == 0 ||
|
||||
strcmp(CS name, "bidicontrol") == 0)
|
||||
{
|
||||
bidicontrol = TRUE;
|
||||
s = value_start; /* No data */
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
printf("** Unrecognized property name \"%s\"\n", name);
|
||||
|
@ -523,7 +629,8 @@ while (*s != 0)
|
|||
}
|
||||
}
|
||||
|
||||
if (script < 0 && scriptx_count == 0 && type < 0 && gbreak < 0)
|
||||
if (script < 0 && scriptx_count == 0 && type < 0 && gbreak < 0 &&
|
||||
bidiclass < 0 && !bidicontrol)
|
||||
{
|
||||
printf("** No properties specified\n");
|
||||
return;
|
||||
|
@ -608,6 +715,20 @@ for (c = 0; c <= 0x10ffff; c++)
|
|||
}
|
||||
}
|
||||
|
||||
if (bidiclass >= 0)
|
||||
{
|
||||
if (bidiclass_not)
|
||||
{
|
||||
if (bidiclass == UCD_BIDICLASS(c)) continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (bidiclass != UCD_BIDICLASS(c)) continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (bidicontrol && UCD_BIDICONTROL(c) == 0) continue;
|
||||
|
||||
/* All conditions are met. Look for runs. */
|
||||
|
||||
ucd = GET_UCD(c);
|
||||
|
@ -663,9 +784,9 @@ if (strcmp(CS name, "findprop") == 0)
|
|||
{
|
||||
while (*s != 0)
|
||||
{
|
||||
unsigned int c;
|
||||
unsigned int c;
|
||||
unsigned char *endptr;
|
||||
t = s;
|
||||
t = s;
|
||||
if (strncmp(CS t, "U+", 2) == 0) t += 2;
|
||||
c = strtoul(CS t, CSS(&endptr), 16);
|
||||
if (*endptr != 0 && !isspace(*endptr))
|
||||
|
@ -673,13 +794,13 @@ if (strcmp(CS name, "findprop") == 0)
|
|||
while (*endptr != 0 && !isspace(*endptr)) endptr++;
|
||||
printf("** Invalid hex number: ignored \"%.*s\"\n", (int)(endptr-s), s);
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
if (c > 0x10ffff)
|
||||
if (c > 0x10ffff)
|
||||
printf("** U+%x is too big for a Unicode code point\n", c);
|
||||
else
|
||||
else
|
||||
print_prop(c, TRUE);
|
||||
}
|
||||
}
|
||||
s = endptr;
|
||||
while (isspace(*s)) s++;
|
||||
}
|
||||
|
@ -689,7 +810,7 @@ else if (strcmp(CS name, "find") == 0)
|
|||
{
|
||||
find_chars(s);
|
||||
}
|
||||
|
||||
|
||||
else if (strcmp(CS name, "list") == 0)
|
||||
{
|
||||
while (*s != 0)
|
||||
|
@ -698,38 +819,45 @@ else if (strcmp(CS name, "list") == 0)
|
|||
for (t = name; *s != 0 && !isspace(*s); s++) *t++ = *s;
|
||||
*t = 0;
|
||||
while (isspace(*s)) s++;
|
||||
|
||||
|
||||
if (strcmp(CS name, "script") == 0 || strcmp(CS name, "scripts") == 0)
|
||||
{
|
||||
for (i = 0; i < PRIV(utt_size); i++)
|
||||
for (i = 0; i < PRIV(utt_size); i++)
|
||||
if (PRIV(utt)[i].type == PT_SC)
|
||||
printf("%s\n", PRIV(utt_names) + PRIV(utt)[i].name_offset);
|
||||
printf("%s\n", PRIV(utt_names) + PRIV(utt)[i].name_offset);
|
||||
}
|
||||
|
||||
|
||||
else if (strcmp(CS name, "type") == 0 || strcmp(CS name, "types") == 0)
|
||||
{
|
||||
for (i = 0; i < sizeof(type_names)/sizeof(char *); i += 2)
|
||||
printf("%s %s\n", type_names[i], type_names[i+1]);
|
||||
}
|
||||
|
||||
printf("%s %s\n", type_names[i], type_names[i+1]);
|
||||
}
|
||||
|
||||
else if (strcmp(CS name, "gbreak") == 0 || strcmp(CS name, "gbreaks") == 0)
|
||||
{
|
||||
for (i = 0; i < sizeof(gb_names)/sizeof(char *); i += 2)
|
||||
{
|
||||
if (gb_names[i+1][0] != 0)
|
||||
if (gb_names[i+1][0] != 0)
|
||||
printf("%-3s (%s)\n", gb_names[i], gb_names[i+1]);
|
||||
else
|
||||
else
|
||||
printf("%s\n", gb_names[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
else if (strcmp(CS name, "bidi") == 0 ||
|
||||
strcmp(CS name, "bidiclasses") == 0)
|
||||
{
|
||||
printf("** Unknown property \"%s\"\n", name);
|
||||
for (i = 0; i < sizeof(bd_names)/sizeof(char *); i += 2)
|
||||
printf("%3s %s\n", bd_names[i], bd_names[i+1]);
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
printf("** Unknown property \"%s\"\n", name);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
else printf("** Unknown test command \"%s\"\n", name);
|
||||
}
|
||||
|
@ -751,32 +879,32 @@ if (argc > 1 && strcmp(argv[1], "-s") == 0)
|
|||
{
|
||||
show_character = TRUE;
|
||||
first_arg++;
|
||||
}
|
||||
}
|
||||
|
||||
if (argc > first_arg)
|
||||
{
|
||||
int i;
|
||||
BOOL hexfirst = TRUE;
|
||||
char *arg = argv[first_arg];
|
||||
BOOL hexfirst = TRUE;
|
||||
char *arg = argv[first_arg];
|
||||
unsigned char *s = buffer;
|
||||
|
||||
if (strncmp(arg, "U+", 2) != 0 && !isdigit(*arg))
|
||||
|
||||
if (strncmp(arg, "U+", 2) != 0 && !isdigit(*arg))
|
||||
{
|
||||
while (*arg != 0)
|
||||
while (*arg != 0)
|
||||
{
|
||||
if (!isxdigit(*arg++)) { hexfirst = FALSE; break; }
|
||||
}
|
||||
}
|
||||
|
||||
if (!isxdigit(*arg++)) { hexfirst = FALSE; break; }
|
||||
}
|
||||
}
|
||||
|
||||
if (hexfirst)
|
||||
{
|
||||
strcpy(CS s, "findprop ");
|
||||
s += 9;
|
||||
}
|
||||
|
||||
|
||||
for (i = first_arg; i < argc; i++)
|
||||
{
|
||||
s += sprintf(CS s, "%s ", argv[i]);
|
||||
s += sprintf(CS s, "%s ", argv[i]);
|
||||
}
|
||||
|
||||
process_command_line(buffer);
|
||||
|
@ -812,7 +940,7 @@ for(;;)
|
|||
if (fgets(CS buffer, sizeof(buffer), stdin) == NULL) break;
|
||||
if (!interactive) printf("%s", buffer);
|
||||
}
|
||||
|
||||
|
||||
process_command_line(buffer);
|
||||
}
|
||||
|
||||
|
|
|
@ -46,3 +46,5 @@ findprop 32ff
|
|||
findprop 1f16d
|
||||
|
||||
findprop U+10e93 U+10eaa
|
||||
|
||||
findprop 0602 202a 202b 202c 2068 2069 202d 202e 2067
|
||||
|
|
|
@ -4,3 +4,14 @@ find type Sk
|
|||
find type Pd
|
||||
find gbreak LVT
|
||||
find script Old_Uyghur
|
||||
find bidi PDF
|
||||
find bidi CS
|
||||
find bidi CS type Sm
|
||||
find bidi B
|
||||
find bidi FSI
|
||||
find bidi PDI
|
||||
find bidi RLI
|
||||
find bidi RLO
|
||||
find bidi S
|
||||
find bidi WS
|
||||
find bidi_control
|
||||
|
|
|
@ -1,398 +1,409 @@
|
|||
findprop 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
|
||||
U+0000 Control: Control, Common, Control
|
||||
U+0001 Control: Control, Common, Control
|
||||
U+0002 Control: Control, Common, Control
|
||||
U+0003 Control: Control, Common, Control
|
||||
U+0004 Control: Control, Common, Control
|
||||
U+0005 Control: Control, Common, Control
|
||||
U+0006 Control: Control, Common, Control
|
||||
U+0007 Control: Control, Common, Control
|
||||
U+0008 Control: Control, Common, Control
|
||||
U+0009 Control: Control, Common, Control
|
||||
U+000A Control: Control, Common, LF
|
||||
U+000B Control: Control, Common, Control
|
||||
U+000C Control: Control, Common, Control
|
||||
U+000D Control: Control, Common, CR
|
||||
U+000E Control: Control, Common, Control
|
||||
U+000F Control: Control, Common, Control
|
||||
U+0000 BN Control: Control, Common, Control
|
||||
U+0001 BN Control: Control, Common, Control
|
||||
U+0002 BN Control: Control, Common, Control
|
||||
U+0003 BN Control: Control, Common, Control
|
||||
U+0004 BN Control: Control, Common, Control
|
||||
U+0005 BN Control: Control, Common, Control
|
||||
U+0006 BN Control: Control, Common, Control
|
||||
U+0007 BN Control: Control, Common, Control
|
||||
U+0008 BN Control: Control, Common, Control
|
||||
U+0009 S Control: Control, Common, Control
|
||||
U+000A B Control: Control, Common, LF
|
||||
U+000B S Control: Control, Common, Control
|
||||
U+000C WS Control: Control, Common, Control
|
||||
U+000D B Control: Control, Common, CR
|
||||
U+000E BN Control: Control, Common, Control
|
||||
U+000F BN Control: Control, Common, Control
|
||||
findprop 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
|
||||
U+0010 Control: Control, Common, Control
|
||||
U+0011 Control: Control, Common, Control
|
||||
U+0012 Control: Control, Common, Control
|
||||
U+0013 Control: Control, Common, Control
|
||||
U+0014 Control: Control, Common, Control
|
||||
U+0015 Control: Control, Common, Control
|
||||
U+0016 Control: Control, Common, Control
|
||||
U+0017 Control: Control, Common, Control
|
||||
U+0018 Control: Control, Common, Control
|
||||
U+0019 Control: Control, Common, Control
|
||||
U+001A Control: Control, Common, Control
|
||||
U+001B Control: Control, Common, Control
|
||||
U+001C Control: Control, Common, Control
|
||||
U+001D Control: Control, Common, Control
|
||||
U+001E Control: Control, Common, Control
|
||||
U+001F Control: Control, Common, Control
|
||||
U+0010 BN Control: Control, Common, Control
|
||||
U+0011 BN Control: Control, Common, Control
|
||||
U+0012 BN Control: Control, Common, Control
|
||||
U+0013 BN Control: Control, Common, Control
|
||||
U+0014 BN Control: Control, Common, Control
|
||||
U+0015 BN Control: Control, Common, Control
|
||||
U+0016 BN Control: Control, Common, Control
|
||||
U+0017 BN Control: Control, Common, Control
|
||||
U+0018 BN Control: Control, Common, Control
|
||||
U+0019 BN Control: Control, Common, Control
|
||||
U+001A BN Control: Control, Common, Control
|
||||
U+001B BN Control: Control, Common, Control
|
||||
U+001C B Control: Control, Common, Control
|
||||
U+001D B Control: Control, Common, Control
|
||||
U+001E B Control: Control, Common, Control
|
||||
U+001F S Control: Control, Common, Control
|
||||
findprop 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
|
||||
U+0020 Separator: Space separator, Common, Other
|
||||
U+0021 Punctuation: Other punctuation, Common, Other
|
||||
U+0022 Punctuation: Other punctuation, Common, Other
|
||||
U+0023 Punctuation: Other punctuation, Common, Other
|
||||
U+0024 Symbol: Currency symbol, Common, Other
|
||||
U+0025 Punctuation: Other punctuation, Common, Other
|
||||
U+0026 Punctuation: Other punctuation, Common, Other
|
||||
U+0027 Punctuation: Other punctuation, Common, Other
|
||||
U+0028 Punctuation: Open punctuation, Common, Other
|
||||
U+0029 Punctuation: Close punctuation, Common, Other
|
||||
U+002A Punctuation: Other punctuation, Common, Other
|
||||
U+002B Symbol: Mathematical symbol, Common, Other
|
||||
U+002C Punctuation: Other punctuation, Common, Other
|
||||
U+002D Punctuation: Dash punctuation, Common, Other
|
||||
U+002E Punctuation: Other punctuation, Common, Other
|
||||
U+002F Punctuation: Other punctuation, Common, Other
|
||||
U+0020 WS Separator: Space separator, Common, Other
|
||||
U+0021 ON Punctuation: Other punctuation, Common, Other
|
||||
U+0022 ON Punctuation: Other punctuation, Common, Other
|
||||
U+0023 ET Punctuation: Other punctuation, Common, Other
|
||||
U+0024 ET Symbol: Currency symbol, Common, Other
|
||||
U+0025 ET Punctuation: Other punctuation, Common, Other
|
||||
U+0026 ON Punctuation: Other punctuation, Common, Other
|
||||
U+0027 ON Punctuation: Other punctuation, Common, Other
|
||||
U+0028 ON Punctuation: Open punctuation, Common, Other
|
||||
U+0029 ON Punctuation: Close punctuation, Common, Other
|
||||
U+002A ON Punctuation: Other punctuation, Common, Other
|
||||
U+002B ES Symbol: Mathematical symbol, Common, Other
|
||||
U+002C CS Punctuation: Other punctuation, Common, Other
|
||||
U+002D ES Punctuation: Dash punctuation, Common, Other
|
||||
U+002E CS Punctuation: Other punctuation, Common, Other
|
||||
U+002F CS Punctuation: Other punctuation, Common, Other
|
||||
findprop 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
|
||||
U+0030 Number: Decimal number, Common, Other
|
||||
U+0031 Number: Decimal number, Common, Other
|
||||
U+0032 Number: Decimal number, Common, Other
|
||||
U+0033 Number: Decimal number, Common, Other
|
||||
U+0034 Number: Decimal number, Common, Other
|
||||
U+0035 Number: Decimal number, Common, Other
|
||||
U+0036 Number: Decimal number, Common, Other
|
||||
U+0037 Number: Decimal number, Common, Other
|
||||
U+0038 Number: Decimal number, Common, Other
|
||||
U+0039 Number: Decimal number, Common, Other
|
||||
U+003A Punctuation: Other punctuation, Common, Other
|
||||
U+003B Punctuation: Other punctuation, Common, Other
|
||||
U+003C Symbol: Mathematical symbol, Common, Other
|
||||
U+003D Symbol: Mathematical symbol, Common, Other
|
||||
U+003E Symbol: Mathematical symbol, Common, Other
|
||||
U+003F Punctuation: Other punctuation, Common, Other
|
||||
U+0030 EN Number: Decimal number, Common, Other
|
||||
U+0031 EN Number: Decimal number, Common, Other
|
||||
U+0032 EN Number: Decimal number, Common, Other
|
||||
U+0033 EN Number: Decimal number, Common, Other
|
||||
U+0034 EN Number: Decimal number, Common, Other
|
||||
U+0035 EN Number: Decimal number, Common, Other
|
||||
U+0036 EN Number: Decimal number, Common, Other
|
||||
U+0037 EN Number: Decimal number, Common, Other
|
||||
U+0038 EN Number: Decimal number, Common, Other
|
||||
U+0039 EN Number: Decimal number, Common, Other
|
||||
U+003A CS Punctuation: Other punctuation, Common, Other
|
||||
U+003B ON Punctuation: Other punctuation, Common, Other
|
||||
U+003C ON Symbol: Mathematical symbol, Common, Other
|
||||
U+003D ON Symbol: Mathematical symbol, Common, Other
|
||||
U+003E ON Symbol: Mathematical symbol, Common, Other
|
||||
U+003F ON Punctuation: Other punctuation, Common, Other
|
||||
findprop 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
|
||||
U+0040 Punctuation: Other punctuation, Common, Other
|
||||
U+0041 Letter: Upper case letter, Latin, Other, U+0061
|
||||
U+0042 Letter: Upper case letter, Latin, Other, U+0062
|
||||
U+0043 Letter: Upper case letter, Latin, Other, U+0063
|
||||
U+0044 Letter: Upper case letter, Latin, Other, U+0064
|
||||
U+0045 Letter: Upper case letter, Latin, Other, U+0065
|
||||
U+0046 Letter: Upper case letter, Latin, Other, U+0066
|
||||
U+0047 Letter: Upper case letter, Latin, Other, U+0067
|
||||
U+0048 Letter: Upper case letter, Latin, Other, U+0068
|
||||
U+0049 Letter: Upper case letter, Latin, Other, U+0069
|
||||
U+004A Letter: Upper case letter, Latin, Other, U+006A
|
||||
U+004B Letter: Upper case letter, Latin, Other, U+006B, U+212A
|
||||
U+004C Letter: Upper case letter, Latin, Other, U+006C
|
||||
U+004D Letter: Upper case letter, Latin, Other, U+006D
|
||||
U+004E Letter: Upper case letter, Latin, Other, U+006E
|
||||
U+004F Letter: Upper case letter, Latin, Other, U+006F
|
||||
U+0040 ON Punctuation: Other punctuation, Common, Other
|
||||
U+0041 L Letter: Upper case letter, Latin, Other, U+0061
|
||||
U+0042 L Letter: Upper case letter, Latin, Other, U+0062
|
||||
U+0043 L Letter: Upper case letter, Latin, Other, U+0063
|
||||
U+0044 L Letter: Upper case letter, Latin, Other, U+0064
|
||||
U+0045 L Letter: Upper case letter, Latin, Other, U+0065
|
||||
U+0046 L Letter: Upper case letter, Latin, Other, U+0066
|
||||
U+0047 L Letter: Upper case letter, Latin, Other, U+0067
|
||||
U+0048 L Letter: Upper case letter, Latin, Other, U+0068
|
||||
U+0049 L Letter: Upper case letter, Latin, Other, U+0069
|
||||
U+004A L Letter: Upper case letter, Latin, Other, U+006A
|
||||
U+004B L Letter: Upper case letter, Latin, Other, U+006B, U+212A
|
||||
U+004C L Letter: Upper case letter, Latin, Other, U+006C
|
||||
U+004D L Letter: Upper case letter, Latin, Other, U+006D
|
||||
U+004E L Letter: Upper case letter, Latin, Other, U+006E
|
||||
U+004F L Letter: Upper case letter, Latin, Other, U+006F
|
||||
findprop 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
|
||||
U+0050 Letter: Upper case letter, Latin, Other, U+0070
|
||||
U+0051 Letter: Upper case letter, Latin, Other, U+0071
|
||||
U+0052 Letter: Upper case letter, Latin, Other, U+0072
|
||||
U+0053 Letter: Upper case letter, Latin, Other, U+0073, U+017F
|
||||
U+0054 Letter: Upper case letter, Latin, Other, U+0074
|
||||
U+0055 Letter: Upper case letter, Latin, Other, U+0075
|
||||
U+0056 Letter: Upper case letter, Latin, Other, U+0076
|
||||
U+0057 Letter: Upper case letter, Latin, Other, U+0077
|
||||
U+0058 Letter: Upper case letter, Latin, Other, U+0078
|
||||
U+0059 Letter: Upper case letter, Latin, Other, U+0079
|
||||
U+005A Letter: Upper case letter, Latin, Other, U+007A
|
||||
U+005B Punctuation: Open punctuation, Common, Other
|
||||
U+005C Punctuation: Other punctuation, Common, Other
|
||||
U+005D Punctuation: Close punctuation, Common, Other
|
||||
U+005E Symbol: Modifier symbol, Common, Other
|
||||
U+005F Punctuation: Connector punctuation, Common, Other
|
||||
U+0050 L Letter: Upper case letter, Latin, Other, U+0070
|
||||
U+0051 L Letter: Upper case letter, Latin, Other, U+0071
|
||||
U+0052 L Letter: Upper case letter, Latin, Other, U+0072
|
||||
U+0053 L Letter: Upper case letter, Latin, Other, U+0073, U+017F
|
||||
U+0054 L Letter: Upper case letter, Latin, Other, U+0074
|
||||
U+0055 L Letter: Upper case letter, Latin, Other, U+0075
|
||||
U+0056 L Letter: Upper case letter, Latin, Other, U+0076
|
||||
U+0057 L Letter: Upper case letter, Latin, Other, U+0077
|
||||
U+0058 L Letter: Upper case letter, Latin, Other, U+0078
|
||||
U+0059 L Letter: Upper case letter, Latin, Other, U+0079
|
||||
U+005A L Letter: Upper case letter, Latin, Other, U+007A
|
||||
U+005B ON Punctuation: Open punctuation, Common, Other
|
||||
U+005C ON Punctuation: Other punctuation, Common, Other
|
||||
U+005D ON Punctuation: Close punctuation, Common, Other
|
||||
U+005E ON Symbol: Modifier symbol, Common, Other
|
||||
U+005F ON Punctuation: Connector punctuation, Common, Other
|
||||
findprop 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
|
||||
U+0060 Symbol: Modifier symbol, Common, Other
|
||||
U+0061 Letter: Lower case letter, Latin, Other, U+0041
|
||||
U+0062 Letter: Lower case letter, Latin, Other, U+0042
|
||||
U+0063 Letter: Lower case letter, Latin, Other, U+0043
|
||||
U+0064 Letter: Lower case letter, Latin, Other, U+0044
|
||||
U+0065 Letter: Lower case letter, Latin, Other, U+0045
|
||||
U+0066 Letter: Lower case letter, Latin, Other, U+0046
|
||||
U+0067 Letter: Lower case letter, Latin, Other, U+0047
|
||||
U+0068 Letter: Lower case letter, Latin, Other, U+0048
|
||||
U+0069 Letter: Lower case letter, Latin, Other, U+0049
|
||||
U+006A Letter: Lower case letter, Latin, Other, U+004A
|
||||
U+006B Letter: Lower case letter, Latin, Other, U+004B, U+212A
|
||||
U+006C Letter: Lower case letter, Latin, Other, U+004C
|
||||
U+006D Letter: Lower case letter, Latin, Other, U+004D
|
||||
U+006E Letter: Lower case letter, Latin, Other, U+004E
|
||||
U+006F Letter: Lower case letter, Latin, Other, U+004F
|
||||
U+0060 ON Symbol: Modifier symbol, Common, Other
|
||||
U+0061 L Letter: Lower case letter, Latin, Other, U+0041
|
||||
U+0062 L Letter: Lower case letter, Latin, Other, U+0042
|
||||
U+0063 L Letter: Lower case letter, Latin, Other, U+0043
|
||||
U+0064 L Letter: Lower case letter, Latin, Other, U+0044
|
||||
U+0065 L Letter: Lower case letter, Latin, Other, U+0045
|
||||
U+0066 L Letter: Lower case letter, Latin, Other, U+0046
|
||||
U+0067 L Letter: Lower case letter, Latin, Other, U+0047
|
||||
U+0068 L Letter: Lower case letter, Latin, Other, U+0048
|
||||
U+0069 L Letter: Lower case letter, Latin, Other, U+0049
|
||||
U+006A L Letter: Lower case letter, Latin, Other, U+004A
|
||||
U+006B L Letter: Lower case letter, Latin, Other, U+004B, U+212A
|
||||
U+006C L Letter: Lower case letter, Latin, Other, U+004C
|
||||
U+006D L Letter: Lower case letter, Latin, Other, U+004D
|
||||
U+006E L Letter: Lower case letter, Latin, Other, U+004E
|
||||
U+006F L Letter: Lower case letter, Latin, Other, U+004F
|
||||
findprop 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
|
||||
U+0070 Letter: Lower case letter, Latin, Other, U+0050
|
||||
U+0071 Letter: Lower case letter, Latin, Other, U+0051
|
||||
U+0072 Letter: Lower case letter, Latin, Other, U+0052
|
||||
U+0073 Letter: Lower case letter, Latin, Other, U+0053, U+017F
|
||||
U+0074 Letter: Lower case letter, Latin, Other, U+0054
|
||||
U+0075 Letter: Lower case letter, Latin, Other, U+0055
|
||||
U+0076 Letter: Lower case letter, Latin, Other, U+0056
|
||||
U+0077 Letter: Lower case letter, Latin, Other, U+0057
|
||||
U+0078 Letter: Lower case letter, Latin, Other, U+0058
|
||||
U+0079 Letter: Lower case letter, Latin, Other, U+0059
|
||||
U+007A Letter: Lower case letter, Latin, Other, U+005A
|
||||
U+007B Punctuation: Open punctuation, Common, Other
|
||||
U+007C Symbol: Mathematical symbol, Common, Other
|
||||
U+007D Punctuation: Close punctuation, Common, Other
|
||||
U+007E Symbol: Mathematical symbol, Common, Other
|
||||
U+007F Control: Control, Common, Control
|
||||
U+0070 L Letter: Lower case letter, Latin, Other, U+0050
|
||||
U+0071 L Letter: Lower case letter, Latin, Other, U+0051
|
||||
U+0072 L Letter: Lower case letter, Latin, Other, U+0052
|
||||
U+0073 L Letter: Lower case letter, Latin, Other, U+0053, U+017F
|
||||
U+0074 L Letter: Lower case letter, Latin, Other, U+0054
|
||||
U+0075 L Letter: Lower case letter, Latin, Other, U+0055
|
||||
U+0076 L Letter: Lower case letter, Latin, Other, U+0056
|
||||
U+0077 L Letter: Lower case letter, Latin, Other, U+0057
|
||||
U+0078 L Letter: Lower case letter, Latin, Other, U+0058
|
||||
U+0079 L Letter: Lower case letter, Latin, Other, U+0059
|
||||
U+007A L Letter: Lower case letter, Latin, Other, U+005A
|
||||
U+007B ON Punctuation: Open punctuation, Common, Other
|
||||
U+007C ON Symbol: Mathematical symbol, Common, Other
|
||||
U+007D ON Punctuation: Close punctuation, Common, Other
|
||||
U+007E ON Symbol: Mathematical symbol, Common, Other
|
||||
U+007F BN Control: Control, Common, Control
|
||||
|
||||
findprop 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f
|
||||
U+0080 Control: Control, Common, Control
|
||||
U+0081 Control: Control, Common, Control
|
||||
U+0082 Control: Control, Common, Control
|
||||
U+0083 Control: Control, Common, Control
|
||||
U+0084 Control: Control, Common, Control
|
||||
U+0085 Control: Control, Common, Control
|
||||
U+0086 Control: Control, Common, Control
|
||||
U+0087 Control: Control, Common, Control
|
||||
U+0088 Control: Control, Common, Control
|
||||
U+0089 Control: Control, Common, Control
|
||||
U+008A Control: Control, Common, Control
|
||||
U+008B Control: Control, Common, Control
|
||||
U+008C Control: Control, Common, Control
|
||||
U+008D Control: Control, Common, Control
|
||||
U+008E Control: Control, Common, Control
|
||||
U+008F Control: Control, Common, Control
|
||||
U+0080 BN Control: Control, Common, Control
|
||||
U+0081 BN Control: Control, Common, Control
|
||||
U+0082 BN Control: Control, Common, Control
|
||||
U+0083 BN Control: Control, Common, Control
|
||||
U+0084 BN Control: Control, Common, Control
|
||||
U+0085 B Control: Control, Common, Control
|
||||
U+0086 BN Control: Control, Common, Control
|
||||
U+0087 BN Control: Control, Common, Control
|
||||
U+0088 BN Control: Control, Common, Control
|
||||
U+0089 BN Control: Control, Common, Control
|
||||
U+008A BN Control: Control, Common, Control
|
||||
U+008B BN Control: Control, Common, Control
|
||||
U+008C BN Control: Control, Common, Control
|
||||
U+008D BN Control: Control, Common, Control
|
||||
U+008E BN Control: Control, Common, Control
|
||||
U+008F BN Control: Control, Common, Control
|
||||
findprop 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f
|
||||
U+0090 Control: Control, Common, Control
|
||||
U+0091 Control: Control, Common, Control
|
||||
U+0092 Control: Control, Common, Control
|
||||
U+0093 Control: Control, Common, Control
|
||||
U+0094 Control: Control, Common, Control
|
||||
U+0095 Control: Control, Common, Control
|
||||
U+0096 Control: Control, Common, Control
|
||||
U+0097 Control: Control, Common, Control
|
||||
U+0098 Control: Control, Common, Control
|
||||
U+0099 Control: Control, Common, Control
|
||||
U+009A Control: Control, Common, Control
|
||||
U+009B Control: Control, Common, Control
|
||||
U+009C Control: Control, Common, Control
|
||||
U+009D Control: Control, Common, Control
|
||||
U+009E Control: Control, Common, Control
|
||||
U+009F Control: Control, Common, Control
|
||||
U+0090 BN Control: Control, Common, Control
|
||||
U+0091 BN Control: Control, Common, Control
|
||||
U+0092 BN Control: Control, Common, Control
|
||||
U+0093 BN Control: Control, Common, Control
|
||||
U+0094 BN Control: Control, Common, Control
|
||||
U+0095 BN Control: Control, Common, Control
|
||||
U+0096 BN Control: Control, Common, Control
|
||||
U+0097 BN Control: Control, Common, Control
|
||||
U+0098 BN Control: Control, Common, Control
|
||||
U+0099 BN Control: Control, Common, Control
|
||||
U+009A BN Control: Control, Common, Control
|
||||
U+009B BN Control: Control, Common, Control
|
||||
U+009C BN Control: Control, Common, Control
|
||||
U+009D BN Control: Control, Common, Control
|
||||
U+009E BN Control: Control, Common, Control
|
||||
U+009F BN Control: Control, Common, Control
|
||||
findprop a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aa ab ac ad ae af
|
||||
U+00A0 Separator: Space separator, Common, Other
|
||||
U+00A1 Punctuation: Other punctuation, Common, Other
|
||||
U+00A2 Symbol: Currency symbol, Common, Other
|
||||
U+00A3 Symbol: Currency symbol, Common, Other
|
||||
U+00A4 Symbol: Currency symbol, Common, Other
|
||||
U+00A5 Symbol: Currency symbol, Common, Other
|
||||
U+00A6 Symbol: Other symbol, Common, Other
|
||||
U+00A7 Punctuation: Other punctuation, Common, Other
|
||||
U+00A8 Symbol: Modifier symbol, Common, Other
|
||||
U+00A9 Symbol: Other symbol, Common, Extended Pictographic
|
||||
U+00AA Letter: Other letter, Latin, Other
|
||||
U+00AB Punctuation: Initial punctuation, Common, Other
|
||||
U+00AC Symbol: Mathematical symbol, Common, Other
|
||||
U+00AD Control: Format, Common, Control
|
||||
U+00AE Symbol: Other symbol, Common, Extended Pictographic
|
||||
U+00AF Symbol: Modifier symbol, Common, Other
|
||||
U+00A0 CS Separator: Space separator, Common, Other
|
||||
U+00A1 ON Punctuation: Other punctuation, Common, Other
|
||||
U+00A2 ET Symbol: Currency symbol, Common, Other
|
||||
U+00A3 ET Symbol: Currency symbol, Common, Other
|
||||
U+00A4 ET Symbol: Currency symbol, Common, Other
|
||||
U+00A5 ET Symbol: Currency symbol, Common, Other
|
||||
U+00A6 ON Symbol: Other symbol, Common, Other
|
||||
U+00A7 ON Punctuation: Other punctuation, Common, Other
|
||||
U+00A8 ON Symbol: Modifier symbol, Common, Other
|
||||
U+00A9 ON Symbol: Other symbol, Common, Extended Pictographic
|
||||
U+00AA L Letter: Other letter, Latin, Other
|
||||
U+00AB ON Punctuation: Initial punctuation, Common, Other
|
||||
U+00AC ON Symbol: Mathematical symbol, Common, Other
|
||||
U+00AD BN Control: Format, Common, Control
|
||||
U+00AE ON Symbol: Other symbol, Common, Extended Pictographic
|
||||
U+00AF ON Symbol: Modifier symbol, Common, Other
|
||||
findprop b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf
|
||||
U+00B0 Symbol: Other symbol, Common, Other
|
||||
U+00B1 Symbol: Mathematical symbol, Common, Other
|
||||
U+00B2 Number: Other number, Common, Other
|
||||
U+00B3 Number: Other number, Common, Other
|
||||
U+00B4 Symbol: Modifier symbol, Common, Other
|
||||
U+00B5 Letter: Lower case letter, Common, Other, U+03BC, U+039C
|
||||
U+00B6 Punctuation: Other punctuation, Common, Other
|
||||
U+00B7 Punctuation: Other punctuation, Common, Other
|
||||
U+00B8 Symbol: Modifier symbol, Common, Other
|
||||
U+00B9 Number: Other number, Common, Other
|
||||
U+00BA Letter: Other letter, Latin, Other
|
||||
U+00BB Punctuation: Final punctuation, Common, Other
|
||||
U+00BC Number: Other number, Common, Other
|
||||
U+00BD Number: Other number, Common, Other
|
||||
U+00BE Number: Other number, Common, Other
|
||||
U+00BF Punctuation: Other punctuation, Common, Other
|
||||
U+00B0 ET Symbol: Other symbol, Common, Other
|
||||
U+00B1 ET Symbol: Mathematical symbol, Common, Other
|
||||
U+00B2 EN Number: Other number, Common, Other
|
||||
U+00B3 EN Number: Other number, Common, Other
|
||||
U+00B4 ON Symbol: Modifier symbol, Common, Other
|
||||
U+00B5 L Letter: Lower case letter, Common, Other, U+03BC, U+039C
|
||||
U+00B6 ON Punctuation: Other punctuation, Common, Other
|
||||
U+00B7 ON Punctuation: Other punctuation, Common, Other
|
||||
U+00B8 ON Symbol: Modifier symbol, Common, Other
|
||||
U+00B9 EN Number: Other number, Common, Other
|
||||
U+00BA L Letter: Other letter, Latin, Other
|
||||
U+00BB ON Punctuation: Final punctuation, Common, Other
|
||||
U+00BC ON Number: Other number, Common, Other
|
||||
U+00BD ON Number: Other number, Common, Other
|
||||
U+00BE ON Number: Other number, Common, Other
|
||||
U+00BF ON Punctuation: Other punctuation, Common, Other
|
||||
findprop c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 ca cb cc cd ce cf
|
||||
U+00C0 Letter: Upper case letter, Latin, Other, U+00E0
|
||||
U+00C1 Letter: Upper case letter, Latin, Other, U+00E1
|
||||
U+00C2 Letter: Upper case letter, Latin, Other, U+00E2
|
||||
U+00C3 Letter: Upper case letter, Latin, Other, U+00E3
|
||||
U+00C4 Letter: Upper case letter, Latin, Other, U+00E4
|
||||
U+00C5 Letter: Upper case letter, Latin, Other, U+00E5, U+212B
|
||||
U+00C6 Letter: Upper case letter, Latin, Other, U+00E6
|
||||
U+00C7 Letter: Upper case letter, Latin, Other, U+00E7
|
||||
U+00C8 Letter: Upper case letter, Latin, Other, U+00E8
|
||||
U+00C9 Letter: Upper case letter, Latin, Other, U+00E9
|
||||
U+00CA Letter: Upper case letter, Latin, Other, U+00EA
|
||||
U+00CB Letter: Upper case letter, Latin, Other, U+00EB
|
||||
U+00CC Letter: Upper case letter, Latin, Other, U+00EC
|
||||
U+00CD Letter: Upper case letter, Latin, Other, U+00ED
|
||||
U+00CE Letter: Upper case letter, Latin, Other, U+00EE
|
||||
U+00CF Letter: Upper case letter, Latin, Other, U+00EF
|
||||
U+00C0 L Letter: Upper case letter, Latin, Other, U+00E0
|
||||
U+00C1 L Letter: Upper case letter, Latin, Other, U+00E1
|
||||
U+00C2 L Letter: Upper case letter, Latin, Other, U+00E2
|
||||
U+00C3 L Letter: Upper case letter, Latin, Other, U+00E3
|
||||
U+00C4 L Letter: Upper case letter, Latin, Other, U+00E4
|
||||
U+00C5 L Letter: Upper case letter, Latin, Other, U+00E5, U+212B
|
||||
U+00C6 L Letter: Upper case letter, Latin, Other, U+00E6
|
||||
U+00C7 L Letter: Upper case letter, Latin, Other, U+00E7
|
||||
U+00C8 L Letter: Upper case letter, Latin, Other, U+00E8
|
||||
U+00C9 L Letter: Upper case letter, Latin, Other, U+00E9
|
||||
U+00CA L Letter: Upper case letter, Latin, Other, U+00EA
|
||||
U+00CB L Letter: Upper case letter, Latin, Other, U+00EB
|
||||
U+00CC L Letter: Upper case letter, Latin, Other, U+00EC
|
||||
U+00CD L Letter: Upper case letter, Latin, Other, U+00ED
|
||||
U+00CE L Letter: Upper case letter, Latin, Other, U+00EE
|
||||
U+00CF L Letter: Upper case letter, Latin, Other, U+00EF
|
||||
findprop d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 da db dc dd de df
|
||||
U+00D0 Letter: Upper case letter, Latin, Other, U+00F0
|
||||
U+00D1 Letter: Upper case letter, Latin, Other, U+00F1
|
||||
U+00D2 Letter: Upper case letter, Latin, Other, U+00F2
|
||||
U+00D3 Letter: Upper case letter, Latin, Other, U+00F3
|
||||
U+00D4 Letter: Upper case letter, Latin, Other, U+00F4
|
||||
U+00D5 Letter: Upper case letter, Latin, Other, U+00F5
|
||||
U+00D6 Letter: Upper case letter, Latin, Other, U+00F6
|
||||
U+00D7 Symbol: Mathematical symbol, Common, Other
|
||||
U+00D8 Letter: Upper case letter, Latin, Other, U+00F8
|
||||
U+00D9 Letter: Upper case letter, Latin, Other, U+00F9
|
||||
U+00DA Letter: Upper case letter, Latin, Other, U+00FA
|
||||
U+00DB Letter: Upper case letter, Latin, Other, U+00FB
|
||||
U+00DC Letter: Upper case letter, Latin, Other, U+00FC
|
||||
U+00DD Letter: Upper case letter, Latin, Other, U+00FD
|
||||
U+00DE Letter: Upper case letter, Latin, Other, U+00FE
|
||||
U+00DF Letter: Lower case letter, Latin, Other, U+1E9E
|
||||
U+00D0 L Letter: Upper case letter, Latin, Other, U+00F0
|
||||
U+00D1 L Letter: Upper case letter, Latin, Other, U+00F1
|
||||
U+00D2 L Letter: Upper case letter, Latin, Other, U+00F2
|
||||
U+00D3 L Letter: Upper case letter, Latin, Other, U+00F3
|
||||
U+00D4 L Letter: Upper case letter, Latin, Other, U+00F4
|
||||
U+00D5 L Letter: Upper case letter, Latin, Other, U+00F5
|
||||
U+00D6 L Letter: Upper case letter, Latin, Other, U+00F6
|
||||
U+00D7 ON Symbol: Mathematical symbol, Common, Other
|
||||
U+00D8 L Letter: Upper case letter, Latin, Other, U+00F8
|
||||
U+00D9 L Letter: Upper case letter, Latin, Other, U+00F9
|
||||
U+00DA L Letter: Upper case letter, Latin, Other, U+00FA
|
||||
U+00DB L Letter: Upper case letter, Latin, Other, U+00FB
|
||||
U+00DC L Letter: Upper case letter, Latin, Other, U+00FC
|
||||
U+00DD L Letter: Upper case letter, Latin, Other, U+00FD
|
||||
U+00DE L Letter: Upper case letter, Latin, Other, U+00FE
|
||||
U+00DF L Letter: Lower case letter, Latin, Other, U+1E9E
|
||||
findprop e0 e1 e2 e3 e4 e5 e6 e7 e8 e9 ea eb ec ed ee ef
|
||||
U+00E0 Letter: Lower case letter, Latin, Other, U+00C0
|
||||
U+00E1 Letter: Lower case letter, Latin, Other, U+00C1
|
||||
U+00E2 Letter: Lower case letter, Latin, Other, U+00C2
|
||||
U+00E3 Letter: Lower case letter, Latin, Other, U+00C3
|
||||
U+00E4 Letter: Lower case letter, Latin, Other, U+00C4
|
||||
U+00E5 Letter: Lower case letter, Latin, Other, U+00C5, U+212B
|
||||
U+00E6 Letter: Lower case letter, Latin, Other, U+00C6
|
||||
U+00E7 Letter: Lower case letter, Latin, Other, U+00C7
|
||||
U+00E8 Letter: Lower case letter, Latin, Other, U+00C8
|
||||
U+00E9 Letter: Lower case letter, Latin, Other, U+00C9
|
||||
U+00EA Letter: Lower case letter, Latin, Other, U+00CA
|
||||
U+00EB Letter: Lower case letter, Latin, Other, U+00CB
|
||||
U+00EC Letter: Lower case letter, Latin, Other, U+00CC
|
||||
U+00ED Letter: Lower case letter, Latin, Other, U+00CD
|
||||
U+00EE Letter: Lower case letter, Latin, Other, U+00CE
|
||||
U+00EF Letter: Lower case letter, Latin, Other, U+00CF
|
||||
U+00E0 L Letter: Lower case letter, Latin, Other, U+00C0
|
||||
U+00E1 L Letter: Lower case letter, Latin, Other, U+00C1
|
||||
U+00E2 L Letter: Lower case letter, Latin, Other, U+00C2
|
||||
U+00E3 L Letter: Lower case letter, Latin, Other, U+00C3
|
||||
U+00E4 L Letter: Lower case letter, Latin, Other, U+00C4
|
||||
U+00E5 L Letter: Lower case letter, Latin, Other, U+00C5, U+212B
|
||||
U+00E6 L Letter: Lower case letter, Latin, Other, U+00C6
|
||||
U+00E7 L Letter: Lower case letter, Latin, Other, U+00C7
|
||||
U+00E8 L Letter: Lower case letter, Latin, Other, U+00C8
|
||||
U+00E9 L Letter: Lower case letter, Latin, Other, U+00C9
|
||||
U+00EA L Letter: Lower case letter, Latin, Other, U+00CA
|
||||
U+00EB L Letter: Lower case letter, Latin, Other, U+00CB
|
||||
U+00EC L Letter: Lower case letter, Latin, Other, U+00CC
|
||||
U+00ED L Letter: Lower case letter, Latin, Other, U+00CD
|
||||
U+00EE L Letter: Lower case letter, Latin, Other, U+00CE
|
||||
U+00EF L Letter: Lower case letter, Latin, Other, U+00CF
|
||||
findprop f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff
|
||||
U+00F0 Letter: Lower case letter, Latin, Other, U+00D0
|
||||
U+00F1 Letter: Lower case letter, Latin, Other, U+00D1
|
||||
U+00F2 Letter: Lower case letter, Latin, Other, U+00D2
|
||||
U+00F3 Letter: Lower case letter, Latin, Other, U+00D3
|
||||
U+00F4 Letter: Lower case letter, Latin, Other, U+00D4
|
||||
U+00F5 Letter: Lower case letter, Latin, Other, U+00D5
|
||||
U+00F6 Letter: Lower case letter, Latin, Other, U+00D6
|
||||
U+00F7 Symbol: Mathematical symbol, Common, Other
|
||||
U+00F8 Letter: Lower case letter, Latin, Other, U+00D8
|
||||
U+00F9 Letter: Lower case letter, Latin, Other, U+00D9
|
||||
U+00FA Letter: Lower case letter, Latin, Other, U+00DA
|
||||
U+00FB Letter: Lower case letter, Latin, Other, U+00DB
|
||||
U+00FC Letter: Lower case letter, Latin, Other, U+00DC
|
||||
U+00FD Letter: Lower case letter, Latin, Other, U+00DD
|
||||
U+00FE Letter: Lower case letter, Latin, Other, U+00DE
|
||||
U+00FF Letter: Lower case letter, Latin, Other, U+0178
|
||||
U+00F0 L Letter: Lower case letter, Latin, Other, U+00D0
|
||||
U+00F1 L Letter: Lower case letter, Latin, Other, U+00D1
|
||||
U+00F2 L Letter: Lower case letter, Latin, Other, U+00D2
|
||||
U+00F3 L Letter: Lower case letter, Latin, Other, U+00D3
|
||||
U+00F4 L Letter: Lower case letter, Latin, Other, U+00D4
|
||||
U+00F5 L Letter: Lower case letter, Latin, Other, U+00D5
|
||||
U+00F6 L Letter: Lower case letter, Latin, Other, U+00D6
|
||||
U+00F7 ON Symbol: Mathematical symbol, Common, Other
|
||||
U+00F8 L Letter: Lower case letter, Latin, Other, U+00D8
|
||||
U+00F9 L Letter: Lower case letter, Latin, Other, U+00D9
|
||||
U+00FA L Letter: Lower case letter, Latin, Other, U+00DA
|
||||
U+00FB L Letter: Lower case letter, Latin, Other, U+00DB
|
||||
U+00FC L Letter: Lower case letter, Latin, Other, U+00DC
|
||||
U+00FD L Letter: Lower case letter, Latin, Other, U+00DD
|
||||
U+00FE L Letter: Lower case letter, Latin, Other, U+00DE
|
||||
U+00FF L Letter: Lower case letter, Latin, Other, U+0178
|
||||
|
||||
findprop 0100 0101 0102 0103 0104 0105 0106
|
||||
U+0100 Letter: Upper case letter, Latin, Other, U+0101
|
||||
U+0101 Letter: Lower case letter, Latin, Other, U+0100
|
||||
U+0102 Letter: Upper case letter, Latin, Other, U+0103
|
||||
U+0103 Letter: Lower case letter, Latin, Other, U+0102
|
||||
U+0104 Letter: Upper case letter, Latin, Other, U+0105
|
||||
U+0105 Letter: Lower case letter, Latin, Other, U+0104
|
||||
U+0106 Letter: Upper case letter, Latin, Other, U+0107
|
||||
U+0100 L Letter: Upper case letter, Latin, Other, U+0101
|
||||
U+0101 L Letter: Lower case letter, Latin, Other, U+0100
|
||||
U+0102 L Letter: Upper case letter, Latin, Other, U+0103
|
||||
U+0103 L Letter: Lower case letter, Latin, Other, U+0102
|
||||
U+0104 L Letter: Upper case letter, Latin, Other, U+0105
|
||||
U+0105 L Letter: Lower case letter, Latin, Other, U+0104
|
||||
U+0106 L Letter: Upper case letter, Latin, Other, U+0107
|
||||
|
||||
findprop ffe0 ffe1 ffe2 ffe3 ffe4 ffe5 ffe6 ffe7
|
||||
U+FFE0 Symbol: Currency symbol, Common, Other
|
||||
U+FFE1 Symbol: Currency symbol, Common, Other
|
||||
U+FFE2 Symbol: Mathematical symbol, Common, Other
|
||||
U+FFE3 Symbol: Modifier symbol, Common, Other
|
||||
U+FFE4 Symbol: Other symbol, Common, Other
|
||||
U+FFE5 Symbol: Currency symbol, Common, Other
|
||||
U+FFE6 Symbol: Currency symbol, Common, Other
|
||||
U+FFE7 Control: Unassigned, Unknown, Other
|
||||
U+FFE0 ET Symbol: Currency symbol, Common, Other
|
||||
U+FFE1 ET Symbol: Currency symbol, Common, Other
|
||||
U+FFE2 ON Symbol: Mathematical symbol, Common, Other
|
||||
U+FFE3 ON Symbol: Modifier symbol, Common, Other
|
||||
U+FFE4 ON Symbol: Other symbol, Common, Other
|
||||
U+FFE5 ET Symbol: Currency symbol, Common, Other
|
||||
U+FFE6 ET Symbol: Currency symbol, Common, Other
|
||||
U+FFE7 L Control: Unassigned, Unknown, Other
|
||||
findprop ffe8 ffe9 ffea ffeb ffec ffed ffee ffef
|
||||
U+FFE8 Symbol: Other symbol, Common, Other
|
||||
U+FFE9 Symbol: Mathematical symbol, Common, Other
|
||||
U+FFEA Symbol: Mathematical symbol, Common, Other
|
||||
U+FFEB Symbol: Mathematical symbol, Common, Other
|
||||
U+FFEC Symbol: Mathematical symbol, Common, Other
|
||||
U+FFED Symbol: Other symbol, Common, Other
|
||||
U+FFEE Symbol: Other symbol, Common, Other
|
||||
U+FFEF Control: Unassigned, Unknown, Other
|
||||
U+FFE8 ON Symbol: Other symbol, Common, Other
|
||||
U+FFE9 ON Symbol: Mathematical symbol, Common, Other
|
||||
U+FFEA ON Symbol: Mathematical symbol, Common, Other
|
||||
U+FFEB ON Symbol: Mathematical symbol, Common, Other
|
||||
U+FFEC ON Symbol: Mathematical symbol, Common, Other
|
||||
U+FFED ON Symbol: Other symbol, Common, Other
|
||||
U+FFEE ON Symbol: Other symbol, Common, Other
|
||||
U+FFEF L Control: Unassigned, Unknown, Other
|
||||
findprop fff8 fff9 fffa fffb fffc fffd fffe ffff
|
||||
U+FFF8 Control: Unassigned, Unknown, Control
|
||||
U+FFF9 Control: Format, Common, Control
|
||||
U+FFFA Control: Format, Common, Control
|
||||
U+FFFB Control: Format, Common, Control
|
||||
U+FFFC Symbol: Other symbol, Common, Other
|
||||
U+FFFD Symbol: Other symbol, Common, Other
|
||||
U+FFFE Control: Unassigned, Unknown, Other
|
||||
U+FFFF Control: Unassigned, Unknown, Other
|
||||
U+FFF8 BN Control: Unassigned, Unknown, Control
|
||||
U+FFF9 ON Control: Format, Common, Control
|
||||
U+FFFA ON Control: Format, Common, Control
|
||||
U+FFFB ON Control: Format, Common, Control
|
||||
U+FFFC ON Symbol: Other symbol, Common, Other
|
||||
U+FFFD ON Symbol: Other symbol, Common, Other
|
||||
U+FFFE BN Control: Unassigned, Unknown, Other
|
||||
U+FFFF BN Control: Unassigned, Unknown, Other
|
||||
findprop 10000 10001 e01ef f0000 100000
|
||||
U+10000 Letter: Other letter, Linear_B, Other
|
||||
U+10001 Letter: Other letter, Linear_B, Other
|
||||
U+E01EF Mark: Non-spacing mark, Inherited, Extend
|
||||
U+F0000 Control: Private use, Unknown, Other
|
||||
U+100000 Control: Private use, Unknown, Other
|
||||
U+10000 L Letter: Other letter, Linear_B, Other
|
||||
U+10001 L Letter: Other letter, Linear_B, Other
|
||||
U+E01EF NSM Mark: Non-spacing mark, Inherited, Extend
|
||||
U+F0000 L Control: Private use, Unknown, Other
|
||||
U+100000 L Control: Private use, Unknown, Other
|
||||
|
||||
findprop 1b00 12000 7c0 a840 10900
|
||||
U+1B00 Mark: Non-spacing mark, Balinese, Extend
|
||||
U+12000 Letter: Other letter, Cuneiform, Other
|
||||
U+07C0 Number: Decimal number, Nko, Other
|
||||
U+A840 Letter: Other letter, Phags_Pa, Other
|
||||
U+10900 Letter: Other letter, Phoenician, Other
|
||||
U+1B00 NSM Mark: Non-spacing mark, Balinese, Extend
|
||||
U+12000 L Letter: Other letter, Cuneiform, Other
|
||||
U+07C0 R Number: Decimal number, Nko, Other
|
||||
U+A840 L Letter: Other letter, Phags_Pa, Other
|
||||
U+10900 R Letter: Other letter, Phoenician, Other
|
||||
findprop 1d79 a77d
|
||||
U+1D79 Letter: Lower case letter, Latin, Other, U+A77D
|
||||
U+A77D Letter: Upper case letter, Latin, Other, U+1D79
|
||||
U+1D79 L Letter: Lower case letter, Latin, Other, U+A77D
|
||||
U+A77D L Letter: Upper case letter, Latin, Other, U+1D79
|
||||
|
||||
findprop 0800 083e a4d0 a4f7 aa80 aadf
|
||||
U+0800 Letter: Other letter, Samaritan, Other
|
||||
U+083E Punctuation: Other punctuation, Samaritan, Other
|
||||
U+A4D0 Letter: Other letter, Lisu, Other
|
||||
U+A4F7 Letter: Other letter, Lisu, Other
|
||||
U+AA80 Letter: Other letter, Tai_Viet, Other
|
||||
U+AADF Punctuation: Other punctuation, Tai_Viet, Other
|
||||
U+0800 R Letter: Other letter, Samaritan, Other
|
||||
U+083E R Punctuation: Other punctuation, Samaritan, Other
|
||||
U+A4D0 L Letter: Other letter, Lisu, Other
|
||||
U+A4F7 L Letter: Other letter, Lisu, Other
|
||||
U+AA80 L Letter: Other letter, Tai_Viet, Other
|
||||
U+AADF L Punctuation: Other punctuation, Tai_Viet, Other
|
||||
findprop 10b00 10b35 13000 1342e 10840 10855
|
||||
U+10B00 Letter: Other letter, Avestan, Other
|
||||
U+10B35 Letter: Other letter, Avestan, Other
|
||||
U+13000 Letter: Other letter, Egyptian_Hieroglyphs, Other
|
||||
U+1342E Letter: Other letter, Egyptian_Hieroglyphs, Other
|
||||
U+10840 Letter: Other letter, Imperial_Aramaic, Other
|
||||
U+10855 Letter: Other letter, Imperial_Aramaic, Other
|
||||
U+10B00 R Letter: Other letter, Avestan, Other
|
||||
U+10B35 R Letter: Other letter, Avestan, Other
|
||||
U+13000 L Letter: Other letter, Egyptian_Hieroglyphs, Other
|
||||
U+1342E L Letter: Other letter, Egyptian_Hieroglyphs, Other
|
||||
U+10840 R Letter: Other letter, Imperial_Aramaic, Other
|
||||
U+10855 R Letter: Other letter, Imperial_Aramaic, Other
|
||||
|
||||
findprop 11100 1113c 11680 116c0
|
||||
U+11100 Mark: Non-spacing mark, Chakma, Extend
|
||||
U+1113C Number: Decimal number, Chakma, Other
|
||||
U+11680 Letter: Other letter, Takri, Other
|
||||
U+116C0 Number: Decimal number, Takri, Other
|
||||
U+11100 NSM Mark: Non-spacing mark, Chakma, Extend
|
||||
U+1113C L Number: Decimal number, Chakma, Other
|
||||
U+11680 L Letter: Other letter, Takri, Other
|
||||
U+116C0 L Number: Decimal number, Takri, Other
|
||||
|
||||
findprop 0d 0a 0e 0711 1b04 1111 1169 11fe ae4c ad89
|
||||
U+000D Control: Control, Common, CR
|
||||
U+000A Control: Control, Common, LF
|
||||
U+000E Control: Control, Common, Control
|
||||
U+0711 Mark: Non-spacing mark, Syriac, Extend
|
||||
U+1B04 Mark: Spacing mark, Balinese, SpacingMark
|
||||
U+1111 Letter: Other letter, Hangul, Hangul syllable type L
|
||||
U+1169 Letter: Other letter, Hangul, Hangul syllable type V
|
||||
U+11FE Letter: Other letter, Hangul, Hangul syllable type T
|
||||
U+AE4C Letter: Other letter, Hangul, Hangul syllable type LV
|
||||
U+AD89 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+000D B Control: Control, Common, CR
|
||||
U+000A B Control: Control, Common, LF
|
||||
U+000E BN Control: Control, Common, Control
|
||||
U+0711 NSM Mark: Non-spacing mark, Syriac, Extend
|
||||
U+1B04 L Mark: Spacing mark, Balinese, SpacingMark
|
||||
U+1111 L Letter: Other letter, Hangul, Hangul syllable type L
|
||||
U+1169 L Letter: Other letter, Hangul, Hangul syllable type V
|
||||
U+11FE L Letter: Other letter, Hangul, Hangul syllable type T
|
||||
U+AE4C L Letter: Other letter, Hangul, Hangul syllable type LV
|
||||
U+AD89 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
|
||||
findprop 118a0 11ac7 16ad0
|
||||
U+118A0 Letter: Upper case letter, Warang_Citi, Other, U+118C0
|
||||
U+11AC7 Letter: Other letter, Pau_Cin_Hau, Other
|
||||
U+16AD0 Letter: Other letter, Bassa_Vah, Other
|
||||
U+118A0 L Letter: Upper case letter, Warang_Citi, Other, U+118C0
|
||||
U+11AC7 L Letter: Other letter, Pau_Cin_Hau, Other
|
||||
U+16AD0 L Letter: Other letter, Bassa_Vah, Other
|
||||
|
||||
findprop 11700 14400 108e0 11280 1d800
|
||||
U+11700 Letter: Other letter, Ahom, Other
|
||||
U+14400 Letter: Other letter, Anatolian_Hieroglyphs, Other
|
||||
U+108E0 Letter: Other letter, Hatran, Other
|
||||
U+11280 Letter: Other letter, Multani, Other
|
||||
U+1D800 Symbol: Other symbol, SignWriting, Other
|
||||
U+11700 L Letter: Other letter, Ahom, Other
|
||||
U+14400 L Letter: Other letter, Anatolian_Hieroglyphs, Other
|
||||
U+108E0 R Letter: Other letter, Hatran, Other
|
||||
U+11280 L Letter: Other letter, Multani, Other
|
||||
U+1D800 L Symbol: Other symbol, SignWriting, Other
|
||||
|
||||
findprop 11800 1e903 11da9 10d27 11ee0 16e48 10f27 10f30
|
||||
U+11800 Letter: Other letter, Dogra, Other
|
||||
U+1E903 Letter: Upper case letter, Adlam, Other, U+1E925
|
||||
U+11DA9 Number: Decimal number, Gunjala_Gondi, Other
|
||||
U+10D27 Mark: Non-spacing mark, Hanifi_Rohingya, Extend
|
||||
U+11EE0 Letter: Other letter, Makasar, Other
|
||||
U+16E48 Letter: Upper case letter, Medefaidrin, Other, U+16E68
|
||||
U+10F27 Letter: Other letter, Old_Sogdian, Other
|
||||
U+10F30 Letter: Other letter, Sogdian, Other
|
||||
U+11800 L Letter: Other letter, Dogra, Other
|
||||
U+1E903 R Letter: Upper case letter, Adlam, Other, U+1E925
|
||||
U+11DA9 L Number: Decimal number, Gunjala_Gondi, Other
|
||||
U+10D27 NSM Mark: Non-spacing mark, Hanifi_Rohingya, Extend
|
||||
U+11EE0 L Letter: Other letter, Makasar, Other
|
||||
U+16E48 L Letter: Upper case letter, Medefaidrin, Other, U+16E68
|
||||
U+10F27 R Letter: Other letter, Old_Sogdian, Other
|
||||
U+10F30 AL Letter: Other letter, Sogdian, Other
|
||||
|
||||
findprop a836 a833 1cf4 20f0 1cd0
|
||||
U+A836 Symbol: Other symbol, Common, Other, [Devanagari, Dogra, Gujarati, Gurmukhi, Khojki, Kaithi, Mahajani, Modi, Khudawadi, Takri, Tirhuta]
|
||||
U+A833 Number: Other number, Common, Other, [Devanagari, Dogra, Gujarati, Gurmukhi, Khojki, Kannada, Kaithi, Mahajani, Modi, Nandinagari, Khudawadi, Takri, Tirhuta]
|
||||
U+1CF4 Mark: Non-spacing mark, Inherited, Extend, [Devanagari, Grantha, Kannada]
|
||||
U+20F0 Mark: Non-spacing mark, Inherited, Extend, [Devanagari, Grantha, Latin]
|
||||
U+1CD0 Mark: Non-spacing mark, Inherited, Extend, [Bengali, Devanagari, Grantha, Kannada]
|
||||
U+A836 L Symbol: Other symbol, Common, Other, [Devanagari, Dogra, Gujarati, Gurmukhi, Khojki, Kaithi, Mahajani, Modi, Khudawadi, Takri, Tirhuta]
|
||||
U+A833 L Number: Other number, Common, Other, [Devanagari, Dogra, Gujarati, Gurmukhi, Khojki, Kannada, Kaithi, Mahajani, Modi, Nandinagari, Khudawadi, Takri, Tirhuta]
|
||||
U+1CF4 NSM Mark: Non-spacing mark, Inherited, Extend, [Devanagari, Grantha, Kannada]
|
||||
U+20F0 NSM Mark: Non-spacing mark, Inherited, Extend, [Devanagari, Grantha, Latin]
|
||||
U+1CD0 NSM Mark: Non-spacing mark, Inherited, Extend, [Bengali, Devanagari, Grantha, Kannada]
|
||||
|
||||
findprop 32ff
|
||||
U+32FF Symbol: Other symbol, Common, Other, [Han]
|
||||
U+32FF L Symbol: Other symbol, Common, Other, [Han]
|
||||
|
||||
findprop 1f16d
|
||||
U+1F16D Symbol: Other symbol, Common, Extended Pictographic
|
||||
U+1F16D ON Symbol: Other symbol, Common, Extended Pictographic
|
||||
|
||||
findprop U+10e93 U+10eaa
|
||||
U+10E93 Letter: Other letter, Yezidi, Other
|
||||
U+10EAA Control: Unassigned, Unknown, Other
|
||||
U+10E93 R Letter: Other letter, Yezidi, Other
|
||||
U+10EAA R Control: Unassigned, Unknown, Other
|
||||
|
||||
findprop 0602 202a 202b 202c 2068 2069 202d 202e 2067
|
||||
U+0602 AN Control: Format, Arabic, Prepend
|
||||
U+202A *LRE Control: Format, Common, Control
|
||||
U+202B *RLE Control: Format, Common, Control
|
||||
U+202C *PDF Control: Format, Common, Control
|
||||
U+2068 *FSI Control: Format, Common, Control
|
||||
U+2069 *PDI Control: Format, Common, Control
|
||||
U+202D *LRO Control: Format, Common, Control
|
||||
U+202E *RLO Control: Format, Common, Control
|
||||
U+2067 *RLI Control: Format, Common, Control
|
||||
|
|
|
@ -1,196 +1,253 @@
|
|||
find script Han
|
||||
U+2E80..U+2E99 Symbol: Other symbol, Han, Other
|
||||
U+2E9B..U+2EF3 Symbol: Other symbol, Han, Other
|
||||
U+2F00..U+2FD5 Symbol: Other symbol, Han, Other
|
||||
U+3005 Letter: Modifier letter, Han, Other
|
||||
U+3007 Number: Letter number, Han, Other
|
||||
U+3021..U+3029 Number: Letter number, Han, Other
|
||||
U+3038..U+303A Number: Letter number, Han, Other
|
||||
U+303B Letter: Modifier letter, Han, Other
|
||||
U+3400..U+4DBF Letter: Other letter, Han, Other
|
||||
U+4E00..U+9FFF Letter: Other letter, Han, Other
|
||||
U+F900..U+FA6D Letter: Other letter, Han, Other
|
||||
U+FA70..U+FAD9 Letter: Other letter, Han, Other
|
||||
U+16FE2 Punctuation: Other punctuation, Han, Other
|
||||
U+16FE3 Letter: Modifier letter, Han, Other
|
||||
U+16FF0..U+16FF1 Mark: Spacing mark, Han, SpacingMark
|
||||
U+20000..U+2A6DF Letter: Other letter, Han, Other
|
||||
U+2A700..U+2B738 Letter: Other letter, Han, Other
|
||||
U+2B740..U+2B81D Letter: Other letter, Han, Other
|
||||
U+2B820..U+2CEA1 Letter: Other letter, Han, Other
|
||||
U+2CEB0..U+2EBE0 Letter: Other letter, Han, Other
|
||||
U+2F800..U+2FA1D Letter: Other letter, Han, Other
|
||||
U+30000..U+3134A Letter: Other letter, Han, Other
|
||||
U+2E80..U+2E99 ON Symbol: Other symbol, Han, Other
|
||||
U+2E9B..U+2EF3 ON Symbol: Other symbol, Han, Other
|
||||
U+2F00..U+2FD5 ON Symbol: Other symbol, Han, Other
|
||||
U+3005 L Letter: Modifier letter, Han, Other
|
||||
U+3007 L Number: Letter number, Han, Other
|
||||
U+3021..U+3029 L Number: Letter number, Han, Other
|
||||
U+3038..U+303A L Number: Letter number, Han, Other
|
||||
U+303B L Letter: Modifier letter, Han, Other
|
||||
U+3400..U+4DBF L Letter: Other letter, Han, Other
|
||||
U+4E00..U+9FFF L Letter: Other letter, Han, Other
|
||||
U+F900..U+FA6D L Letter: Other letter, Han, Other
|
||||
U+FA70..U+FAD9 L Letter: Other letter, Han, Other
|
||||
U+16FE2 ON Punctuation: Other punctuation, Han, Other
|
||||
U+16FE3 L Letter: Modifier letter, Han, Other
|
||||
U+16FF0..U+16FF1 L Mark: Spacing mark, Han, SpacingMark
|
||||
U+20000..U+2A6DF L Letter: Other letter, Han, Other
|
||||
U+2A700..U+2B738 L Letter: Other letter, Han, Other
|
||||
U+2B740..U+2B81D L Letter: Other letter, Han, Other
|
||||
U+2B820..U+2CEA1 L Letter: Other letter, Han, Other
|
||||
U+2CEB0..U+2EBE0 L Letter: Other letter, Han, Other
|
||||
U+2F800..U+2FA1D L Letter: Other letter, Han, Other
|
||||
U+30000..U+3134A L Letter: Other letter, Han, Other
|
||||
find type Pe script Common scriptx Hangul
|
||||
U+3009 Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+300B Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+300D Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+300F Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+3011 Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+3015 Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+3017 Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+3019 Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+301B Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+301E..U+301F Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana]
|
||||
U+FF63 Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+3009 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+300B ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+300D ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+300F ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+3011 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+3015 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+3017 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+3019 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+301B ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+301E..U+301F ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana]
|
||||
U+FF63 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
find type Sk
|
||||
U+005E Symbol: Modifier symbol, Common, Other
|
||||
U+0060 Symbol: Modifier symbol, Common, Other
|
||||
U+00A8 Symbol: Modifier symbol, Common, Other
|
||||
U+00AF Symbol: Modifier symbol, Common, Other
|
||||
U+00B4 Symbol: Modifier symbol, Common, Other
|
||||
U+00B8 Symbol: Modifier symbol, Common, Other
|
||||
U+02C2..U+02C5 Symbol: Modifier symbol, Common, Other
|
||||
U+02D2..U+02DF Symbol: Modifier symbol, Common, Other
|
||||
U+02E5..U+02E9 Symbol: Modifier symbol, Common, Other
|
||||
U+02EA..U+02EB Symbol: Modifier symbol, Bopomofo, Other
|
||||
U+02ED Symbol: Modifier symbol, Common, Other
|
||||
U+02EF..U+02FF Symbol: Modifier symbol, Common, Other
|
||||
U+0375 Symbol: Modifier symbol, Greek, Other
|
||||
U+0384 Symbol: Modifier symbol, Greek, Other
|
||||
U+0385 Symbol: Modifier symbol, Common, Other
|
||||
U+0888 Symbol: Modifier symbol, Arabic, Other
|
||||
U+1FBD Symbol: Modifier symbol, Greek, Other
|
||||
U+1FBF..U+1FC1 Symbol: Modifier symbol, Greek, Other
|
||||
U+1FCD..U+1FCF Symbol: Modifier symbol, Greek, Other
|
||||
U+1FDD..U+1FDF Symbol: Modifier symbol, Greek, Other
|
||||
U+1FED..U+1FEF Symbol: Modifier symbol, Greek, Other
|
||||
U+1FFD..U+1FFE Symbol: Modifier symbol, Greek, Other
|
||||
U+309B..U+309C Symbol: Modifier symbol, Common, Other, [Hiragana, Katakana]
|
||||
U+A700..U+A707 Symbol: Modifier symbol, Common, Other, [Han, Latin]
|
||||
U+A708..U+A716 Symbol: Modifier symbol, Common, Other
|
||||
U+A720..U+A721 Symbol: Modifier symbol, Common, Other
|
||||
U+A789..U+A78A Symbol: Modifier symbol, Common, Other
|
||||
U+AB5B Symbol: Modifier symbol, Common, Other
|
||||
U+AB6A..U+AB6B Symbol: Modifier symbol, Common, Other
|
||||
U+FBB2..U+FBC2 Symbol: Modifier symbol, Arabic, Other
|
||||
U+FF3E Symbol: Modifier symbol, Common, Other
|
||||
U+FF40 Symbol: Modifier symbol, Common, Other
|
||||
U+FFE3 Symbol: Modifier symbol, Common, Other
|
||||
U+1F3FB..U+1F3FF Symbol: Modifier symbol, Common, Extend
|
||||
U+005E ON Symbol: Modifier symbol, Common, Other
|
||||
U+0060 ON Symbol: Modifier symbol, Common, Other
|
||||
U+00A8 ON Symbol: Modifier symbol, Common, Other
|
||||
U+00AF ON Symbol: Modifier symbol, Common, Other
|
||||
U+00B4 ON Symbol: Modifier symbol, Common, Other
|
||||
U+00B8 ON Symbol: Modifier symbol, Common, Other
|
||||
U+02C2..U+02C5 ON Symbol: Modifier symbol, Common, Other
|
||||
U+02D2..U+02DF ON Symbol: Modifier symbol, Common, Other
|
||||
U+02E5..U+02E9 ON Symbol: Modifier symbol, Common, Other
|
||||
U+02EA..U+02EB ON Symbol: Modifier symbol, Bopomofo, Other
|
||||
U+02ED ON Symbol: Modifier symbol, Common, Other
|
||||
U+02EF..U+02FF ON Symbol: Modifier symbol, Common, Other
|
||||
U+0375 ON Symbol: Modifier symbol, Greek, Other
|
||||
U+0384 ON Symbol: Modifier symbol, Greek, Other
|
||||
U+0385 ON Symbol: Modifier symbol, Common, Other
|
||||
U+0888 AL Symbol: Modifier symbol, Arabic, Other
|
||||
U+1FBD ON Symbol: Modifier symbol, Greek, Other
|
||||
U+1FBF..U+1FC1 ON Symbol: Modifier symbol, Greek, Other
|
||||
U+1FCD..U+1FCF ON Symbol: Modifier symbol, Greek, Other
|
||||
U+1FDD..U+1FDF ON Symbol: Modifier symbol, Greek, Other
|
||||
U+1FED..U+1FEF ON Symbol: Modifier symbol, Greek, Other
|
||||
U+1FFD..U+1FFE ON Symbol: Modifier symbol, Greek, Other
|
||||
U+309B..U+309C ON Symbol: Modifier symbol, Common, Other, [Hiragana, Katakana]
|
||||
U+A700..U+A707 ON Symbol: Modifier symbol, Common, Other, [Han, Latin]
|
||||
U+A708..U+A716 ON Symbol: Modifier symbol, Common, Other
|
||||
U+A720..U+A721 ON Symbol: Modifier symbol, Common, Other
|
||||
U+A789..U+A78A L Symbol: Modifier symbol, Common, Other
|
||||
U+AB5B L Symbol: Modifier symbol, Common, Other
|
||||
U+AB6A..U+AB6B ON Symbol: Modifier symbol, Common, Other
|
||||
U+FBB2..U+FBC2 AL Symbol: Modifier symbol, Arabic, Other
|
||||
U+FF3E ON Symbol: Modifier symbol, Common, Other
|
||||
U+FF40 ON Symbol: Modifier symbol, Common, Other
|
||||
U+FFE3 ON Symbol: Modifier symbol, Common, Other
|
||||
U+1F3FB..U+1F3FF ON Symbol: Modifier symbol, Common, Extend
|
||||
find type Pd
|
||||
U+002D Punctuation: Dash punctuation, Common, Other
|
||||
U+058A Punctuation: Dash punctuation, Armenian, Other
|
||||
U+05BE Punctuation: Dash punctuation, Hebrew, Other
|
||||
U+1400 Punctuation: Dash punctuation, Canadian_Aboriginal, Other
|
||||
U+1806 Punctuation: Dash punctuation, Mongolian, Other
|
||||
U+2010..U+2015 Punctuation: Dash punctuation, Common, Other
|
||||
U+2E17 Punctuation: Dash punctuation, Common, Other
|
||||
U+2E1A Punctuation: Dash punctuation, Common, Other
|
||||
U+2E3A..U+2E3B Punctuation: Dash punctuation, Common, Other
|
||||
U+2E40 Punctuation: Dash punctuation, Common, Other
|
||||
U+2E5D Punctuation: Dash punctuation, Common, Other
|
||||
U+301C Punctuation: Dash punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana]
|
||||
U+3030 Punctuation: Dash punctuation, Common, Extended Pictographic, [Bopomofo, Hangul, Han, Hiragana, Katakana]
|
||||
U+30A0 Punctuation: Dash punctuation, Common, Other, [Hiragana, Katakana]
|
||||
U+FE31..U+FE32 Punctuation: Dash punctuation, Common, Other
|
||||
U+FE58 Punctuation: Dash punctuation, Common, Other
|
||||
U+FE63 Punctuation: Dash punctuation, Common, Other
|
||||
U+FF0D Punctuation: Dash punctuation, Common, Other
|
||||
U+10EAD Punctuation: Dash punctuation, Yezidi, Other
|
||||
U+002D ES Punctuation: Dash punctuation, Common, Other
|
||||
U+058A ON Punctuation: Dash punctuation, Armenian, Other
|
||||
U+05BE R Punctuation: Dash punctuation, Hebrew, Other
|
||||
U+1400 ON Punctuation: Dash punctuation, Canadian_Aboriginal, Other
|
||||
U+1806 ON Punctuation: Dash punctuation, Mongolian, Other
|
||||
U+2010..U+2015 ON Punctuation: Dash punctuation, Common, Other
|
||||
U+2E17 ON Punctuation: Dash punctuation, Common, Other
|
||||
U+2E1A ON Punctuation: Dash punctuation, Common, Other
|
||||
U+2E3A..U+2E3B ON Punctuation: Dash punctuation, Common, Other
|
||||
U+2E40 ON Punctuation: Dash punctuation, Common, Other
|
||||
U+2E5D ON Punctuation: Dash punctuation, Common, Other
|
||||
U+301C ON Punctuation: Dash punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana]
|
||||
U+3030 ON Punctuation: Dash punctuation, Common, Extended Pictographic, [Bopomofo, Hangul, Han, Hiragana, Katakana]
|
||||
U+30A0 ON Punctuation: Dash punctuation, Common, Other, [Hiragana, Katakana]
|
||||
U+FE31..U+FE32 ON Punctuation: Dash punctuation, Common, Other
|
||||
U+FE58 ON Punctuation: Dash punctuation, Common, Other
|
||||
U+FE63 ES Punctuation: Dash punctuation, Common, Other
|
||||
U+FF0D ES Punctuation: Dash punctuation, Common, Other
|
||||
U+10EAD R Punctuation: Dash punctuation, Yezidi, Other
|
||||
find gbreak LVT
|
||||
U+AC01..U+AC1B Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AC1D..U+AC37 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AC39..U+AC53 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AC55..U+AC6F Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AC71..U+AC8B Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AC8D..U+ACA7 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ACA9..U+ACC3 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ACC5..U+ACDF Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ACE1..U+ACFB Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ACFD..U+AD17 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AD19..U+AD33 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AD35..U+AD4F Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AD51..U+AD6B Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AD6D..U+AD87 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AD89..U+ADA3 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ADA5..U+ADBF Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ADC1..U+ADDB Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ADDD..U+ADF7 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ADF9..U+AE13 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AE15..U+AE2F Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AE31..U+AE4B Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AE4D..U+AE67 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AE69..U+AE83 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AE85..U+AE9F Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AEA1..U+AEBB Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AEBD..U+AED7 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AED9..U+AEF3 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AEF5..U+AF0F Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AF11..U+AF2B Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AF2D..U+AF47 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AF49..U+AF63 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AF65..U+AF7F Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AF81..U+AF9B Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AF9D..U+AFB7 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AFB9..U+AFD3 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AFD5..U+AFEF Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AFF1..U+B00B Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B00D..U+B027 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B029..U+B043 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B045..U+B05F Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B061..U+B07B Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B07D..U+B097 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B099..U+B0B3 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B0B5..U+B0CF Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B0D1..U+B0EB Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B0ED..U+B107 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B109..U+B123 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B125..U+B13F Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B141..U+B15B Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B15D..U+B177 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B179..U+B193 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B195..U+B1AF Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B1B1..U+B1CB Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B1CD..U+B1E7 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B1E9..U+B203 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B205..U+B21F Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B221..U+B23B Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B23D..U+B257 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B259..U+B273 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B275..U+B28F Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B291..U+B2AB Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B2AD..U+B2C7 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B2C9..U+B2E3 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B2E5..U+B2FF Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B301..U+B31B Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B31D..U+B337 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B339..U+B353 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B355..U+B36F Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B371..U+B38B Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B38D..U+B3A7 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B3A9..U+B3C3 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B3C5..U+B3DF Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B3E1..U+B3FB Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B3FD..U+B417 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B419..U+B433 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B435..U+B44F Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B451..U+B46B Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B46D..U+B487 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B489..U+B4A3 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B4A5..U+B4BF Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B4C1..U+B4DB Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B4DD..U+B4F7 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B4F9..U+B513 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B515..U+B52F Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B531..U+B54B Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B54D..U+B567 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B569..U+B583 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B585..U+B59F Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B5A1..U+B5BB Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B5BD..U+B5D7 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B5D9..U+B5F3 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B5F5..U+B60F Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B611..U+B62B Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B62D..U+B647 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B649..U+B663 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B665..U+B67F Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B681..U+B69B Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B69D..U+B6B7 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B6B9..U+B6D3 Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B6D5..U+B6EF Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AC01..U+AC1B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AC1D..U+AC37 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AC39..U+AC53 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AC55..U+AC6F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AC71..U+AC8B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AC8D..U+ACA7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ACA9..U+ACC3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ACC5..U+ACDF L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ACE1..U+ACFB L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ACFD..U+AD17 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AD19..U+AD33 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AD35..U+AD4F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AD51..U+AD6B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AD6D..U+AD87 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AD89..U+ADA3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ADA5..U+ADBF L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ADC1..U+ADDB L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ADDD..U+ADF7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ADF9..U+AE13 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AE15..U+AE2F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AE31..U+AE4B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AE4D..U+AE67 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AE69..U+AE83 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AE85..U+AE9F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AEA1..U+AEBB L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AEBD..U+AED7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AED9..U+AEF3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AEF5..U+AF0F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AF11..U+AF2B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AF2D..U+AF47 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AF49..U+AF63 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AF65..U+AF7F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AF81..U+AF9B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AF9D..U+AFB7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AFB9..U+AFD3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AFD5..U+AFEF L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AFF1..U+B00B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B00D..U+B027 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B029..U+B043 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B045..U+B05F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B061..U+B07B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B07D..U+B097 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B099..U+B0B3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B0B5..U+B0CF L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B0D1..U+B0EB L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B0ED..U+B107 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B109..U+B123 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B125..U+B13F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B141..U+B15B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B15D..U+B177 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B179..U+B193 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B195..U+B1AF L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B1B1..U+B1CB L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B1CD..U+B1E7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B1E9..U+B203 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B205..U+B21F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B221..U+B23B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B23D..U+B257 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B259..U+B273 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B275..U+B28F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B291..U+B2AB L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B2AD..U+B2C7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B2C9..U+B2E3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B2E5..U+B2FF L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B301..U+B31B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B31D..U+B337 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B339..U+B353 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B355..U+B36F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B371..U+B38B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B38D..U+B3A7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B3A9..U+B3C3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B3C5..U+B3DF L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B3E1..U+B3FB L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B3FD..U+B417 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B419..U+B433 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B435..U+B44F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B451..U+B46B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B46D..U+B487 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B489..U+B4A3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B4A5..U+B4BF L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B4C1..U+B4DB L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B4DD..U+B4F7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B4F9..U+B513 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B515..U+B52F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B531..U+B54B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B54D..U+B567 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B569..U+B583 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B585..U+B59F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B5A1..U+B5BB L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B5BD..U+B5D7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B5D9..U+B5F3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B5F5..U+B60F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B611..U+B62B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B62D..U+B647 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B649..U+B663 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B665..U+B67F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B681..U+B69B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B69D..U+B6B7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B6B9..U+B6D3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B6D5..U+B6EF L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
...
|
||||
find script Old_Uyghur
|
||||
U+10F70..U+10F81 Letter: Other letter, Old_Uyghur, Other
|
||||
U+10F82..U+10F85 Mark: Non-spacing mark, Old_Uyghur, Extend
|
||||
U+10F86..U+10F89 Punctuation: Other punctuation, Old_Uyghur, Other
|
||||
U+10F70..U+10F81 R Letter: Other letter, Old_Uyghur, Other
|
||||
U+10F82..U+10F85 NSM Mark: Non-spacing mark, Old_Uyghur, Extend
|
||||
U+10F86..U+10F89 R Punctuation: Other punctuation, Old_Uyghur, Other
|
||||
find bidi PDF
|
||||
U+202C *PDF Control: Format, Common, Control
|
||||
find bidi CS
|
||||
U+002C CS Punctuation: Other punctuation, Common, Other
|
||||
U+002E..U+002F CS Punctuation: Other punctuation, Common, Other
|
||||
U+003A CS Punctuation: Other punctuation, Common, Other
|
||||
U+00A0 CS Separator: Space separator, Common, Other
|
||||
U+060C CS Punctuation: Other punctuation, Common, Other, [Arabic, Nko, Hanifi_Rohingya, Syriac, Thaana, Yezidi]
|
||||
U+202F CS Separator: Space separator, Common, Other, [Latin, Mongolian]
|
||||
U+2044 CS Symbol: Mathematical symbol, Common, Other
|
||||
U+FE50 CS Punctuation: Other punctuation, Common, Other
|
||||
U+FE52 CS Punctuation: Other punctuation, Common, Other
|
||||
U+FE55 CS Punctuation: Other punctuation, Common, Other
|
||||
U+FF0C CS Punctuation: Other punctuation, Common, Other
|
||||
U+FF0E..U+FF0F CS Punctuation: Other punctuation, Common, Other
|
||||
U+FF1A CS Punctuation: Other punctuation, Common, Other
|
||||
find bidi CS type Sm
|
||||
U+2044 CS Symbol: Mathematical symbol, Common, Other
|
||||
find bidi B
|
||||
U+000A B Control: Control, Common, LF
|
||||
U+000D B Control: Control, Common, CR
|
||||
U+001C..U+001E B Control: Control, Common, Control
|
||||
U+0085 B Control: Control, Common, Control
|
||||
U+2029 B Separator: Paragraph separator, Common, Control
|
||||
find bidi FSI
|
||||
U+2068 *FSI Control: Format, Common, Control
|
||||
find bidi PDI
|
||||
U+2069 *PDI Control: Format, Common, Control
|
||||
find bidi RLI
|
||||
U+2067 *RLI Control: Format, Common, Control
|
||||
find bidi RLO
|
||||
U+202E *RLO Control: Format, Common, Control
|
||||
find bidi S
|
||||
U+0009 S Control: Control, Common, Control
|
||||
U+000B S Control: Control, Common, Control
|
||||
U+001F S Control: Control, Common, Control
|
||||
find bidi WS
|
||||
U+000C WS Control: Control, Common, Control
|
||||
U+0020 WS Separator: Space separator, Common, Other
|
||||
U+1680 WS Separator: Space separator, Ogham, Other
|
||||
U+2000..U+200A WS Separator: Space separator, Common, Other
|
||||
U+2028 WS Separator: Line separator, Common, Control
|
||||
U+205F WS Separator: Space separator, Common, Other
|
||||
U+3000 WS Separator: Space separator, Common, Other
|
||||
find bidi_control
|
||||
U+061C *AL Control: Format, Arabic, Control, [Arabic, Syriac, Thaana]
|
||||
U+200E *L Control: Format, Common, Control
|
||||
U+200F *R Control: Format, Common, Control
|
||||
U+202A *LRE Control: Format, Common, Control
|
||||
U+202B *RLE Control: Format, Common, Control
|
||||
U+202C *PDF Control: Format, Common, Control
|
||||
U+202D *LRO Control: Format, Common, Control
|
||||
U+202E *RLO Control: Format, Common, Control
|
||||
U+2066 *LRI Control: Format, Common, Control
|
||||
U+2067 *RLI Control: Format, Common, Control
|
||||
U+2068 *FSI Control: Format, Common, Control
|
||||
U+2069 *PDI Control: Format, Common, Control
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
New API code Copyright (c) 2016-2021 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -1798,7 +1798,8 @@ typedef struct {
|
|||
uint8_t caseset; /* offset to multichar other cases or zero */
|
||||
int32_t other_case; /* offset to other case, or zero if none */
|
||||
int16_t scriptx; /* script extension value */
|
||||
int16_t dummy; /* spare - to round to multiple of 4 bytes */
|
||||
uint8_t bidi; /* bidi class and control flag */
|
||||
uint8_t dummy; /* spare - to round to multiple of 4 bytes */
|
||||
} ucd_record;
|
||||
|
||||
/* UCD access macros */
|
||||
|
@ -1823,6 +1824,13 @@ typedef struct {
|
|||
#define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case)))
|
||||
#define UCD_SCRIPTX(ch) GET_UCD(ch)->scriptx
|
||||
|
||||
/* The "bidi" field has the 0x80 bit set if the character has the Bidi_Control
|
||||
property. The remaining bits hold the bidi class, but as there are only 23
|
||||
classes, we can mask off 5 bits - leaving two free for the future. */
|
||||
|
||||
#define UCD_BIDICLASS(ch) (GET_UCD(ch)->bidi & 0x1fu)
|
||||
#define UCD_BIDICONTROL(ch) (GET_UCD(ch)->bidi & 0x80u)
|
||||
|
||||
/* Header for serialized pcre2 codes. */
|
||||
|
||||
typedef struct pcre2_serialized_data {
|
||||
|
|
7774
src/pcre2_ucd.c
7774
src/pcre2_ucd.c
File diff suppressed because it is too large
Load Diff
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
New API code Copyright (c) 2016-2021 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -100,6 +100,34 @@ enum {
|
|||
ucp_Zs /* Space separator */
|
||||
};
|
||||
|
||||
/* These are the bidi class values. */
|
||||
|
||||
enum {
|
||||
ucp_bidiAL, /* Arabic letter */
|
||||
ucp_bidiAN, /* Arabic number */
|
||||
ucp_bidiB, /* Paragraph separator */
|
||||
ucp_bidiBN, /* Boundary neutral */
|
||||
ucp_bidiCS, /* Common separator */
|
||||
ucp_bidiEN, /* European number */
|
||||
ucp_bidiES, /* European separator */
|
||||
ucp_bidiET, /* European terminator */
|
||||
ucp_bidiFSI, /* First strong isolate */
|
||||
ucp_bidiL, /* Left to right */
|
||||
ucp_bidiLRE, /* Left to right embedding */
|
||||
ucp_bidiLRI, /* Left to right isolate */
|
||||
ucp_bidiLRO, /* Left to right override */
|
||||
ucp_bidiNSM, /* Non-spacing mark */
|
||||
ucp_bidiON, /* Other neutral */
|
||||
ucp_bidiPDF, /* Pop directional format */
|
||||
ucp_bidiPDI, /* Pop directional isolate */
|
||||
ucp_bidiR, /* Right to left */
|
||||
ucp_bidiRLE, /* Right to left embedding */
|
||||
ucp_bidiRLI, /* Right to left isolate */
|
||||
ucp_bidiRLO, /* Right to left override */
|
||||
ucp_bidiS, /* Segment separator */
|
||||
ucp_bidiWS /* White space */
|
||||
};
|
||||
|
||||
/* These are grapheme break properties. The Extended Pictographic property
|
||||
comes from the emoji-data.txt file. */
|
||||
|
||||
|
|
Loading…
Reference in New Issue