Initial code for Boolean property support

This commit is contained in:
Philip Hazel 2022-01-09 14:46:43 +00:00
parent 81d3729c66
commit 636569a957
22 changed files with 19715 additions and 5132 deletions

View File

@ -9,6 +9,9 @@
# December 2021.
import re
# ---------------------------------------------------------------------------
# DATA LISTS
# ---------------------------------------------------------------------------
@ -100,11 +103,58 @@ break_properties = [
'Extended_Pictographic', '14'
]
# List of files from which the names of Boolean properties are obtained, along
# with a list of regex patterns for properties to be ignored, and a list of
# extra property names to add.
bool_propsfiles = ['PropList.txt', 'DerivedCoreProperties.txt', 'emoji-data.txt']
bool_propsignore = [r'^Other_', r'^Hyphen$']
bool_propsextras = ['ASCII', 'Bidi_Mirrored']
# ---------------------------------------------------------------------------
# COLLECTING PROPERTY NAMES
# GET BOOLEAN PROPERTY NAMES
# ---------------------------------------------------------------------------
import re
# Get a list of Boolean property names from a number of files.
def getbpropslist(files=None, ignore=None, extras=None, directory='Unicode.tables'):
  """Return the sorted list of Boolean property names.

  Each data file is scanned for lines of the form "<code points> ; <name>"
  (anything after a '#' is discarded). Every distinct name that does not match
  one of the ignore patterns is collected, the extra names are added, and the
  result is returned sorted and free of duplicates.

  Parameters:
    files      names of the data files to scan; defaults to bool_propsfiles
    ignore     regex patterns (applied with re.match) for names to drop;
               defaults to bool_propsignore
    extras     names to add unconditionally; defaults to bool_propsextras
    directory  directory holding the data files

  If a file cannot be opened, a message is printed and the process exits with
  status 1.
  """
  # Imported locally so that the function does not rely on a module-level
  # import of sys, which is not visible in this part of the file.
  import sys

  if files is None:
    files = bool_propsfiles
  if ignore is None:
    ignore = bool_propsignore
  if extras is None:
    extras = bool_propsextras

  ignore_res = [re.compile(pat) for pat in ignore]

  # A set is used so that a name is recorded once even if it turns up in
  # several places; a repeated name would otherwise end up twice in the list.
  names = set()

  for filename in files:
    path = directory + '/' + filename
    try:
      # The Unicode data files are UTF-8 and contain non-ASCII characters in
      # their comments, so do not depend on the locale's default encoding.
      file = open(path, 'r', encoding='utf-8')
    except IOError:
      print(f"** Couldn't open {path}\n")
      sys.exit(1)

    with file:
      for line in file:
        line = re.sub(r'#.*', '', line)
        data = list(map(str.strip, line.split(';')))
        if len(data) <= 1:
          continue
        name = data[1]
        if not name or name in names:
          continue
        if any(pat.match(name) is not None for pat in ignore_res):
          continue
        names.add(name)

  names.update(extras)
  return sorted(names)
# The sorted list of Boolean property names, built once when this module is
# executed.
bool_properties = getbpropslist()
# Number of 32-bit words needed to hold one bit per Boolean property, that is,
# len(bool_properties) / 32 rounded up.
bool_props_list_item_size = (len(bool_properties) + 31) // 32
# ---------------------------------------------------------------------------
# COLLECTING PROPERTY NAMES AND ALIASES
# ---------------------------------------------------------------------------
script_names = ['Unknown']
abbreviations = {}
@ -145,8 +195,25 @@ def collect_property_names():
else:
abbreviations[match_obj.group(3)] = (match_obj.group(2), match_obj.group(4))
# We can also collect Boolean property abbreviations into the same dictionary
bin_alias_re = re.compile(r' *([A-Za-z_]+) *; *([A-Za-z_]+)(?: *; *([A-Za-z_]+))?')
with open("Unicode.tables/PropertyAliases.txt") as f:
for line in f:
match_obj = bin_alias_re.match(line)
if match_obj == None:
continue
if match_obj.group(2) in bool_properties:
if match_obj.group(3) == None:
abbreviations[match_obj.group(2)] = (match_obj.group(1),)
else:
abbreviations[match_obj.group(2)] = (match_obj.group(1), match_obj.group(3))
collect_property_names()
# ---------------------------------------------------------------------------
# REORDERING SCRIPT NAMES
# ---------------------------------------------------------------------------
@ -192,6 +259,8 @@ def reorder_scripts():
script_abbrevs = new_script_abbrevs
reorder_scripts()
script_list_item_size = (script_names.index('Unknown') + 31) // 32
# ---------------------------------------------------------------------------
# DERIVED LISTS
@ -243,7 +312,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2021 University of Cambridge
New API code Copyright (c) 2016-2022 University of Cambridge
This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
""")

View File

@ -224,10 +224,14 @@ import sys
from GenerateCommon import \
bidi_classes, \
bool_properties, \
bool_propsfiles, \
bool_props_list_item_size, \
break_properties, \
category_names, \
general_category_names, \
script_abbrevs, \
script_list_item_size, \
script_names, \
open_output
@ -430,6 +434,22 @@ def write_records(records, record_size):
f.write('};\n\n')
# Write a bit set
def write_bitsets(list, item_size, out=None):
  """Write a sequence of bitsets as rows of 32-bit hex words, then close the array.

  Parameters:
    list       an iterable in which each element is itself an iterable of
               non-negative bit numbers; one output row is written per element
               (the parameter name shadows the builtin but is kept so that
               existing callers continue to work)
    item_size  the number of 32-bit words in each row; every bit number must
               be less than 32 * item_size
    out        the stream to write to; when omitted, the module-level output
               file f is used, as before

  Bit number n is stored in word n // 32 at bit position n % 32. After the
  last row, "};" and a blank line are written to terminate the C initializer
  that the caller has already opened.
  """
  stream = f if out is None else out

  for members in list:
    bitwords = [0] * item_size
    for idx in members:
      bitwords[idx // 32] |= 1 << (idx & 31)
    words = ["0x%08xu" % word for word in bitwords]
    # The leading space is emitted only when there is at least one word, which
    # keeps the output identical to writing a separator before each word.
    row = (" " + ", ".join(words)) if words else ""
    stream.write(row + ",\n")

  stream.write("};\n\n")
# ---------------------------------------------------------------------------
# This bit of code must have been useful when the original script was being
# developed. Retain it just in case it is ever needed again.
@ -475,27 +495,6 @@ break_props = read_table('Unicode.tables/GraphemeBreakProperty.txt', make_get_na
other_case = read_table('Unicode.tables/CaseFolding.txt', get_other_case, 0)
bidi_class = read_table('Unicode.tables/DerivedBidiClass.txt', make_get_names(bidi_classes), bidi_classes.index('L'))
# The Bidi_Control property is a Y/N value, so needs only one bit. We scan the
# PropList.txt file and set 0x80 bit in the bidi_class table.
file = open('Unicode.tables/PropList.txt', 'r', encoding='utf-8')
for line in file:
line = re.sub(r'#.*', '', line)
chardata = list(map(str.strip, line.split(';')))
if len(chardata) <= 1:
continue
if chardata[1] != "Bidi_Control":
continue
m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', chardata[0])
char = int(m.group(1), 16)
if m.group(3) is None:
last = char
else:
last = int(m.group(3), 16)
for i in range(char, last + 1):
bidi_class[i] |= 0x80;
file.close()
# The grapheme breaking rules were changed for Unicode 11.0.0 (June 2018). Now
# we need to find the Extended_Pictographic property for emoji characters. This
# can be set as an additional grapheme break property, because the default for
@ -532,17 +531,103 @@ file.close()
# element is never used.
script_lists = [[]]
script_list_item_size = (script_names.index('Unknown') + 31) // 32
last_script_extension = ""
scriptx = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, 0)
# Find the Boolean properties of each character. This next bit of magic creates
# a list of empty lists. Just using [[]] * MAX_UNICODE gives a list of
# references to the *same* list, which is not what we want.
bprops = [[] for _ in range(MAX_UNICODE)]
# Collect the properties from the various files
# For every line "<code point or range> ; <name>" in the Boolean property
# files, record the index of <name> (its position in bool_properties) against
# each code point in the range. Names that are not in bool_properties are
# skipped.
for filename in bool_propsfiles:
  path = 'Unicode.tables/' + filename
  try:
    # The Unicode data files are UTF-8 and contain non-ASCII characters in
    # their comments, so do not depend on the locale's default encoding.
    file = open(path, 'r', encoding='utf-8')
  except IOError:
    print(f"** Couldn't open {path}\n")
    sys.exit(1)

  # The with statement guarantees the file is closed even if a line fails to
  # parse.
  with file:
    for line in file:
      line = re.sub(r'#.*', '', line)
      data = list(map(str.strip, line.split(';')))
      if len(data) <= 1:
        continue

      try:
        ix = bool_properties.index(data[1])
      except ValueError:
        continue

      m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', data[0])
      if m is None:
        # Report a malformed first field clearly instead of failing with an
        # AttributeError on m.group().
        print(f"** Bad code point field '{data[0]}' in {path}\n")
        sys.exit(1)

      char = int(m.group(1), 16)
      if m.group(3) is None:
        last = char
      else:
        last = int(m.group(3), 16)

      for i in range(char, last + 1):
        bprops[i].append(ix)
# The ASCII property isn't listed in any files, but it is easy enough to add
# it manually.
ix = bool_properties.index("ASCII")
for i in range(128):
bprops[i].append(ix)
# The Bidi_Mirrored property isn't listed in any property files. We have to
# deduce it from the file that lists the mirrored characters.
ix = bool_properties.index("Bidi_Mirrored")

try:
  # The file contains non-ASCII characters in its header, so the encoding is
  # given explicitly rather than taken from the locale.
  file = open('Unicode.tables/BidiMirroring.txt', 'r', encoding='utf-8')
except IOError:
  print(f"** Couldn't open {'Unicode.tables/BidiMirroring.txt'}\n")
  sys.exit(1)

# The header of BidiMirroring.txt states that code points which have
# Bidi_Mirrored=Yes but no character with a mirrored glyph are listed only as
# comments at the end of the file. Discarding every comment would therefore
# lose those code points, so commented entries are recognized before comments
# are stripped.
# NOTE(review): the commented entries are assumed to look like "# XXXX; NAME"
# (optionally "# XXXX..YYYY; NAME") - confirm against the tail of the data
# file. If no line has that form this behaves exactly like stripping comments.
commented_entry = re.compile(
  r'#\s*([0-9A-Fa-f]{4,6})(?:\.\.([0-9A-Fa-f]{4,6}))?\s*;')

with file:
  for line in file:
    m = commented_entry.match(line)
    if m is not None:
      first = int(m.group(1), 16)
      last = first if m.group(2) is None else int(m.group(2), 16)
    else:
      line = re.sub(r'#.*', '', line)
      data = list(map(str.strip, line.split(';')))
      if len(data) <= 1:
        continue
      first = last = int(data[0], 16)

    for c in range(first, last + 1):
      if ix not in bprops[c]:
        bprops[c].append(ix)
# Scan each character's Boolean property list and create a list of unique
# lists, at the same time setting, for each character, the index into that
# list in the bool_props vector.
# bool_props[c] is the index in bool_props_lists of the property list that
# applies to code point c. Entry 0 of bool_props_lists is the empty list, so
# code points with no Boolean properties map to 0.
bool_props = [0] * MAX_UNICODE
bool_props_lists = [[]]

# Map each distinct set of property indexes to its position in
# bool_props_lists. Lists are compared as sets (order and repeats are
# irrelevant), and the dictionary lookup avoids rescanning every stored list
# for every code point.
bool_props_index = {frozenset(): 0}

for c in range(MAX_UNICODE):
  key = frozenset(bprops[c])
  i = bool_props_index.get(key)
  if i is None:
    # First time this combination has been seen: it gets the next free slot.
    i = len(bool_props_lists)
    bool_props_lists.append(bprops[c])
    bool_props_index[key] = i
  bool_props[c] = i
# With the addition of the Script Extensions field, we needed some padding to
# get the Unicode records up to 12 bytes (multiple of 4). Originally this was a
# 16-bit field and padding_dummy[0] was set to 256 to ensure this, but 8 bits
# are now used for the bidi class, so zero will do.
# are now used, so zero will do.
padding_dummy = [0] * MAX_UNICODE
padding_dummy[0] = 256
padding_dummy[0] = 0
# This block of code was added by PH in September 2012. It scans the other_case
# table to find sets of more than two characters that must all match each other
@ -616,7 +701,7 @@ for s in caseless_sets:
# Combine all the tables
table, records = combine_tables(script, category, break_props,
caseless_offsets, other_case, scriptx, bidi_class, padding_dummy)
caseless_offsets, other_case, scriptx, bidi_class, bool_props, padding_dummy)
# Find the record size and create a string definition of the structure for
# outputting as a comment.
@ -708,6 +793,7 @@ const ucd_record PRIV(dummy_ucd_record)[] = {{
0, /* other case */
ucp_Unknown, /* script extension */
ucp_bidiL, /* bidi class */
0, /* bool properties offset */
0 /* dummy filler */
}};
#endif
@ -782,21 +868,15 @@ f.write("""\
const uint32_t PRIV(ucd_script_sets)[] = {
""")
write_bitsets(script_lists, script_list_item_size)
for d in script_lists:
bitwords = [0] * script_list_item_size
f.write("""\
/* This vector is a list of bitsets for Boolean properties. */
for idx in d:
bitwords[idx // 32] |= 1 << (idx & 31)
const uint32_t PRIV(ucd_boolprop_sets)[] = {
""")
write_bitsets(bool_props_lists, bool_props_list_item_size)
s = " "
for x in bitwords:
f.write("%s" % s)
s = ", "
f.write("0x%08xu" % x)
f.write(",\n")
f.write("};\n\n")
# Output the main UCD tables.
@ -804,8 +884,9 @@ f.write("""\
/* These are the main two-stage UCD tables. The fields in each record are:
script (8 bits), character type (8 bits), grapheme break property (8 bits),
offset to multichar other cases or zero (8 bits), offset to other case or zero
(32 bits, signed), script extension (8 bits), bidi class (8 bits), and a dummy
16-bit field to make the whole thing a multiple of 4 bytes. */
(32 bits, signed), script extension (8 bits), bidi class (8 bits), bool
properties offset (8 bits), and a dummy 8-bit field to make the whole thing a
multiple of 4 bytes. */
\n""")
write_records(records, record_size)

View File

@ -15,9 +15,12 @@
from GenerateCommon import \
bidi_classes, \
bool_properties, \
bool_props_list_item_size, \
break_properties, \
category_names, \
general_category_names, \
script_list_item_size, \
script_names, \
open_output
@ -51,6 +54,16 @@ for i in range(0, len(category_names), 2):
f.write(" ucp_%s, /* %s */\n" % (category_names[i], category_names[i+1]))
f.write("};\n\n")
# Emit the enum of Boolean properties, one enumerator per name in
# bool_properties (in list order), terminated by ucp_Bprop_Count.
f.write("/* These are Boolean properties. */\n\nenum {\n")
f.writelines(" ucp_%s,\n" % prop_name for prop_name in bool_properties)
f.write(" /* This must be last */\n")
f.write(" ucp_Bprop_Count\n};\n\n")

# Emit the number of 32-bit words in each entry of ucd_boolprop_sets[].
f.write("/* Size of entries in ucd_boolprop_sets[] */\n\n")
f.write("#define ucd_boolprop_sets_item_size %d\n\n" % bool_props_list_item_size)
f.write("/* These are the bidi class values. */\n\nenum {\n")
for i in range(0, len(bidi_classes), 2):
sp = ' ' * (4 - len(bidi_classes[i]))
@ -74,9 +87,12 @@ f.write("\n")
f.write(" /* This must be last */\n")
f.write(" ucp_Script_Count\n};\n\n")
f.write("/* Size of entries in ucd_script_sets[] */\n\n")
f.write("#define ucd_script_sets_item_size %d\n\n" % script_list_item_size)
f.write("#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */\n\n")
f.write("/* End of pcre2_ucp.h */\n")
f.close
f.close()
# End

View File

@ -49,6 +49,7 @@
from GenerateCommon import \
abbreviations, \
bool_properties, \
bidi_classes, \
category_names, \
general_category_names, \
@ -87,6 +88,7 @@ def stdnames(x):
std_category_names = stdnames(category_names)
std_general_category_names = stdnames(general_category_names)
std_bidi_class_names = stdnames(bidi_class_names)
std_bool_properties = stdnames(bool_properties)
# Create the table, starting with the Unicode script, category and bidi class
# names. We keep both the standardized name and the original, because the
@ -99,7 +101,6 @@ scx_end = script_names.index('Unknown')
for idx, name in enumerate(script_names):
pt_type = 'PT_SCX' if idx < scx_end else 'PT_SC'
utt_table.append((stdname(name), name, pt_type))
for abbrev in abbreviations[name]:
utt_table.append((stdname(abbrev), name, pt_type))
@ -110,12 +111,20 @@ utt_table += list(zip(std_category_names, category_names, ['PT_PC'] * len(catego
utt_table += list(zip(std_general_category_names, general_category_names, ['PT_GC'] * len(general_category_names)))
utt_table += list(zip(std_bidi_class_names, bidi_class_names, ['PT_BIDICL'] * len(bidi_class_names)))
# Each Boolean property contributes an entry under its own standardized name,
# followed by one entry per abbreviation recorded for it (if any). All of them
# refer back to the full property name and have type PT_BOOL.
for name in bool_properties:
  utt_table.append((stdname(name), name, 'PT_BOOL'))
  utt_table.extend(
    (stdname(abbrev), name, 'PT_BOOL')
    for abbrev in abbreviations.get(name, ()))
#utt_table += list(zip(std_bool_properties, bool_properties, ['PT_BOOL'] * len(bool_properties)))
# Now add specials and synonyms. Note both the standardized and capitalized
# forms are needed.
utt_table.append(('any', 'Any', 'PT_ANY'))
utt_table.append(('bidic', 'BidiC', 'PT_BIDICO'))
utt_table.append(('bidicontrol', 'Bidi_Control', 'PT_BIDICO'))
utt_table.append(('l&', 'L&', 'PT_LAMP'))
utt_table.append(('lc', 'LC', 'PT_LAMP'))
utt_table.append(('xan', 'Xan', 'PT_ALNUM'))
@ -171,7 +180,7 @@ offset = 0
last = ','
for utt in utt_table:
if utt[2] in ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE',
'PT_SPACE', 'PT_UCNC', 'PT_WORD', 'PT_BIDICO'):
'PT_SPACE', 'PT_UCNC', 'PT_WORD'):
value = '0'
else:
value = 'ucp_' + utt[1]

View File

@ -0,0 +1,633 @@
# BidiMirroring-14.0.0.txt
# Date: 2021-08-08, 22:55:00 GMT [KW, RP]
# © 2021 Unicode®, Inc.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see https://www.unicode.org/reports/tr44/
#
# Bidi_Mirroring_Glyph Property
#
# This file is an informative contributory data file in the
# Unicode Character Database.
#
# This data file lists characters that have the Bidi_Mirrored=Yes property
# value, for which there is another Unicode character that typically has a glyph
# that is the mirror image of the original character's glyph.
#
# The repertoire covered by the file is Unicode 14.0.0.
#
# The file contains a list of lines with mappings from one code point
# to another one for character-based mirroring.
# Note that for "real" mirroring, a rendering engine needs to select
# appropriate alternative glyphs, and that many Unicode characters do not
# have a mirror-image Unicode character.
#
# Each mapping line contains two fields, separated by a semicolon (';').
# Each of the two fields contains a code point represented as a
# variable-length hexadecimal value with 4 to 6 digits.
# A comment indicates where the characters are "BEST FIT" mirroring.
#
# Code points for which Bidi_Mirrored=Yes, but for which no appropriate
# characters exist with mirrored glyphs, are
# listed as comments at the end of the file.
#
# Formally, the default value of the Bidi_Mirroring_Glyph property
# for each code point is <none>, unless a mapping to
# some other character is specified in this data file. When a code
# point has the default value for the Bidi_Mirroring_Glyph property,
# that means that no other character exists whose glyph is suitable
# for character-based mirroring.
#
# For information on bidi mirroring, see UAX #9: Unicode Bidirectional Algorithm,
# at https://www.unicode.org/reports/tr9/
#
# This file was originally created by Markus Scherer.
# Extended for Unicode 3.2, 4.0, 4.1, 5.0, 5.1, 5.2, and 6.0 by Ken Whistler,
# and for subsequent versions by Ken Whistler, Laurentiu Iancu, and Roozbeh Pournader.
#
# Historical and Compatibility Information:
#
# The OpenType Mirroring Pairs List (OMPL) is frozen to match the
# Unicode 5.1 version of the Bidi_Mirroring_Glyph property (2008).
# See https://www.microsoft.com/typography/otspec/ompl.txt
#
# The Unicode 6.1 version of the Bidi_Mirroring_Glyph property (2011)
# added one mirroring pair: 27CB <--> 27CD.
#
# The Unicode 11.0 version of the Bidi_Mirroring_Glyph property (2018)
# underwent a substantial revision, to formally recognize all of the
# exact mirroring pairs and "BEST FIT" mirroring pairs that had been
# added after the freezing of the OMPL list. As a result, starting
# with Unicode 11.0, the bmg mapping values more accurately reflect
# the current status of glyphs for Bidi_Mirrored characters in
# the Unicode Standard, but this listing now extends significantly
# beyond the frozen OMPL list. Implementers should be aware of this
# intentional distinction.
#
# ############################################################
#
# Property: Bidi_Mirroring_Glyph
#
# @missing: 0000..10FFFF; <none>
0028; 0029 # LEFT PARENTHESIS
0029; 0028 # RIGHT PARENTHESIS
003C; 003E # LESS-THAN SIGN
003E; 003C # GREATER-THAN SIGN
005B; 005D # LEFT SQUARE BRACKET
005D; 005B # RIGHT SQUARE BRACKET
007B; 007D # LEFT CURLY BRACKET
007D; 007B # RIGHT CURLY BRACKET
00AB; 00BB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
00BB; 00AB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0F3A; 0F3B # TIBETAN MARK GUG RTAGS GYON
0F3B; 0F3A # TIBETAN MARK GUG RTAGS GYAS
0F3C; 0F3D # TIBETAN MARK ANG KHANG GYON
0F3D; 0F3C # TIBETAN MARK ANG KHANG GYAS
169B; 169C # OGHAM FEATHER MARK
169C; 169B # OGHAM REVERSED FEATHER MARK
2039; 203A # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
203A; 2039 # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
2045; 2046 # LEFT SQUARE BRACKET WITH QUILL
2046; 2045 # RIGHT SQUARE BRACKET WITH QUILL
207D; 207E # SUPERSCRIPT LEFT PARENTHESIS
207E; 207D # SUPERSCRIPT RIGHT PARENTHESIS
208D; 208E # SUBSCRIPT LEFT PARENTHESIS
208E; 208D # SUBSCRIPT RIGHT PARENTHESIS
2208; 220B # ELEMENT OF
2209; 220C # [BEST FIT] NOT AN ELEMENT OF
220A; 220D # SMALL ELEMENT OF
220B; 2208 # CONTAINS AS MEMBER
220C; 2209 # [BEST FIT] DOES NOT CONTAIN AS MEMBER
220D; 220A # SMALL CONTAINS AS MEMBER
2215; 29F5 # DIVISION SLASH
221F; 2BFE # RIGHT ANGLE
2220; 29A3 # ANGLE
2221; 299B # MEASURED ANGLE
2222; 29A0 # SPHERICAL ANGLE
2224; 2AEE # DOES NOT DIVIDE
223C; 223D # TILDE OPERATOR
223D; 223C # REVERSED TILDE
2243; 22CD # ASYMPTOTICALLY EQUAL TO
2245; 224C # APPROXIMATELY EQUAL TO
224C; 2245 # ALL EQUAL TO
2252; 2253 # APPROXIMATELY EQUAL TO OR THE IMAGE OF
2253; 2252 # IMAGE OF OR APPROXIMATELY EQUAL TO
2254; 2255 # COLON EQUALS
2255; 2254 # EQUALS COLON
2264; 2265 # LESS-THAN OR EQUAL TO
2265; 2264 # GREATER-THAN OR EQUAL TO
2266; 2267 # LESS-THAN OVER EQUAL TO
2267; 2266 # GREATER-THAN OVER EQUAL TO
2268; 2269 # [BEST FIT] LESS-THAN BUT NOT EQUAL TO
2269; 2268 # [BEST FIT] GREATER-THAN BUT NOT EQUAL TO
226A; 226B # MUCH LESS-THAN
226B; 226A # MUCH GREATER-THAN
226E; 226F # [BEST FIT] NOT LESS-THAN
226F; 226E # [BEST FIT] NOT GREATER-THAN
2270; 2271 # [BEST FIT] NEITHER LESS-THAN NOR EQUAL TO
2271; 2270 # [BEST FIT] NEITHER GREATER-THAN NOR EQUAL TO
2272; 2273 # [BEST FIT] LESS-THAN OR EQUIVALENT TO
2273; 2272 # [BEST FIT] GREATER-THAN OR EQUIVALENT TO
2274; 2275 # [BEST FIT] NEITHER LESS-THAN NOR EQUIVALENT TO
2275; 2274 # [BEST FIT] NEITHER GREATER-THAN NOR EQUIVALENT TO
2276; 2277 # LESS-THAN OR GREATER-THAN
2277; 2276 # GREATER-THAN OR LESS-THAN
2278; 2279 # [BEST FIT] NEITHER LESS-THAN NOR GREATER-THAN
2279; 2278 # [BEST FIT] NEITHER GREATER-THAN NOR LESS-THAN
227A; 227B # PRECEDES
227B; 227A # SUCCEEDS
227C; 227D # PRECEDES OR EQUAL TO
227D; 227C # SUCCEEDS OR EQUAL TO
227E; 227F # [BEST FIT] PRECEDES OR EQUIVALENT TO
227F; 227E # [BEST FIT] SUCCEEDS OR EQUIVALENT TO
2280; 2281 # [BEST FIT] DOES NOT PRECEDE
2281; 2280 # [BEST FIT] DOES NOT SUCCEED
2282; 2283 # SUBSET OF
2283; 2282 # SUPERSET OF
2284; 2285 # [BEST FIT] NOT A SUBSET OF
2285; 2284 # [BEST FIT] NOT A SUPERSET OF
2286; 2287 # SUBSET OF OR EQUAL TO
2287; 2286 # SUPERSET OF OR EQUAL TO
2288; 2289 # [BEST FIT] NEITHER A SUBSET OF NOR EQUAL TO
2289; 2288 # [BEST FIT] NEITHER A SUPERSET OF NOR EQUAL TO
228A; 228B # [BEST FIT] SUBSET OF WITH NOT EQUAL TO
228B; 228A # [BEST FIT] SUPERSET OF WITH NOT EQUAL TO
228F; 2290 # SQUARE IMAGE OF
2290; 228F # SQUARE ORIGINAL OF
2291; 2292 # SQUARE IMAGE OF OR EQUAL TO
2292; 2291 # SQUARE ORIGINAL OF OR EQUAL TO
2298; 29B8 # CIRCLED DIVISION SLASH
22A2; 22A3 # RIGHT TACK
22A3; 22A2 # LEFT TACK
22A6; 2ADE # ASSERTION
22A8; 2AE4 # TRUE
22A9; 2AE3 # FORCES
22AB; 2AE5 # DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
22B0; 22B1 # PRECEDES UNDER RELATION
22B1; 22B0 # SUCCEEDS UNDER RELATION
22B2; 22B3 # NORMAL SUBGROUP OF
22B3; 22B2 # CONTAINS AS NORMAL SUBGROUP
22B4; 22B5 # NORMAL SUBGROUP OF OR EQUAL TO
22B5; 22B4 # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO
22B6; 22B7 # ORIGINAL OF
22B7; 22B6 # IMAGE OF
22B8; 27DC # MULTIMAP
22C9; 22CA # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT
22CA; 22C9 # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT
22CB; 22CC # LEFT SEMIDIRECT PRODUCT
22CC; 22CB # RIGHT SEMIDIRECT PRODUCT
22CD; 2243 # REVERSED TILDE EQUALS
22D0; 22D1 # DOUBLE SUBSET
22D1; 22D0 # DOUBLE SUPERSET
22D6; 22D7 # LESS-THAN WITH DOT
22D7; 22D6 # GREATER-THAN WITH DOT
22D8; 22D9 # VERY MUCH LESS-THAN
22D9; 22D8 # VERY MUCH GREATER-THAN
22DA; 22DB # LESS-THAN EQUAL TO OR GREATER-THAN
22DB; 22DA # GREATER-THAN EQUAL TO OR LESS-THAN
22DC; 22DD # EQUAL TO OR LESS-THAN
22DD; 22DC # EQUAL TO OR GREATER-THAN
22DE; 22DF # EQUAL TO OR PRECEDES
22DF; 22DE # EQUAL TO OR SUCCEEDS
22E0; 22E1 # [BEST FIT] DOES NOT PRECEDE OR EQUAL
22E1; 22E0 # [BEST FIT] DOES NOT SUCCEED OR EQUAL
22E2; 22E3 # [BEST FIT] NOT SQUARE IMAGE OF OR EQUAL TO
22E3; 22E2 # [BEST FIT] NOT SQUARE ORIGINAL OF OR EQUAL TO
22E4; 22E5 # [BEST FIT] SQUARE IMAGE OF OR NOT EQUAL TO
22E5; 22E4 # [BEST FIT] SQUARE ORIGINAL OF OR NOT EQUAL TO
22E6; 22E7 # [BEST FIT] LESS-THAN BUT NOT EQUIVALENT TO
22E7; 22E6 # [BEST FIT] GREATER-THAN BUT NOT EQUIVALENT TO
22E8; 22E9 # [BEST FIT] PRECEDES BUT NOT EQUIVALENT TO
22E9; 22E8 # [BEST FIT] SUCCEEDS BUT NOT EQUIVALENT TO
22EA; 22EB # [BEST FIT] NOT NORMAL SUBGROUP OF
22EB; 22EA # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP
22EC; 22ED # [BEST FIT] NOT NORMAL SUBGROUP OF OR EQUAL TO
22ED; 22EC # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL
22F0; 22F1 # UP RIGHT DIAGONAL ELLIPSIS
22F1; 22F0 # DOWN RIGHT DIAGONAL ELLIPSIS
22F2; 22FA # ELEMENT OF WITH LONG HORIZONTAL STROKE
22F3; 22FB # ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
22F4; 22FC # SMALL ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
22F6; 22FD # ELEMENT OF WITH OVERBAR
22F7; 22FE # SMALL ELEMENT OF WITH OVERBAR
22FA; 22F2 # CONTAINS WITH LONG HORIZONTAL STROKE
22FB; 22F3 # CONTAINS WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
22FC; 22F4 # SMALL CONTAINS WITH VERTICAL BAR AT END OF HORIZONTAL STROKE
22FD; 22F6 # CONTAINS WITH OVERBAR
22FE; 22F7 # SMALL CONTAINS WITH OVERBAR
2308; 2309 # LEFT CEILING
2309; 2308 # RIGHT CEILING
230A; 230B # LEFT FLOOR
230B; 230A # RIGHT FLOOR
2329; 232A # LEFT-POINTING ANGLE BRACKET
232A; 2329 # RIGHT-POINTING ANGLE BRACKET
2768; 2769 # MEDIUM LEFT PARENTHESIS ORNAMENT
2769; 2768 # MEDIUM RIGHT PARENTHESIS ORNAMENT
276A; 276B # MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
276B; 276A # MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT
276C; 276D # MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT
276D; 276C # MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT
276E; 276F # HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
276F; 276E # HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
2770; 2771 # HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
2771; 2770 # HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
2772; 2773 # LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
2773; 2772 # LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
2774; 2775 # MEDIUM LEFT CURLY BRACKET ORNAMENT
2775; 2774 # MEDIUM RIGHT CURLY BRACKET ORNAMENT
27C3; 27C4 # OPEN SUBSET
27C4; 27C3 # OPEN SUPERSET
27C5; 27C6 # LEFT S-SHAPED BAG DELIMITER
27C6; 27C5 # RIGHT S-SHAPED BAG DELIMITER
27C8; 27C9 # REVERSE SOLIDUS PRECEDING SUBSET
27C9; 27C8 # SUPERSET PRECEDING SOLIDUS
27CB; 27CD # MATHEMATICAL RISING DIAGONAL
27CD; 27CB # MATHEMATICAL FALLING DIAGONAL
27D5; 27D6 # LEFT OUTER JOIN
27D6; 27D5 # RIGHT OUTER JOIN
27DC; 22B8 # LEFT MULTIMAP
27DD; 27DE # LONG RIGHT TACK
27DE; 27DD # LONG LEFT TACK
27E2; 27E3 # WHITE CONCAVE-SIDED DIAMOND WITH LEFTWARDS TICK
27E3; 27E2 # WHITE CONCAVE-SIDED DIAMOND WITH RIGHTWARDS TICK
27E4; 27E5 # WHITE SQUARE WITH LEFTWARDS TICK
27E5; 27E4 # WHITE SQUARE WITH RIGHTWARDS TICK
27E6; 27E7 # MATHEMATICAL LEFT WHITE SQUARE BRACKET
27E7; 27E6 # MATHEMATICAL RIGHT WHITE SQUARE BRACKET
27E8; 27E9 # MATHEMATICAL LEFT ANGLE BRACKET
27E9; 27E8 # MATHEMATICAL RIGHT ANGLE BRACKET
27EA; 27EB # MATHEMATICAL LEFT DOUBLE ANGLE BRACKET
27EB; 27EA # MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET
27EC; 27ED # MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET
27ED; 27EC # MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET
27EE; 27EF # MATHEMATICAL LEFT FLATTENED PARENTHESIS
27EF; 27EE # MATHEMATICAL RIGHT FLATTENED PARENTHESIS
2983; 2984 # LEFT WHITE CURLY BRACKET
2984; 2983 # RIGHT WHITE CURLY BRACKET
2985; 2986 # LEFT WHITE PARENTHESIS
2986; 2985 # RIGHT WHITE PARENTHESIS
2987; 2988 # Z NOTATION LEFT IMAGE BRACKET
2988; 2987 # Z NOTATION RIGHT IMAGE BRACKET
2989; 298A # Z NOTATION LEFT BINDING BRACKET
298A; 2989 # Z NOTATION RIGHT BINDING BRACKET
298B; 298C # LEFT SQUARE BRACKET WITH UNDERBAR
298C; 298B # RIGHT SQUARE BRACKET WITH UNDERBAR
298D; 2990 # LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
298E; 298F # RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
298F; 298E # LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
2990; 298D # RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER
2991; 2992 # LEFT ANGLE BRACKET WITH DOT
2992; 2991 # RIGHT ANGLE BRACKET WITH DOT
2993; 2994 # LEFT ARC LESS-THAN BRACKET
2994; 2993 # RIGHT ARC GREATER-THAN BRACKET
2995; 2996 # DOUBLE LEFT ARC GREATER-THAN BRACKET
2996; 2995 # DOUBLE RIGHT ARC LESS-THAN BRACKET
2997; 2998 # LEFT BLACK TORTOISE SHELL BRACKET
2998; 2997 # RIGHT BLACK TORTOISE SHELL BRACKET
299B; 2221 # MEASURED ANGLE OPENING LEFT
29A0; 2222 # SPHERICAL ANGLE OPENING LEFT
29A3; 2220 # REVERSED ANGLE
29A4; 29A5 # ANGLE WITH UNDERBAR
29A5; 29A4 # REVERSED ANGLE WITH UNDERBAR
29A8; 29A9 # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING UP AND RIGHT
29A9; 29A8 # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING UP AND LEFT
29AA; 29AB # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING DOWN AND RIGHT
29AB; 29AA # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING DOWN AND LEFT
29AC; 29AD # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING RIGHT AND UP
29AD; 29AC # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING LEFT AND UP
29AE; 29AF # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING RIGHT AND DOWN
29AF; 29AE # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING LEFT AND DOWN
29B8; 2298 # CIRCLED REVERSE SOLIDUS
29C0; 29C1 # CIRCLED LESS-THAN
29C1; 29C0 # CIRCLED GREATER-THAN
29C4; 29C5 # SQUARED RISING DIAGONAL SLASH
29C5; 29C4 # SQUARED FALLING DIAGONAL SLASH
29CF; 29D0 # LEFT TRIANGLE BESIDE VERTICAL BAR
29D0; 29CF # VERTICAL BAR BESIDE RIGHT TRIANGLE
29D1; 29D2 # BOWTIE WITH LEFT HALF BLACK
29D2; 29D1 # BOWTIE WITH RIGHT HALF BLACK
29D4; 29D5 # TIMES WITH LEFT HALF BLACK
29D5; 29D4 # TIMES WITH RIGHT HALF BLACK
29D8; 29D9 # LEFT WIGGLY FENCE
29D9; 29D8 # RIGHT WIGGLY FENCE
29DA; 29DB # LEFT DOUBLE WIGGLY FENCE
29DB; 29DA # RIGHT DOUBLE WIGGLY FENCE
29E8; 29E9 # DOWN-POINTING TRIANGLE WITH LEFT HALF BLACK
29E9; 29E8 # DOWN-POINTING TRIANGLE WITH RIGHT HALF BLACK
29F5; 2215 # REVERSE SOLIDUS OPERATOR
29F8; 29F9 # BIG SOLIDUS
29F9; 29F8 # BIG REVERSE SOLIDUS
29FC; 29FD # LEFT-POINTING CURVED ANGLE BRACKET
29FD; 29FC # RIGHT-POINTING CURVED ANGLE BRACKET
2A2B; 2A2C # MINUS SIGN WITH FALLING DOTS
2A2C; 2A2B # MINUS SIGN WITH RISING DOTS
2A2D; 2A2E # PLUS SIGN IN LEFT HALF CIRCLE
2A2E; 2A2D # PLUS SIGN IN RIGHT HALF CIRCLE
2A34; 2A35 # MULTIPLICATION SIGN IN LEFT HALF CIRCLE
2A35; 2A34 # MULTIPLICATION SIGN IN RIGHT HALF CIRCLE
2A3C; 2A3D # INTERIOR PRODUCT
2A3D; 2A3C # RIGHTHAND INTERIOR PRODUCT
2A64; 2A65 # Z NOTATION DOMAIN ANTIRESTRICTION
2A65; 2A64 # Z NOTATION RANGE ANTIRESTRICTION
2A79; 2A7A # LESS-THAN WITH CIRCLE INSIDE
2A7A; 2A79 # GREATER-THAN WITH CIRCLE INSIDE
2A7B; 2A7C # [BEST FIT] LESS-THAN WITH QUESTION MARK ABOVE
2A7C; 2A7B # [BEST FIT] GREATER-THAN WITH QUESTION MARK ABOVE
2A7D; 2A7E # LESS-THAN OR SLANTED EQUAL TO
2A7E; 2A7D # GREATER-THAN OR SLANTED EQUAL TO
2A7F; 2A80 # LESS-THAN OR SLANTED EQUAL TO WITH DOT INSIDE
2A80; 2A7F # GREATER-THAN OR SLANTED EQUAL TO WITH DOT INSIDE
2A81; 2A82 # LESS-THAN OR SLANTED EQUAL TO WITH DOT ABOVE
2A82; 2A81 # GREATER-THAN OR SLANTED EQUAL TO WITH DOT ABOVE
2A83; 2A84 # LESS-THAN OR SLANTED EQUAL TO WITH DOT ABOVE RIGHT
2A84; 2A83 # GREATER-THAN OR SLANTED EQUAL TO WITH DOT ABOVE LEFT
2A85; 2A86 # [BEST FIT] LESS-THAN OR APPROXIMATE
2A86; 2A85 # [BEST FIT] GREATER-THAN OR APPROXIMATE
2A87; 2A88 # [BEST FIT] LESS-THAN AND SINGLE-LINE NOT EQUAL TO
2A88; 2A87 # [BEST FIT] GREATER-THAN AND SINGLE-LINE NOT EQUAL TO
2A89; 2A8A # [BEST FIT] LESS-THAN AND NOT APPROXIMATE
2A8A; 2A89 # [BEST FIT] GREATER-THAN AND NOT APPROXIMATE
2A8B; 2A8C # LESS-THAN ABOVE DOUBLE-LINE EQUAL ABOVE GREATER-THAN
2A8C; 2A8B # GREATER-THAN ABOVE DOUBLE-LINE EQUAL ABOVE LESS-THAN
2A8D; 2A8E # [BEST FIT] LESS-THAN ABOVE SIMILAR OR EQUAL
2A8E; 2A8D # [BEST FIT] GREATER-THAN ABOVE SIMILAR OR EQUAL
2A8F; 2A90 # [BEST FIT] LESS-THAN ABOVE SIMILAR ABOVE GREATER-THAN
2A90; 2A8F # [BEST FIT] GREATER-THAN ABOVE SIMILAR ABOVE LESS-THAN
2A91; 2A92 # LESS-THAN ABOVE GREATER-THAN ABOVE DOUBLE-LINE EQUAL
2A92; 2A91 # GREATER-THAN ABOVE LESS-THAN ABOVE DOUBLE-LINE EQUAL
2A93; 2A94 # LESS-THAN ABOVE SLANTED EQUAL ABOVE GREATER-THAN ABOVE SLANTED EQUAL
2A94; 2A93 # GREATER-THAN ABOVE SLANTED EQUAL ABOVE LESS-THAN ABOVE SLANTED EQUAL
2A95; 2A96 # SLANTED EQUAL TO OR LESS-THAN
2A96; 2A95 # SLANTED EQUAL TO OR GREATER-THAN
2A97; 2A98 # SLANTED EQUAL TO OR LESS-THAN WITH DOT INSIDE
2A98; 2A97 # SLANTED EQUAL TO OR GREATER-THAN WITH DOT INSIDE
2A99; 2A9A # DOUBLE-LINE EQUAL TO OR LESS-THAN
2A9A; 2A99 # DOUBLE-LINE EQUAL TO OR GREATER-THAN
2A9B; 2A9C # DOUBLE-LINE SLANTED EQUAL TO OR LESS-THAN
2A9C; 2A9B # DOUBLE-LINE SLANTED EQUAL TO OR GREATER-THAN
2A9D; 2A9E # [BEST FIT] SIMILAR OR LESS-THAN
2A9E; 2A9D # [BEST FIT] SIMILAR OR GREATER-THAN
2A9F; 2AA0 # [BEST FIT] SIMILAR ABOVE LESS-THAN ABOVE EQUALS SIGN
2AA0; 2A9F # [BEST FIT] SIMILAR ABOVE GREATER-THAN ABOVE EQUALS SIGN
2AA1; 2AA2 # DOUBLE NESTED LESS-THAN
2AA2; 2AA1 # DOUBLE NESTED GREATER-THAN
2AA6; 2AA7 # LESS-THAN CLOSED BY CURVE
2AA7; 2AA6 # GREATER-THAN CLOSED BY CURVE
2AA8; 2AA9 # LESS-THAN CLOSED BY CURVE ABOVE SLANTED EQUAL
2AA9; 2AA8 # GREATER-THAN CLOSED BY CURVE ABOVE SLANTED EQUAL
2AAA; 2AAB # SMALLER THAN
2AAB; 2AAA # LARGER THAN
2AAC; 2AAD # SMALLER THAN OR EQUAL TO
2AAD; 2AAC # LARGER THAN OR EQUAL TO
2AAF; 2AB0 # PRECEDES ABOVE SINGLE-LINE EQUALS SIGN
2AB0; 2AAF # SUCCEEDS ABOVE SINGLE-LINE EQUALS SIGN
2AB1; 2AB2 # [BEST FIT] PRECEDES ABOVE SINGLE-LINE NOT EQUAL TO
2AB2; 2AB1 # [BEST FIT] SUCCEEDS ABOVE SINGLE-LINE NOT EQUAL TO
2AB3; 2AB4 # PRECEDES ABOVE EQUALS SIGN
2AB4; 2AB3 # SUCCEEDS ABOVE EQUALS SIGN
2AB5; 2AB6 # [BEST FIT] PRECEDES ABOVE NOT EQUAL TO
2AB6; 2AB5 # [BEST FIT] SUCCEEDS ABOVE NOT EQUAL TO
2AB7; 2AB8 # [BEST FIT] PRECEDES ABOVE ALMOST EQUAL TO
2AB8; 2AB7 # [BEST FIT] SUCCEEDS ABOVE ALMOST EQUAL TO
2AB9; 2ABA # [BEST FIT] PRECEDES ABOVE NOT ALMOST EQUAL TO
2ABA; 2AB9 # [BEST FIT] SUCCEEDS ABOVE NOT ALMOST EQUAL TO
2ABB; 2ABC # DOUBLE PRECEDES
2ABC; 2ABB # DOUBLE SUCCEEDS
2ABD; 2ABE # SUBSET WITH DOT
2ABE; 2ABD # SUPERSET WITH DOT
2ABF; 2AC0 # SUBSET WITH PLUS SIGN BELOW
2AC0; 2ABF # SUPERSET WITH PLUS SIGN BELOW
2AC1; 2AC2 # SUBSET WITH MULTIPLICATION SIGN BELOW
2AC2; 2AC1 # SUPERSET WITH MULTIPLICATION SIGN BELOW
2AC3; 2AC4 # SUBSET OF OR EQUAL TO WITH DOT ABOVE
2AC4; 2AC3 # SUPERSET OF OR EQUAL TO WITH DOT ABOVE
2AC5; 2AC6 # SUBSET OF ABOVE EQUALS SIGN
2AC6; 2AC5 # SUPERSET OF ABOVE EQUALS SIGN
2AC7; 2AC8 # [BEST FIT] SUBSET OF ABOVE TILDE OPERATOR
2AC8; 2AC7 # [BEST FIT] SUPERSET OF ABOVE TILDE OPERATOR
2AC9; 2ACA # [BEST FIT] SUBSET OF ABOVE ALMOST EQUAL TO
2ACA; 2AC9 # [BEST FIT] SUPERSET OF ABOVE ALMOST EQUAL TO
2ACB; 2ACC # [BEST FIT] SUBSET OF ABOVE NOT EQUAL TO
2ACC; 2ACB # [BEST FIT] SUPERSET OF ABOVE NOT EQUAL TO
2ACD; 2ACE # SQUARE LEFT OPEN BOX OPERATOR
2ACE; 2ACD # SQUARE RIGHT OPEN BOX OPERATOR
2ACF; 2AD0 # CLOSED SUBSET
2AD0; 2ACF # CLOSED SUPERSET
2AD1; 2AD2 # CLOSED SUBSET OR EQUAL TO
2AD2; 2AD1 # CLOSED SUPERSET OR EQUAL TO
2AD3; 2AD4 # SUBSET ABOVE SUPERSET
2AD4; 2AD3 # SUPERSET ABOVE SUBSET
2AD5; 2AD6 # SUBSET ABOVE SUBSET
2AD6; 2AD5 # SUPERSET ABOVE SUPERSET
2ADE; 22A6 # SHORT LEFT TACK
2AE3; 22A9 # DOUBLE VERTICAL BAR LEFT TURNSTILE
2AE4; 22A8 # VERTICAL BAR DOUBLE LEFT TURNSTILE
2AE5; 22AB # DOUBLE VERTICAL BAR DOUBLE LEFT TURNSTILE
2AEC; 2AED # DOUBLE STROKE NOT SIGN
2AED; 2AEC # REVERSED DOUBLE STROKE NOT SIGN
2AEE; 2224 # DOES NOT DIVIDE WITH REVERSED NEGATION SLASH
2AF7; 2AF8 # TRIPLE NESTED LESS-THAN
2AF8; 2AF7 # TRIPLE NESTED GREATER-THAN
2AF9; 2AFA # DOUBLE-LINE SLANTED LESS-THAN OR EQUAL TO
2AFA; 2AF9 # DOUBLE-LINE SLANTED GREATER-THAN OR EQUAL TO
2BFE; 221F # REVERSED RIGHT ANGLE
2E02; 2E03 # LEFT SUBSTITUTION BRACKET
2E03; 2E02 # RIGHT SUBSTITUTION BRACKET
2E04; 2E05 # LEFT DOTTED SUBSTITUTION BRACKET
2E05; 2E04 # RIGHT DOTTED SUBSTITUTION BRACKET
2E09; 2E0A # LEFT TRANSPOSITION BRACKET
2E0A; 2E09 # RIGHT TRANSPOSITION BRACKET
2E0C; 2E0D # LEFT RAISED OMISSION BRACKET
2E0D; 2E0C # RIGHT RAISED OMISSION BRACKET
2E1C; 2E1D # LEFT LOW PARAPHRASE BRACKET
2E1D; 2E1C # RIGHT LOW PARAPHRASE BRACKET
2E20; 2E21 # LEFT VERTICAL BAR WITH QUILL
2E21; 2E20 # RIGHT VERTICAL BAR WITH QUILL
2E22; 2E23 # TOP LEFT HALF BRACKET
2E23; 2E22 # TOP RIGHT HALF BRACKET
2E24; 2E25 # BOTTOM LEFT HALF BRACKET
2E25; 2E24 # BOTTOM RIGHT HALF BRACKET
2E26; 2E27 # LEFT SIDEWAYS U BRACKET
2E27; 2E26 # RIGHT SIDEWAYS U BRACKET
2E28; 2E29 # LEFT DOUBLE PARENTHESIS
2E29; 2E28 # RIGHT DOUBLE PARENTHESIS
2E55; 2E56 # LEFT SQUARE BRACKET WITH STROKE
2E56; 2E55 # RIGHT SQUARE BRACKET WITH STROKE
2E57; 2E58 # LEFT SQUARE BRACKET WITH DOUBLE STROKE
2E58; 2E57 # RIGHT SQUARE BRACKET WITH DOUBLE STROKE
2E59; 2E5A # TOP HALF LEFT PARENTHESIS
2E5A; 2E59 # TOP HALF RIGHT PARENTHESIS
2E5B; 2E5C # BOTTOM HALF LEFT PARENTHESIS
2E5C; 2E5B # BOTTOM HALF RIGHT PARENTHESIS
3008; 3009 # LEFT ANGLE BRACKET
3009; 3008 # RIGHT ANGLE BRACKET
300A; 300B # LEFT DOUBLE ANGLE BRACKET
300B; 300A # RIGHT DOUBLE ANGLE BRACKET
300C; 300D # [BEST FIT] LEFT CORNER BRACKET
300D; 300C # [BEST FIT] RIGHT CORNER BRACKET
300E; 300F # [BEST FIT] LEFT WHITE CORNER BRACKET
300F; 300E # [BEST FIT] RIGHT WHITE CORNER BRACKET
3010; 3011 # LEFT BLACK LENTICULAR BRACKET
3011; 3010 # RIGHT BLACK LENTICULAR BRACKET
3014; 3015 # LEFT TORTOISE SHELL BRACKET
3015; 3014 # RIGHT TORTOISE SHELL BRACKET
3016; 3017 # LEFT WHITE LENTICULAR BRACKET
3017; 3016 # RIGHT WHITE LENTICULAR BRACKET
3018; 3019 # LEFT WHITE TORTOISE SHELL BRACKET
3019; 3018 # RIGHT WHITE TORTOISE SHELL BRACKET
301A; 301B # LEFT WHITE SQUARE BRACKET
301B; 301A # RIGHT WHITE SQUARE BRACKET
FE59; FE5A # SMALL LEFT PARENTHESIS
FE5A; FE59 # SMALL RIGHT PARENTHESIS
FE5B; FE5C # SMALL LEFT CURLY BRACKET
FE5C; FE5B # SMALL RIGHT CURLY BRACKET
FE5D; FE5E # SMALL LEFT TORTOISE SHELL BRACKET
FE5E; FE5D # SMALL RIGHT TORTOISE SHELL BRACKET
FE64; FE65 # SMALL LESS-THAN SIGN
FE65; FE64 # SMALL GREATER-THAN SIGN
FF08; FF09 # FULLWIDTH LEFT PARENTHESIS
FF09; FF08 # FULLWIDTH RIGHT PARENTHESIS
FF1C; FF1E # FULLWIDTH LESS-THAN SIGN
FF1E; FF1C # FULLWIDTH GREATER-THAN SIGN
FF3B; FF3D # FULLWIDTH LEFT SQUARE BRACKET
FF3D; FF3B # FULLWIDTH RIGHT SQUARE BRACKET
FF5B; FF5D # FULLWIDTH LEFT CURLY BRACKET
FF5D; FF5B # FULLWIDTH RIGHT CURLY BRACKET
FF5F; FF60 # FULLWIDTH LEFT WHITE PARENTHESIS
FF60; FF5F # FULLWIDTH RIGHT WHITE PARENTHESIS
FF62; FF63 # [BEST FIT] HALFWIDTH LEFT CORNER BRACKET
FF63; FF62 # [BEST FIT] HALFWIDTH RIGHT CORNER BRACKET
# The following characters have no appropriate mirroring character.
# For these characters it is up to the rendering system
# to provide mirrored glyphs.
# 2140; DOUBLE-STRUCK N-ARY SUMMATION
# 2201; COMPLEMENT
# 2202; PARTIAL DIFFERENTIAL
# 2203; THERE EXISTS
# 2204; THERE DOES NOT EXIST
# 2211; N-ARY SUMMATION
# 2216; SET MINUS
# 221A; SQUARE ROOT
# 221B; CUBE ROOT
# 221C; FOURTH ROOT
# 221D; PROPORTIONAL TO
# 2226; NOT PARALLEL TO
# 222B; INTEGRAL
# 222C; DOUBLE INTEGRAL
# 222D; TRIPLE INTEGRAL
# 222E; CONTOUR INTEGRAL
# 222F; SURFACE INTEGRAL
# 2230; VOLUME INTEGRAL
# 2231; CLOCKWISE INTEGRAL
# 2232; CLOCKWISE CONTOUR INTEGRAL
# 2233; ANTICLOCKWISE CONTOUR INTEGRAL
# 2239; EXCESS
# 223B; HOMOTHETIC
# 223E; INVERTED LAZY S
# 223F; SINE WAVE
# 2240; WREATH PRODUCT
# 2241; NOT TILDE
# 2242; MINUS TILDE
# 2244; NOT ASYMPTOTICALLY EQUAL TO
# 2246; APPROXIMATELY BUT NOT ACTUALLY EQUAL TO
# 2247; NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
# 2248; ALMOST EQUAL TO
# 2249; NOT ALMOST EQUAL TO
# 224A; ALMOST EQUAL OR EQUAL TO
# 224B; TRIPLE TILDE
# 225F; QUESTIONED EQUAL TO
# 2260; NOT EQUAL TO
# 2262; NOT IDENTICAL TO
# 228C; MULTISET
# 22A7; MODELS
# 22AA; TRIPLE VERTICAL BAR RIGHT TURNSTILE
# 22AC; DOES NOT PROVE
# 22AD; NOT TRUE
# 22AE; DOES NOT FORCE
# 22AF; NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
# 22BE; RIGHT ANGLE WITH ARC
# 22BF; RIGHT TRIANGLE
# 22F5; ELEMENT OF WITH DOT ABOVE
# 22F8; ELEMENT OF WITH UNDERBAR
# 22F9; ELEMENT OF WITH TWO HORIZONTAL STROKES
# 22FF; Z NOTATION BAG MEMBERSHIP
# 2320; TOP HALF INTEGRAL
# 2321; BOTTOM HALF INTEGRAL
# 27C0; THREE DIMENSIONAL ANGLE
# 27CC; LONG DIVISION
# 27D3; LOWER RIGHT CORNER WITH DOT
# 27D4; UPPER LEFT CORNER WITH DOT
# 299C; RIGHT ANGLE VARIANT WITH SQUARE
# 299D; MEASURED RIGHT ANGLE WITH DOT
# 299E; ANGLE WITH S INSIDE
# 299F; ACUTE ANGLE
# 29A2; TURNED ANGLE
# 29A6; OBLIQUE ANGLE OPENING UP
# 29A7; OBLIQUE ANGLE OPENING DOWN
# 29C2; CIRCLE WITH SMALL CIRCLE TO THE RIGHT
# 29C3; CIRCLE WITH TWO HORIZONTAL STROKES TO THE RIGHT
# 29C9; TWO JOINED SQUARES
# 29CE; RIGHT TRIANGLE ABOVE LEFT TRIANGLE
# 29DC; INCOMPLETE INFINITY
# 29E1; INCREASES AS
# 29E3; EQUALS SIGN AND SLANTED PARALLEL
# 29E4; EQUALS SIGN AND SLANTED PARALLEL WITH TILDE ABOVE
# 29E5; IDENTICAL TO AND SLANTED PARALLEL
# 29F4; RULE-DELAYED
# 29F6; SOLIDUS WITH OVERBAR
# 29F7; REVERSE SOLIDUS WITH HORIZONTAL STROKE
# 2A0A; MODULO TWO SUM
# 2A0B; SUMMATION WITH INTEGRAL
# 2A0C; QUADRUPLE INTEGRAL OPERATOR
# 2A0D; FINITE PART INTEGRAL
# 2A0E; INTEGRAL WITH DOUBLE STROKE
# 2A0F; INTEGRAL AVERAGE WITH SLASH
# 2A10; CIRCULATION FUNCTION
# 2A11; ANTICLOCKWISE INTEGRATION
# 2A12; LINE INTEGRATION WITH RECTANGULAR PATH AROUND POLE
# 2A13; LINE INTEGRATION WITH SEMICIRCULAR PATH AROUND POLE
# 2A14; LINE INTEGRATION NOT INCLUDING THE POLE
# 2A15; INTEGRAL AROUND A POINT OPERATOR
# 2A16; QUATERNION INTEGRAL OPERATOR
# 2A17; INTEGRAL WITH LEFTWARDS ARROW WITH HOOK
# 2A18; INTEGRAL WITH TIMES SIGN
# 2A19; INTEGRAL WITH INTERSECTION
# 2A1A; INTEGRAL WITH UNION
# 2A1B; INTEGRAL WITH OVERBAR
# 2A1C; INTEGRAL WITH UNDERBAR
# 2A1E; LARGE LEFT TRIANGLE OPERATOR
# 2A1F; Z NOTATION SCHEMA COMPOSITION
# 2A20; Z NOTATION SCHEMA PIPING
# 2A21; Z NOTATION SCHEMA PROJECTION
# 2A24; PLUS SIGN WITH TILDE ABOVE
# 2A26; PLUS SIGN WITH TILDE BELOW
# 2A29; MINUS SIGN WITH COMMA ABOVE
# 2A3E; Z NOTATION RELATIONAL COMPOSITION
# 2A57; SLOPING LARGE OR
# 2A58; SLOPING LARGE AND
# 2A6A; TILDE OPERATOR WITH DOT ABOVE
# 2A6B; TILDE OPERATOR WITH RISING DOTS
# 2A6C; SIMILAR MINUS SIMILAR
# 2A6D; CONGRUENT WITH DOT ABOVE
# 2A6F; ALMOST EQUAL TO WITH CIRCUMFLEX ACCENT
# 2A70; APPROXIMATELY EQUAL OR EQUAL TO
# 2A73; EQUALS SIGN ABOVE TILDE OPERATOR
# 2A74; DOUBLE COLON EQUAL
# 2AA3; DOUBLE NESTED LESS-THAN WITH UNDERBAR
# 2ADC; FORKING
# 2AE2; VERTICAL BAR TRIPLE RIGHT TURNSTILE
# 2AE6; LONG DASH FROM LEFT MEMBER OF DOUBLE VERTICAL
# 2AF3; PARALLEL WITH TILDE OPERATOR
# 2AFB; TRIPLE SOLIDUS BINARY RELATION
# 2AFD; DOUBLE SOLIDUS OPERATOR
# 1D6DB; MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
# 1D715; MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
# 1D74F; MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
# 1D789; MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL
# 1D7C3; MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
# EOF

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,212 @@
# PropertyAliases-14.0.0.txt
# Date: 2021-03-08, 19:35:48 GMT
# © 2021 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
#
# This file contains aliases for properties used in the UCD.
# These names can be used for XML formats of UCD data, for regular-expression
# property tests, and other programmatic textual descriptions of Unicode data.
#
# The names may be translated in appropriate environments, and additional
# aliases may be useful.
#
# FORMAT
#
# Each line has two or more fields, separated by semicolons.
#
# First Field: The first field is the short name for the property.
# It is typically an abbreviation, but in a number of cases it is simply
# a duplicate of the "long name" in the second field.
# For Unihan database tags, the short name is actually a longer string than
# the tag specified in the second field.
#
# Second Field: The second field is the long name for the property,
# typically the formal name used in documentation about the property.
#
# The above are the preferred aliases. Other aliases may be listed in additional fields.
#
# Loose matching should be applied to all property names and property values, with
# the exception of String Property values. With loose matching of property names and
# values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property
# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1".
#
# NOTE: Property value names are NOT unique across properties. For example:
#
# AL means Arabic Letter for the Bidi_Class property, and
# AL means Above_Left for the Combining_Class property, and
# AL means Alphabetic for the Line_Break property.
#
# In addition, some property names may be the same as some property value names.
# For example:
#
# sc means the Script property, and
# Sc means the General_Category property value Currency_Symbol (Sc)
#
# The combination of property value and property name is, however, unique.
#
# For more information, see UAX #44, Unicode Character Database, and
# UTS #18, Unicode Regular Expressions.
# ================================================
# ================================================
# Numeric Properties
# ================================================
cjkAccountingNumeric ; kAccountingNumeric
cjkOtherNumeric ; kOtherNumeric
cjkPrimaryNumeric ; kPrimaryNumeric
nv ; Numeric_Value
# ================================================
# String Properties
# ================================================
cf ; Case_Folding
cjkCompatibilityVariant ; kCompatibilityVariant
dm ; Decomposition_Mapping
FC_NFKC ; FC_NFKC_Closure
lc ; Lowercase_Mapping
NFKC_CF ; NFKC_Casefold
scf ; Simple_Case_Folding ; sfc
slc ; Simple_Lowercase_Mapping
stc ; Simple_Titlecase_Mapping
suc ; Simple_Uppercase_Mapping
tc ; Titlecase_Mapping
uc ; Uppercase_Mapping
# ================================================
# Miscellaneous Properties
# ================================================
bmg ; Bidi_Mirroring_Glyph
bpb ; Bidi_Paired_Bracket
cjkIICore ; kIICore
cjkIRG_GSource ; kIRG_GSource
cjkIRG_HSource ; kIRG_HSource
cjkIRG_JSource ; kIRG_JSource
cjkIRG_KPSource ; kIRG_KPSource
cjkIRG_KSource ; kIRG_KSource
cjkIRG_MSource ; kIRG_MSource
cjkIRG_SSource ; kIRG_SSource
cjkIRG_TSource ; kIRG_TSource
cjkIRG_UKSource ; kIRG_UKSource
cjkIRG_USource ; kIRG_USource
cjkIRG_VSource ; kIRG_VSource
cjkRSUnicode ; kRSUnicode ; Unicode_Radical_Stroke; URS
EqUIdeo ; Equivalent_Unified_Ideograph
isc ; ISO_Comment
JSN ; Jamo_Short_Name
na ; Name
na1 ; Unicode_1_Name
Name_Alias ; Name_Alias
scx ; Script_Extensions
# ================================================
# Catalog Properties
# ================================================
age ; Age
blk ; Block
sc ; Script
# ================================================
# Enumerated Properties
# ================================================
bc ; Bidi_Class
bpt ; Bidi_Paired_Bracket_Type
ccc ; Canonical_Combining_Class
dt ; Decomposition_Type
ea ; East_Asian_Width
gc ; General_Category
GCB ; Grapheme_Cluster_Break
hst ; Hangul_Syllable_Type
InPC ; Indic_Positional_Category
InSC ; Indic_Syllabic_Category
jg ; Joining_Group
jt ; Joining_Type
lb ; Line_Break
NFC_QC ; NFC_Quick_Check
NFD_QC ; NFD_Quick_Check
NFKC_QC ; NFKC_Quick_Check
NFKD_QC ; NFKD_Quick_Check
nt ; Numeric_Type
SB ; Sentence_Break
vo ; Vertical_Orientation
WB ; Word_Break
# ================================================
# Binary Properties
# ================================================
AHex ; ASCII_Hex_Digit
Alpha ; Alphabetic
Bidi_C ; Bidi_Control
Bidi_M ; Bidi_Mirrored
Cased ; Cased
CE ; Composition_Exclusion
CI ; Case_Ignorable
Comp_Ex ; Full_Composition_Exclusion
CWCF ; Changes_When_Casefolded
CWCM ; Changes_When_Casemapped
CWKCF ; Changes_When_NFKC_Casefolded
CWL ; Changes_When_Lowercased
CWT ; Changes_When_Titlecased
CWU ; Changes_When_Uppercased
Dash ; Dash
Dep ; Deprecated
DI ; Default_Ignorable_Code_Point
Dia ; Diacritic
EBase ; Emoji_Modifier_Base
EComp ; Emoji_Component
EMod ; Emoji_Modifier
Emoji ; Emoji
EPres ; Emoji_Presentation
Ext ; Extender
ExtPict ; Extended_Pictographic
Gr_Base ; Grapheme_Base
Gr_Ext ; Grapheme_Extend
Gr_Link ; Grapheme_Link
Hex ; Hex_Digit
Hyphen ; Hyphen
IDC ; ID_Continue
Ideo ; Ideographic
IDS ; ID_Start
IDSB ; IDS_Binary_Operator
IDST ; IDS_Trinary_Operator
Join_C ; Join_Control
LOE ; Logical_Order_Exception
Lower ; Lowercase
Math ; Math
NChar ; Noncharacter_Code_Point
OAlpha ; Other_Alphabetic
ODI ; Other_Default_Ignorable_Code_Point
OGr_Ext ; Other_Grapheme_Extend
OIDC ; Other_ID_Continue
OIDS ; Other_ID_Start
OLower ; Other_Lowercase
OMath ; Other_Math
OUpper ; Other_Uppercase
Pat_Syn ; Pattern_Syntax
Pat_WS ; Pattern_White_Space
PCM ; Prepended_Concatenation_Mark
QMark ; Quotation_Mark
Radical ; Radical
RI ; Regional_Indicator
SD ; Soft_Dotted
STerm ; Sentence_Terminal
Term ; Terminal_Punctuation
UIdeo ; Unified_Ideograph
Upper ; Uppercase
VS ; Variation_Selector
WSpace ; White_Space ; space
XIDC ; XID_Continue
XIDS ; XID_Start
XO_NFC ; Expands_On_NFC
XO_NFD ; Expands_On_NFD
XO_NFKC ; Expands_On_NFKC
XO_NFKD ; Expands_On_NFKD
# ================================================
# Total: 129
# EOF

View File

@ -2,7 +2,7 @@
* A program for testing the Unicode property table *
***************************************************/
/* Copyright (c) University of Cambridge 2008-2021 */
/* Copyright (c) University of Cambridge 2008-2022 */
/* Compile thus:
@ -14,10 +14,10 @@
*/
/* This is a hacked-up program for testing the Unicode properties tables of
PCRE2. It can also be used for finding characters with certain properties.
I wrote it to help with debugging PCRE, and have added things that I found
useful, in a rather haphazard way. The code has never been seriously tidied or
checked for robustness, but it shouldn't now give compiler warnings.
PCRE2. It can also be used for finding characters with certain properties. I
wrote it to help with debugging, and have added things that I found useful, in
a rather haphazard way. The code has never been seriously tidied or checked for
robustness, but it shouldn't now give compiler warnings.
There is only one option: "-s". If given, it applies only to the "findprop"
command. It causes the UTF-8 sequence of bytes that encode the character to be
@ -33,31 +33,31 @@ return code is always zero.
There are three commands:
"findprop" must be followed by a space-separated list of Unicode code points as
hex numbers, either without any prefix or starting with "U+", or as individual
UTF-8 characters preceded by '+'. For example:
The command "findprop" must be followed by a space-separated list of Unicode
code points as hex numbers, either without any prefix or starting with "U+", or
as individual UTF-8 characters preceded by '+'. For example:
findprop U+1234 5Abc +?
The output is one line per character, giving its Unicode properties followed by
its other case or cases if one or more exist, followed by its Script Extension
list if it is not just the same as the base script. This list is in square
brackets. The properties are:
The output is one long line per character, listing Unicode properties that have
values, followed by its other case or cases if one or more exist, followed by
its Script Extension list if there is one. This list is in square brackets. A
second list in square brackets gives all the Boolean properties of the
character. The properties that come first are:
Bidi control shown as '*' if true
Bidi class e.g. NSM (most common is L)
General type e.g. Letter
Specific type e.g. Upper case letter
Script e.g. Medefaidrin
Grapheme break type e.g. Extend (most common is Other)
The scripts names are all in lower case, with underscores removed, because
that's how they are stored for "loose" matching.
Script names and Boolean property names are all in lower case, with underscores
and hyphens removed, because that's how they are stored for "loose" matching.
"find" must be followed by a list of property names and their values. The
values are case-sensitive, except for bidi class. This finds characters that
have those properties. If multiple properties are listed, they must all be
matched. Currently supported:
The command "find" must be followed by a list of property types and their
values. The values are case-sensitive, except for bidi class. This finds
characters that have those properties. If multiple properties are listed, they
must all be matched. Currently supported:
script <name> The character must have this script property. Only one
such script may be given.
@ -67,18 +67,19 @@ matched. Currently supported:
type <abbrev> The character's specific type (e.g. Lu or Nd) must match.
gbreak <name> The grapheme break property must match.
bidi <class> The character's bidi class must match.
bidi_control The character must be a bidi control character
bool <name> The character's Boolean property list must contain this
property.
If a <name> or <abbrev> is preceded by !, the value must NOT be present. For
Script Extensions, there may be a mixture of positive and negative
requirements. All must be satisfied.
Script Extensions and Boolean properties, there may be a mixture of positive
and negative requirements. All must be satisfied.
Sequences of two or more characters are shown as ranges, for example
U+0041..U+004A. No more than 100 lines are output. If there are more
characters, the list ends with ...
"list" must be followed by one of property names script, type, gbreak or bidi.
The defined values for that property are listed. */
The command "list" must be followed by one of the property names script, bool,
type, gbreak or bidi. The defined values for that property are listed. */
#ifdef HAVE_CONFIG_H
@ -296,27 +297,28 @@ return isatty(fileno(stdin));
/*************************************************
* Get script name from ucp ident *
* Get name from ucp ident *
*************************************************/
/* The utt table contains both the full script names and the 4-letter
abbreviations. So search for both and use the longer if two are found, unless
the first one is only 3 characters (some scripts have 3-character names). If
this were not just a test program it might be worth making some kind of reverse
/* The utt table contains both full names and abbreviations. So search for both
and use the longer if two are found, unless the first one is only 3 characters
and we are looking for a script (some scripts have 3-character names). If this
were not just a test program it might be worth making some kind of reverse
index. */
static const char *
get_scriptname(int script)
get_propname(int prop, int type)
{
size_t i, j, len;
size_t foundlist[2];
const char *yield;
int typex = (type == PT_SC)? PT_SCX : type;
j = 0;
for (i = 0; i < PRIV(utt_size); i++)
{
const ucp_type_table *u = PRIV(utt) + i;
if ((u->type == PT_SCX || u->type == PT_SC) && u->value == script)
if ((u->type == type || u->type == typex) && u->value == prop)
{
foundlist[j++] = i;
if (j >= 2) break;
@ -332,10 +334,11 @@ for (i = 0; i < j; i++)
{
const char *s = PRIV(utt_names) + (PRIV(utt) + foundlist[i])->name_offset;
size_t sl = strlen(s);
if (sl > len)
{
yield = s;
if (sl == 3) break;
if (sl == 3 && type == PT_SC) break;
len = sl;
}
}
@ -357,15 +360,15 @@ int script = UCD_SCRIPT(c);
int scriptx = UCD_SCRIPTX(c);
int gbprop = UCD_GRAPHBREAK(c);
int bidi = UCD_BIDICLASS(c);
int bidicontrol = UCD_BIDICONTROL(c);
unsigned int othercase = UCD_OTHERCASE(c);
int caseset = UCD_CASESET(c);
int bprops = UCD_BPROPS(c);
const unsigned char *fulltypename = US"??";
const unsigned char *typename = US"??";
const unsigned char *graphbreak = US"??";
const unsigned char *bidiclass = US"??";
const unsigned char *scriptname = CUS get_scriptname(script);
const unsigned char *scriptname = CUS get_propname(script, PT_SC);
switch (type)
{
@ -462,8 +465,8 @@ switch(bidi)
default: bidiclass = US"???"; break;
}
printf("U+%04X %c%s %s: %s, %s, %s", c, bidicontrol? '*':' ', bidiclass,
typename, fulltypename, scriptname, graphbreak);
printf("U+%04X %s %s: %s, %s, %s", c, bidiclass, typename, fulltypename,
scriptname, graphbreak);
if (is_just_one && othercase != c)
{
@ -487,7 +490,22 @@ if (scriptx != 0)
for (int i = 0; i < ucp_Unknown; i++)
if (MAPBIT(p, i) != 0)
{
printf("%s%s", sep, get_scriptname(i));
printf("%s%s", sep, get_propname(i, PT_SC));
sep = ", ";
}
printf("]");
}
if (bprops != 0)
{
const char *sep = "";
const uint32_t *p = PRIV(ucd_boolprop_sets) +
bprops * ucd_boolprop_sets_item_size;
printf(", [");
for (int i = 0; i < ucp_Bprop_Count; i++)
if (MAPBIT(p, i) != 0)
{
printf("%s%s", sep, get_propname(i, PT_BOOL));
sep = ", ";
}
printf("]");
@ -512,12 +530,14 @@ printf("\n");
static void
find_chars(unsigned char *s)
{
unsigned char name[24];
unsigned char value[24];
unsigned char name[128];
unsigned char value[128];
unsigned char *t;
unsigned int count= 0;
int scriptx_list[24];
int scriptx_list[128];
unsigned int scriptx_count = 0;
int bprop_list[128];
unsigned int bprop_count = 0;
uint32_t i, c;
int script = -1;
int type = -1;
@ -543,13 +563,18 @@ while (*s != 0)
while (isspace(*s)) s++;
value_start = s;
for (t = value; *s != 0 && !isspace(*s); s++) *t++ = *s;
for (t = value; *s != 0 && !isspace(*s); s++)
{
if (*s != '_' && *s != '-') *t++ = *s;
}
*t = 0;
while (isspace(*s)) s++;
if (strcmp(CS name, "script") == 0 ||
strcmp(CS name, "scriptx") == 0)
{
for (t = value; *t != 0; t++) *t = tolower(*t);
if (value[0] == '!')
{
if (name[6] == 'x') scriptx_not = TRUE;
@ -560,7 +585,7 @@ while (*s != 0)
for (i = 0; i < PRIV(utt_size); i++)
{
const ucp_type_table *u = PRIV(utt) + i;
if (u->type == PT_SCX && strcmp(CS(value + offset),
if ((u->type == PT_SCX || u->type == PT_SC) && strcmp(CS(value + offset),
PRIV(utt_names) + u->name_offset) == 0)
{
c = u->value;
@ -587,6 +612,33 @@ while (*s != 0)
}
}
else if (strcmp(CS name, "bool") == 0)
{
int not = 1;
if (value[0] == '!')
{
not = -1;
offset = 1;
}
for (i = 0; i < PRIV(utt_size); i++)
{
const ucp_type_table *u = PRIV(utt) + i;
if (u->type == PT_BOOL && strcmp(CS(value + offset),
PRIV(utt_names) + u->name_offset) == 0)
{
bprop_list[bprop_count++] = u->value * not;
break;
}
}
if (i >= PRIV(utt_size))
{
printf("** Unrecognized property name \"%s\"\n", value);
return;
}
}
else if (strcmp(CS name, "type") == 0)
{
if (type >= 0)
@ -681,13 +733,6 @@ while (*s != 0)
}
}
else if (strcmp(CS name, "bidi_control") == 0 ||
strcmp(CS name, "bidicontrol") == 0)
{
bidicontrol = TRUE;
s = value_start; /* No data */
}
else
{
printf("** Unrecognized property name \"%s\"\n", name);
@ -695,8 +740,8 @@ while (*s != 0)
}
}
if (script < 0 && scriptx_count == 0 && type < 0 && gbreak < 0 &&
bidiclass < 0 && !bidicontrol)
if (script < 0 && scriptx_count == 0 && bprop_count == 0 && type < 0 &&
gbreak < 0 && bidiclass < 0)
{
printf("** No properties specified\n");
return;
@ -708,48 +753,55 @@ for (c = 0; c <= 0x10ffff; c++)
if (scriptx_count > 0)
{
const uint32_t *bits_scriptx = NULL;
const uint32_t *bits_scriptx = PRIV(ucd_script_sets) + UCD_SCRIPTX(c);
unsigned int found = 0;
int scriptx = UCD_SCRIPTX(c);
if (scriptx < 0) bits_scriptx = PRIV(ucd_script_sets) - scriptx;
for (i = 0; i < scriptx_count; i++)
{
int x = scriptx_list[i]/32;
int y = scriptx_list[i]%32;
/* Positive requirement */
if (scriptx_list[i] >= 0)
{
if (scriptx >= 0)
{
if (scriptx == scriptx_list[i]) found++;
}
else
{
int x = scriptx_list[i]/32;
int y = scriptx_list[i]%32;
if ((bits_scriptx[x] & (1u<<y)) != 0) found++;
}
}
/* Negative requirement */
else
{
if (scriptx >= 0)
{
if (scriptx != -scriptx_list[i]) found++;
}
else
{
int x = scriptx_list[i]/32;
int y = scriptx_list[i]%32;
if ((bits_scriptx[x] & (1u<<y)) == 0) found++;
}
}
}
if (found != scriptx_count) continue;
}
if (bprop_count > 0)
{
const uint32_t *bits_bprop = PRIV(ucd_boolprop_sets) +
UCD_BPROPS(c) * ucd_boolprop_sets_item_size;
unsigned int found = 0;
for (i = 0; i < bprop_count; i++)
{
int x = bprop_list[i]/32;
int y = bprop_list[i]%32;
/* Positive requirement */
if (bprop_list[i] >= 0)
{
if ((bits_bprop[x] & (1u<<y)) != 0) found++;
}
/* Negative requirement */
else
{
if ((bits_bprop[-x] & (1u<<(-y))) == 0) found++;
}
}
if (found != bprop_count) continue;
}
if (type >= 0)
{
if (type_not)
@ -786,8 +838,6 @@ for (c = 0; c <= 0x10ffff; c++)
}
}
if (bidicontrol && UCD_BIDICONTROL(c) == 0) continue;
/* All conditions are met. Look for runs. */
ucd = GET_UCD(c);
@ -896,7 +946,14 @@ else if (strcmp(CS name, "list") == 0)
if (strcmp(CS name, "script") == 0 || strcmp(CS name, "scripts") == 0)
{
for (i = 0; i < PRIV(utt_size); i++)
if (PRIV(utt)[i].type == PT_SCX)
if (PRIV(utt)[i].type == PT_SCX || PRIV(utt)[i].type == PT_SC)
printf("%s\n", PRIV(utt_names) + PRIV(utt)[i].name_offset);
}
else if (strcmp(CS name, "bool") == 0)
{
for (i = 0; i < PRIV(utt_size); i++)
if (PRIV(utt)[i].type == PT_BOOL)
printf("%s\n", PRIV(utt_names) + PRIV(utt)[i].name_offset);
}

View File

@ -14,5 +14,6 @@ find bidi RLI
find bidi RLO
find bidi S
find bidi WS
find bidi_control
find script bopo
find bool prependedconcatenationmark
find bool pcm

View File

@ -1,139 +1,139 @@
findprop 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
U+0000 BN Control: Control, common, Control
U+0001 BN Control: Control, common, Control
U+0002 BN Control: Control, common, Control
U+0003 BN Control: Control, common, Control
U+0004 BN Control: Control, common, Control
U+0005 BN Control: Control, common, Control
U+0006 BN Control: Control, common, Control
U+0007 BN Control: Control, common, Control
U+0008 BN Control: Control, common, Control
U+0009 S Control: Control, common, Control
U+000A B Control: Control, common, LF
U+000B S Control: Control, common, Control
U+000C WS Control: Control, common, Control
U+000D B Control: Control, common, CR
U+000E BN Control: Control, common, Control
U+000F BN Control: Control, common, Control
U+0000 BN Control: Control, common, Control, [ascii]
U+0001 BN Control: Control, common, Control, [ascii]
U+0002 BN Control: Control, common, Control, [ascii]
U+0003 BN Control: Control, common, Control, [ascii]
U+0004 BN Control: Control, common, Control, [ascii]
U+0005 BN Control: Control, common, Control, [ascii]
U+0006 BN Control: Control, common, Control, [ascii]
U+0007 BN Control: Control, common, Control, [ascii]
U+0008 BN Control: Control, common, Control, [ascii]
U+0009 S Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
U+000A B Control: Control, common, LF, [ascii, patternwhitespace, whitespace]
U+000B S Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
U+000C WS Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
U+000D B Control: Control, common, CR, [ascii, patternwhitespace, whitespace]
U+000E BN Control: Control, common, Control, [ascii]
U+000F BN Control: Control, common, Control, [ascii]
findprop 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
U+0010 BN Control: Control, common, Control
U+0011 BN Control: Control, common, Control
U+0012 BN Control: Control, common, Control
U+0013 BN Control: Control, common, Control
U+0014 BN Control: Control, common, Control
U+0015 BN Control: Control, common, Control
U+0016 BN Control: Control, common, Control
U+0017 BN Control: Control, common, Control
U+0018 BN Control: Control, common, Control
U+0019 BN Control: Control, common, Control
U+001A BN Control: Control, common, Control
U+001B BN Control: Control, common, Control
U+001C B Control: Control, common, Control
U+001D B Control: Control, common, Control
U+001E B Control: Control, common, Control
U+001F S Control: Control, common, Control
U+0010 BN Control: Control, common, Control, [ascii]
U+0011 BN Control: Control, common, Control, [ascii]
U+0012 BN Control: Control, common, Control, [ascii]
U+0013 BN Control: Control, common, Control, [ascii]
U+0014 BN Control: Control, common, Control, [ascii]
U+0015 BN Control: Control, common, Control, [ascii]
U+0016 BN Control: Control, common, Control, [ascii]
U+0017 BN Control: Control, common, Control, [ascii]
U+0018 BN Control: Control, common, Control, [ascii]
U+0019 BN Control: Control, common, Control, [ascii]
U+001A BN Control: Control, common, Control, [ascii]
U+001B BN Control: Control, common, Control, [ascii]
U+001C B Control: Control, common, Control, [ascii]
U+001D B Control: Control, common, Control, [ascii]
U+001E B Control: Control, common, Control, [ascii]
U+001F S Control: Control, common, Control, [ascii]
findprop 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
U+0020 WS Separator: Space separator, common, Other
U+0021 ON Punctuation: Other punctuation, common, Other
U+0022 ON Punctuation: Other punctuation, common, Other
U+0023 ET Punctuation: Other punctuation, common, Other
U+0024 ET Symbol: Currency symbol, common, Other
U+0025 ET Punctuation: Other punctuation, common, Other
U+0026 ON Punctuation: Other punctuation, common, Other
U+0027 ON Punctuation: Other punctuation, common, Other
U+0028 ON Punctuation: Open punctuation, common, Other
U+0029 ON Punctuation: Close punctuation, common, Other
U+002A ON Punctuation: Other punctuation, common, Other
U+002B ES Symbol: Mathematical symbol, common, Other
U+002C CS Punctuation: Other punctuation, common, Other
U+002D ES Punctuation: Dash punctuation, common, Other
U+002E CS Punctuation: Other punctuation, common, Other
U+002F CS Punctuation: Other punctuation, common, Other
U+0020 WS Separator: Space separator, common, Other, [ascii, graphemebase, patternwhitespace, whitespace]
U+0021 ON Punctuation: Other punctuation, common, Other, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
U+0022 ON Punctuation: Other punctuation, common, Other, [ascii, graphemebase, patternsyntax, quotationmark]
U+0023 ET Punctuation: Other punctuation, common, Other, [ascii, emoji, emojicomponent, graphemebase, patternsyntax]
U+0024 ET Symbol: Currency symbol, common, Other, [ascii, graphemebase, patternsyntax]
U+0025 ET Punctuation: Other punctuation, common, Other, [ascii, graphemebase, patternsyntax]
U+0026 ON Punctuation: Other punctuation, common, Other, [ascii, graphemebase, patternsyntax]
U+0027 ON Punctuation: Other punctuation, common, Other, [ascii, caseignorable, graphemebase, patternsyntax, quotationmark]
U+0028 ON Punctuation: Open punctuation, common, Other, [ascii, bidimirrored, graphemebase, patternsyntax]
U+0029 ON Punctuation: Close punctuation, common, Other, [ascii, bidimirrored, graphemebase, patternsyntax]
U+002A ON Punctuation: Other punctuation, common, Other, [ascii, emoji, emojicomponent, graphemebase, patternsyntax]
U+002B ES Symbol: Mathematical symbol, common, Other, [ascii, graphemebase, math, patternsyntax]
U+002C CS Punctuation: Other punctuation, common, Other, [ascii, graphemebase, patternsyntax, terminalpunctuation]
U+002D ES Punctuation: Dash punctuation, common, Other, [ascii, dash, graphemebase, patternsyntax]
U+002E CS Punctuation: Other punctuation, common, Other, [ascii, caseignorable, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
U+002F CS Punctuation: Other punctuation, common, Other, [ascii, graphemebase, patternsyntax]
findprop 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
U+0030 EN Number: Decimal number, common, Other
U+0031 EN Number: Decimal number, common, Other
U+0032 EN Number: Decimal number, common, Other
U+0033 EN Number: Decimal number, common, Other
U+0034 EN Number: Decimal number, common, Other
U+0035 EN Number: Decimal number, common, Other
U+0036 EN Number: Decimal number, common, Other
U+0037 EN Number: Decimal number, common, Other
U+0038 EN Number: Decimal number, common, Other
U+0039 EN Number: Decimal number, common, Other
U+003A CS Punctuation: Other punctuation, common, Other
U+003B ON Punctuation: Other punctuation, common, Other
U+003C ON Symbol: Mathematical symbol, common, Other
U+003D ON Symbol: Mathematical symbol, common, Other
U+003E ON Symbol: Mathematical symbol, common, Other
U+003F ON Punctuation: Other punctuation, common, Other
U+0030 EN Number: Decimal number, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
U+0031 EN Number: Decimal number, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
U+0032 EN Number: Decimal number, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
U+0033 EN Number: Decimal number, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
U+0034 EN Number: Decimal number, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
U+0035 EN Number: Decimal number, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
U+0036 EN Number: Decimal number, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
U+0037 EN Number: Decimal number, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
U+0038 EN Number: Decimal number, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
U+0039 EN Number: Decimal number, common, Other, [ascii, asciihexdigit, emoji, emojicomponent, graphemebase, hexdigit, idcontinue, xidcontinue]
U+003A CS Punctuation: Other punctuation, common, Other, [ascii, caseignorable, graphemebase, patternsyntax, terminalpunctuation]
U+003B ON Punctuation: Other punctuation, common, Other, [ascii, graphemebase, patternsyntax, terminalpunctuation]
U+003C ON Symbol: Mathematical symbol, common, Other, [ascii, bidimirrored, graphemebase, math, patternsyntax]
U+003D ON Symbol: Mathematical symbol, common, Other, [ascii, graphemebase, math, patternsyntax]
U+003E ON Symbol: Mathematical symbol, common, Other, [ascii, bidimirrored, graphemebase, math, patternsyntax]
U+003F ON Punctuation: Other punctuation, common, Other, [ascii, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
findprop 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
U+0040 ON Punctuation: Other punctuation, common, Other
U+0041 L Letter: Upper case letter, latin, Other, U+0061
U+0042 L Letter: Upper case letter, latin, Other, U+0062
U+0043 L Letter: Upper case letter, latin, Other, U+0063
U+0044 L Letter: Upper case letter, latin, Other, U+0064
U+0045 L Letter: Upper case letter, latin, Other, U+0065
U+0046 L Letter: Upper case letter, latin, Other, U+0066
U+0047 L Letter: Upper case letter, latin, Other, U+0067
U+0048 L Letter: Upper case letter, latin, Other, U+0068
U+0049 L Letter: Upper case letter, latin, Other, U+0069
U+004A L Letter: Upper case letter, latin, Other, U+006A
U+004B L Letter: Upper case letter, latin, Other, U+006B, U+212A
U+004C L Letter: Upper case letter, latin, Other, U+006C
U+004D L Letter: Upper case letter, latin, Other, U+006D
U+004E L Letter: Upper case letter, latin, Other, U+006E
U+004F L Letter: Upper case letter, latin, Other, U+006F
U+0040 ON Punctuation: Other punctuation, common, Other, [ascii, graphemebase, patternsyntax]
U+0041 L Letter: Upper case letter, latin, Other, U+0061, [ascii, asciihexdigit, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, hexdigit, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0042 L Letter: Upper case letter, latin, Other, U+0062, [ascii, asciihexdigit, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, hexdigit, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0043 L Letter: Upper case letter, latin, Other, U+0063, [ascii, asciihexdigit, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, hexdigit, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0044 L Letter: Upper case letter, latin, Other, U+0064, [ascii, asciihexdigit, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, hexdigit, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0045 L Letter: Upper case letter, latin, Other, U+0065, [ascii, asciihexdigit, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, hexdigit, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0046 L Letter: Upper case letter, latin, Other, U+0066, [ascii, asciihexdigit, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, hexdigit, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0047 L Letter: Upper case letter, latin, Other, U+0067, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0048 L Letter: Upper case letter, latin, Other, U+0068, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0049 L Letter: Upper case letter, latin, Other, U+0069, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+004A L Letter: Upper case letter, latin, Other, U+006A, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+004B L Letter: Upper case letter, latin, Other, U+006B, U+212A, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+004C L Letter: Upper case letter, latin, Other, U+006C, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+004D L Letter: Upper case letter, latin, Other, U+006D, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+004E L Letter: Upper case letter, latin, Other, U+006E, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+004F L Letter: Upper case letter, latin, Other, U+006F, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
findprop 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
U+0050 L Letter: Upper case letter, latin, Other, U+0070
U+0051 L Letter: Upper case letter, latin, Other, U+0071
U+0052 L Letter: Upper case letter, latin, Other, U+0072
U+0053 L Letter: Upper case letter, latin, Other, U+0073, U+017F
U+0054 L Letter: Upper case letter, latin, Other, U+0074
U+0055 L Letter: Upper case letter, latin, Other, U+0075
U+0056 L Letter: Upper case letter, latin, Other, U+0076
U+0057 L Letter: Upper case letter, latin, Other, U+0077
U+0058 L Letter: Upper case letter, latin, Other, U+0078
U+0059 L Letter: Upper case letter, latin, Other, U+0079
U+005A L Letter: Upper case letter, latin, Other, U+007A
U+005B ON Punctuation: Open punctuation, common, Other
U+005C ON Punctuation: Other punctuation, common, Other
U+005D ON Punctuation: Close punctuation, common, Other
U+005E ON Symbol: Modifier symbol, common, Other
U+005F ON Punctuation: Connector punctuation, common, Other
U+0050 L Letter: Upper case letter, latin, Other, U+0070, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0051 L Letter: Upper case letter, latin, Other, U+0071, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0052 L Letter: Upper case letter, latin, Other, U+0072, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0053 L Letter: Upper case letter, latin, Other, U+0073, U+017F, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0054 L Letter: Upper case letter, latin, Other, U+0074, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0055 L Letter: Upper case letter, latin, Other, U+0075, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0056 L Letter: Upper case letter, latin, Other, U+0076, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0057 L Letter: Upper case letter, latin, Other, U+0077, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0058 L Letter: Upper case letter, latin, Other, U+0078, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0059 L Letter: Upper case letter, latin, Other, U+0079, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+005A L Letter: Upper case letter, latin, Other, U+007A, [ascii, alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+005B ON Punctuation: Open punctuation, common, Other, [ascii, bidimirrored, graphemebase, patternsyntax]
U+005C ON Punctuation: Other punctuation, common, Other, [ascii, graphemebase, patternsyntax]
U+005D ON Punctuation: Close punctuation, common, Other, [ascii, bidimirrored, graphemebase, patternsyntax]
U+005E ON Symbol: Modifier symbol, common, Other, [ascii, caseignorable, diacritic, graphemebase, math, patternsyntax]
U+005F ON Punctuation: Connector punctuation, common, Other, [ascii, graphemebase, idcontinue, xidcontinue]
findprop 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
U+0060 ON Symbol: Modifier symbol, common, Other
U+0061 L Letter: Lower case letter, latin, Other, U+0041
U+0062 L Letter: Lower case letter, latin, Other, U+0042
U+0063 L Letter: Lower case letter, latin, Other, U+0043
U+0064 L Letter: Lower case letter, latin, Other, U+0044
U+0065 L Letter: Lower case letter, latin, Other, U+0045
U+0066 L Letter: Lower case letter, latin, Other, U+0046
U+0067 L Letter: Lower case letter, latin, Other, U+0047
U+0068 L Letter: Lower case letter, latin, Other, U+0048
U+0069 L Letter: Lower case letter, latin, Other, U+0049
U+006A L Letter: Lower case letter, latin, Other, U+004A
U+006B L Letter: Lower case letter, latin, Other, U+004B, U+212A
U+006C L Letter: Lower case letter, latin, Other, U+004C
U+006D L Letter: Lower case letter, latin, Other, U+004D
U+006E L Letter: Lower case letter, latin, Other, U+004E
U+006F L Letter: Lower case letter, latin, Other, U+004F
U+0060 ON Symbol: Modifier symbol, common, Other, [ascii, caseignorable, diacritic, graphemebase, patternsyntax]
U+0061 L Letter: Lower case letter, latin, Other, U+0041, [ascii, asciihexdigit, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, hexdigit, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0062 L Letter: Lower case letter, latin, Other, U+0042, [ascii, asciihexdigit, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, hexdigit, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0063 L Letter: Lower case letter, latin, Other, U+0043, [ascii, asciihexdigit, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, hexdigit, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0064 L Letter: Lower case letter, latin, Other, U+0044, [ascii, asciihexdigit, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, hexdigit, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0065 L Letter: Lower case letter, latin, Other, U+0045, [ascii, asciihexdigit, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, hexdigit, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0066 L Letter: Lower case letter, latin, Other, U+0046, [ascii, asciihexdigit, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, hexdigit, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0067 L Letter: Lower case letter, latin, Other, U+0047, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0068 L Letter: Lower case letter, latin, Other, U+0048, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0069 L Letter: Lower case letter, latin, Other, U+0049, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, softdotted, xidcontinue, xidstart]
U+006A L Letter: Lower case letter, latin, Other, U+004A, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, softdotted, xidcontinue, xidstart]
U+006B L Letter: Lower case letter, latin, Other, U+004B, U+212A, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+006C L Letter: Lower case letter, latin, Other, U+004C, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+006D L Letter: Lower case letter, latin, Other, U+004D, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+006E L Letter: Lower case letter, latin, Other, U+004E, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+006F L Letter: Lower case letter, latin, Other, U+004F, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
findprop 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
U+0070 L Letter: Lower case letter, latin, Other, U+0050
U+0071 L Letter: Lower case letter, latin, Other, U+0051
U+0072 L Letter: Lower case letter, latin, Other, U+0052
U+0073 L Letter: Lower case letter, latin, Other, U+0053, U+017F
U+0074 L Letter: Lower case letter, latin, Other, U+0054
U+0075 L Letter: Lower case letter, latin, Other, U+0055
U+0076 L Letter: Lower case letter, latin, Other, U+0056
U+0077 L Letter: Lower case letter, latin, Other, U+0057
U+0078 L Letter: Lower case letter, latin, Other, U+0058
U+0079 L Letter: Lower case letter, latin, Other, U+0059
U+007A L Letter: Lower case letter, latin, Other, U+005A
U+007B ON Punctuation: Open punctuation, common, Other
U+007C ON Symbol: Mathematical symbol, common, Other
U+007D ON Punctuation: Close punctuation, common, Other
U+007E ON Symbol: Mathematical symbol, common, Other
U+007F BN Control: Control, common, Control
U+0070 L Letter: Lower case letter, latin, Other, U+0050, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0071 L Letter: Lower case letter, latin, Other, U+0051, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0072 L Letter: Lower case letter, latin, Other, U+0052, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0073 L Letter: Lower case letter, latin, Other, U+0053, U+017F, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0074 L Letter: Lower case letter, latin, Other, U+0054, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0075 L Letter: Lower case letter, latin, Other, U+0055, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0076 L Letter: Lower case letter, latin, Other, U+0056, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0077 L Letter: Lower case letter, latin, Other, U+0057, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0078 L Letter: Lower case letter, latin, Other, U+0058, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0079 L Letter: Lower case letter, latin, Other, U+0059, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+007A L Letter: Lower case letter, latin, Other, U+005A, [ascii, alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+007B ON Punctuation: Open punctuation, common, Other, [ascii, bidimirrored, graphemebase, patternsyntax]
U+007C ON Symbol: Mathematical symbol, common, Other, [ascii, graphemebase, math, patternsyntax]
U+007D ON Punctuation: Close punctuation, common, Other, [ascii, bidimirrored, graphemebase, patternsyntax]
U+007E ON Symbol: Mathematical symbol, common, Other, [ascii, graphemebase, math, patternsyntax]
U+007F BN Control: Control, common, Control, [ascii]
findprop 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f
U+0080 BN Control: Control, common, Control
@ -141,7 +141,7 @@ U+0081 BN Control: Control, common, Control
U+0082 BN Control: Control, common, Control
U+0083 BN Control: Control, common, Control
U+0084 BN Control: Control, common, Control
U+0085 B Control: Control, common, Control
U+0085 B Control: Control, common, Control, [patternwhitespace, whitespace]
U+0086 BN Control: Control, common, Control
U+0087 BN Control: Control, common, Control
U+0088 BN Control: Control, common, Control
@ -170,240 +170,240 @@ U+009D BN Control: Control, common, Control
U+009E BN Control: Control, common, Control
U+009F BN Control: Control, common, Control
findprop a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aa ab ac ad ae af
U+00A0 CS Separator: Space separator, common, Other
U+00A1 ON Punctuation: Other punctuation, common, Other
U+00A2 ET Symbol: Currency symbol, common, Other
U+00A3 ET Symbol: Currency symbol, common, Other
U+00A4 ET Symbol: Currency symbol, common, Other
U+00A5 ET Symbol: Currency symbol, common, Other
U+00A6 ON Symbol: Other symbol, common, Other
U+00A7 ON Punctuation: Other punctuation, common, Other
U+00A8 ON Symbol: Modifier symbol, common, Other
U+00A9 ON Symbol: Other symbol, common, Extended Pictographic
U+00AA L Letter: Other letter, latin, Other
U+00AB ON Punctuation: Initial punctuation, common, Other
U+00AC ON Symbol: Mathematical symbol, common, Other
U+00AD BN Control: Format, common, Control
U+00AE ON Symbol: Other symbol, common, Extended Pictographic
U+00AF ON Symbol: Modifier symbol, common, Other
U+00A0 CS Separator: Space separator, common, Other, [graphemebase, whitespace]
U+00A1 ON Punctuation: Other punctuation, common, Other, [graphemebase, patternsyntax]
U+00A2 ET Symbol: Currency symbol, common, Other, [graphemebase, patternsyntax]
U+00A3 ET Symbol: Currency symbol, common, Other, [graphemebase, patternsyntax]
U+00A4 ET Symbol: Currency symbol, common, Other, [graphemebase, patternsyntax]
U+00A5 ET Symbol: Currency symbol, common, Other, [graphemebase, patternsyntax]
U+00A6 ON Symbol: Other symbol, common, Other, [graphemebase, patternsyntax]
U+00A7 ON Punctuation: Other punctuation, common, Other, [graphemebase, patternsyntax]
U+00A8 ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+00A9 ON Symbol: Other symbol, common, Extended Pictographic, [emoji, extendedpictographic, graphemebase, patternsyntax]
U+00AA L Letter: Other letter, latin, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00AB ON Punctuation: Initial punctuation, common, Other, [bidimirrored, graphemebase, patternsyntax, quotationmark]
U+00AC ON Symbol: Mathematical symbol, common, Other, [graphemebase, math, patternsyntax]
U+00AD BN Control: Format, common, Control, [caseignorable, defaultignorablecodepoint]
U+00AE ON Symbol: Other symbol, common, Extended Pictographic, [emoji, extendedpictographic, graphemebase, patternsyntax]
U+00AF ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
findprop b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf
U+00B0 ET Symbol: Other symbol, common, Other
U+00B1 ET Symbol: Mathematical symbol, common, Other
U+00B2 EN Number: Other number, common, Other
U+00B3 EN Number: Other number, common, Other
U+00B4 ON Symbol: Modifier symbol, common, Other
U+00B5 L Letter: Lower case letter, common, Other, U+03BC, U+039C
U+00B6 ON Punctuation: Other punctuation, common, Other
U+00B7 ON Punctuation: Other punctuation, common, Other
U+00B8 ON Symbol: Modifier symbol, common, Other
U+00B9 EN Number: Other number, common, Other
U+00BA L Letter: Other letter, latin, Other
U+00BB ON Punctuation: Final punctuation, common, Other
U+00BC ON Number: Other number, common, Other
U+00BD ON Number: Other number, common, Other
U+00BE ON Number: Other number, common, Other
U+00BF ON Punctuation: Other punctuation, common, Other
U+00B0 ET Symbol: Other symbol, common, Other, [graphemebase, patternsyntax]
U+00B1 ET Symbol: Mathematical symbol, common, Other, [graphemebase, math, patternsyntax]
U+00B2 EN Number: Other number, common, Other, [graphemebase]
U+00B3 EN Number: Other number, common, Other, [graphemebase]
U+00B4 ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+00B5 L Letter: Lower case letter, common, Other, U+03BC, U+039C, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00B6 ON Punctuation: Other punctuation, common, Other, [graphemebase, patternsyntax]
U+00B7 ON Punctuation: Other punctuation, common, Other, [caseignorable, diacritic, extender, graphemebase, idcontinue, xidcontinue]
U+00B8 ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+00B9 EN Number: Other number, common, Other, [graphemebase]
U+00BA L Letter: Other letter, latin, Other, [alphabetic, cased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00BB ON Punctuation: Final punctuation, common, Other, [bidimirrored, graphemebase, patternsyntax, quotationmark]
U+00BC ON Number: Other number, common, Other, [graphemebase]
U+00BD ON Number: Other number, common, Other, [graphemebase]
U+00BE ON Number: Other number, common, Other, [graphemebase]
U+00BF ON Punctuation: Other punctuation, common, Other, [graphemebase, patternsyntax]
findprop c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 ca cb cc cd ce cf
U+00C0 L Letter: Upper case letter, latin, Other, U+00E0
U+00C1 L Letter: Upper case letter, latin, Other, U+00E1
U+00C2 L Letter: Upper case letter, latin, Other, U+00E2
U+00C3 L Letter: Upper case letter, latin, Other, U+00E3
U+00C4 L Letter: Upper case letter, latin, Other, U+00E4
U+00C5 L Letter: Upper case letter, latin, Other, U+00E5, U+212B
U+00C6 L Letter: Upper case letter, latin, Other, U+00E6
U+00C7 L Letter: Upper case letter, latin, Other, U+00E7
U+00C8 L Letter: Upper case letter, latin, Other, U+00E8
U+00C9 L Letter: Upper case letter, latin, Other, U+00E9
U+00CA L Letter: Upper case letter, latin, Other, U+00EA
U+00CB L Letter: Upper case letter, latin, Other, U+00EB
U+00CC L Letter: Upper case letter, latin, Other, U+00EC
U+00CD L Letter: Upper case letter, latin, Other, U+00ED
U+00CE L Letter: Upper case letter, latin, Other, U+00EE
U+00CF L Letter: Upper case letter, latin, Other, U+00EF
U+00C0 L Letter: Upper case letter, latin, Other, U+00E0, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00C1 L Letter: Upper case letter, latin, Other, U+00E1, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00C2 L Letter: Upper case letter, latin, Other, U+00E2, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00C3 L Letter: Upper case letter, latin, Other, U+00E3, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00C4 L Letter: Upper case letter, latin, Other, U+00E4, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00C5 L Letter: Upper case letter, latin, Other, U+00E5, U+212B, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00C6 L Letter: Upper case letter, latin, Other, U+00E6, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00C7 L Letter: Upper case letter, latin, Other, U+00E7, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00C8 L Letter: Upper case letter, latin, Other, U+00E8, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00C9 L Letter: Upper case letter, latin, Other, U+00E9, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00CA L Letter: Upper case letter, latin, Other, U+00EA, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00CB L Letter: Upper case letter, latin, Other, U+00EB, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00CC L Letter: Upper case letter, latin, Other, U+00EC, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00CD L Letter: Upper case letter, latin, Other, U+00ED, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00CE L Letter: Upper case letter, latin, Other, U+00EE, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00CF L Letter: Upper case letter, latin, Other, U+00EF, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
findprop d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 da db dc dd de df
U+00D0 L Letter: Upper case letter, latin, Other, U+00F0
U+00D1 L Letter: Upper case letter, latin, Other, U+00F1
U+00D2 L Letter: Upper case letter, latin, Other, U+00F2
U+00D3 L Letter: Upper case letter, latin, Other, U+00F3
U+00D4 L Letter: Upper case letter, latin, Other, U+00F4
U+00D5 L Letter: Upper case letter, latin, Other, U+00F5
U+00D6 L Letter: Upper case letter, latin, Other, U+00F6
U+00D7 ON Symbol: Mathematical symbol, common, Other
U+00D8 L Letter: Upper case letter, latin, Other, U+00F8
U+00D9 L Letter: Upper case letter, latin, Other, U+00F9
U+00DA L Letter: Upper case letter, latin, Other, U+00FA
U+00DB L Letter: Upper case letter, latin, Other, U+00FB
U+00DC L Letter: Upper case letter, latin, Other, U+00FC
U+00DD L Letter: Upper case letter, latin, Other, U+00FD
U+00DE L Letter: Upper case letter, latin, Other, U+00FE
U+00DF L Letter: Lower case letter, latin, Other, U+1E9E
U+00D0 L Letter: Upper case letter, latin, Other, U+00F0, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00D1 L Letter: Upper case letter, latin, Other, U+00F1, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00D2 L Letter: Upper case letter, latin, Other, U+00F2, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00D3 L Letter: Upper case letter, latin, Other, U+00F3, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00D4 L Letter: Upper case letter, latin, Other, U+00F4, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00D5 L Letter: Upper case letter, latin, Other, U+00F5, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00D6 L Letter: Upper case letter, latin, Other, U+00F6, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00D7 ON Symbol: Mathematical symbol, common, Other, [graphemebase, math, patternsyntax]
U+00D8 L Letter: Upper case letter, latin, Other, U+00F8, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00D9 L Letter: Upper case letter, latin, Other, U+00F9, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00DA L Letter: Upper case letter, latin, Other, U+00FA, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00DB L Letter: Upper case letter, latin, Other, U+00FB, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00DC L Letter: Upper case letter, latin, Other, U+00FC, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00DD L Letter: Upper case letter, latin, Other, U+00FD, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00DE L Letter: Upper case letter, latin, Other, U+00FE, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+00DF L Letter: Lower case letter, latin, Other, U+1E9E, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
findprop e0 e1 e2 e3 e4 e5 e6 e7 e8 e9 ea eb ec ed ee ef
U+00E0 L Letter: Lower case letter, latin, Other, U+00C0
U+00E1 L Letter: Lower case letter, latin, Other, U+00C1
U+00E2 L Letter: Lower case letter, latin, Other, U+00C2
U+00E3 L Letter: Lower case letter, latin, Other, U+00C3
U+00E4 L Letter: Lower case letter, latin, Other, U+00C4
U+00E5 L Letter: Lower case letter, latin, Other, U+00C5, U+212B
U+00E6 L Letter: Lower case letter, latin, Other, U+00C6
U+00E7 L Letter: Lower case letter, latin, Other, U+00C7
U+00E8 L Letter: Lower case letter, latin, Other, U+00C8
U+00E9 L Letter: Lower case letter, latin, Other, U+00C9
U+00EA L Letter: Lower case letter, latin, Other, U+00CA
U+00EB L Letter: Lower case letter, latin, Other, U+00CB
U+00EC L Letter: Lower case letter, latin, Other, U+00CC
U+00ED L Letter: Lower case letter, latin, Other, U+00CD
U+00EE L Letter: Lower case letter, latin, Other, U+00CE
U+00EF L Letter: Lower case letter, latin, Other, U+00CF
U+00E0 L Letter: Lower case letter, latin, Other, U+00C0, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00E1 L Letter: Lower case letter, latin, Other, U+00C1, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00E2 L Letter: Lower case letter, latin, Other, U+00C2, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00E3 L Letter: Lower case letter, latin, Other, U+00C3, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00E4 L Letter: Lower case letter, latin, Other, U+00C4, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00E5 L Letter: Lower case letter, latin, Other, U+00C5, U+212B, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00E6 L Letter: Lower case letter, latin, Other, U+00C6, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00E7 L Letter: Lower case letter, latin, Other, U+00C7, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00E8 L Letter: Lower case letter, latin, Other, U+00C8, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00E9 L Letter: Lower case letter, latin, Other, U+00C9, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00EA L Letter: Lower case letter, latin, Other, U+00CA, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00EB L Letter: Lower case letter, latin, Other, U+00CB, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00EC L Letter: Lower case letter, latin, Other, U+00CC, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00ED L Letter: Lower case letter, latin, Other, U+00CD, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00EE L Letter: Lower case letter, latin, Other, U+00CE, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00EF L Letter: Lower case letter, latin, Other, U+00CF, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
findprop f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff
U+00F0 L Letter: Lower case letter, latin, Other, U+00D0
U+00F1 L Letter: Lower case letter, latin, Other, U+00D1
U+00F2 L Letter: Lower case letter, latin, Other, U+00D2
U+00F3 L Letter: Lower case letter, latin, Other, U+00D3
U+00F4 L Letter: Lower case letter, latin, Other, U+00D4
U+00F5 L Letter: Lower case letter, latin, Other, U+00D5
U+00F6 L Letter: Lower case letter, latin, Other, U+00D6
U+00F7 ON Symbol: Mathematical symbol, common, Other
U+00F8 L Letter: Lower case letter, latin, Other, U+00D8
U+00F9 L Letter: Lower case letter, latin, Other, U+00D9
U+00FA L Letter: Lower case letter, latin, Other, U+00DA
U+00FB L Letter: Lower case letter, latin, Other, U+00DB
U+00FC L Letter: Lower case letter, latin, Other, U+00DC
U+00FD L Letter: Lower case letter, latin, Other, U+00DD
U+00FE L Letter: Lower case letter, latin, Other, U+00DE
U+00FF L Letter: Lower case letter, latin, Other, U+0178
U+00F0 L Letter: Lower case letter, latin, Other, U+00D0, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00F1 L Letter: Lower case letter, latin, Other, U+00D1, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00F2 L Letter: Lower case letter, latin, Other, U+00D2, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00F3 L Letter: Lower case letter, latin, Other, U+00D3, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00F4 L Letter: Lower case letter, latin, Other, U+00D4, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00F5 L Letter: Lower case letter, latin, Other, U+00D5, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00F6 L Letter: Lower case letter, latin, Other, U+00D6, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00F7 ON Symbol: Mathematical symbol, common, Other, [graphemebase, math, patternsyntax]
U+00F8 L Letter: Lower case letter, latin, Other, U+00D8, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00F9 L Letter: Lower case letter, latin, Other, U+00D9, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00FA L Letter: Lower case letter, latin, Other, U+00DA, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00FB L Letter: Lower case letter, latin, Other, U+00DB, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00FC L Letter: Lower case letter, latin, Other, U+00DC, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00FD L Letter: Lower case letter, latin, Other, U+00DD, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00FE L Letter: Lower case letter, latin, Other, U+00DE, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+00FF L Letter: Lower case letter, latin, Other, U+0178, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
findprop 0100 0101 0102 0103 0104 0105 0106
U+0100 L Letter: Upper case letter, latin, Other, U+0101
U+0101 L Letter: Lower case letter, latin, Other, U+0100
U+0102 L Letter: Upper case letter, latin, Other, U+0103
U+0103 L Letter: Lower case letter, latin, Other, U+0102
U+0104 L Letter: Upper case letter, latin, Other, U+0105
U+0105 L Letter: Lower case letter, latin, Other, U+0104
U+0106 L Letter: Upper case letter, latin, Other, U+0107
U+0100 L Letter: Upper case letter, latin, Other, U+0101, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0101 L Letter: Lower case letter, latin, Other, U+0100, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0102 L Letter: Upper case letter, latin, Other, U+0103, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0103 L Letter: Lower case letter, latin, Other, U+0102, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0104 L Letter: Upper case letter, latin, Other, U+0105, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+0105 L Letter: Lower case letter, latin, Other, U+0104, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+0106 L Letter: Upper case letter, latin, Other, U+0107, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
findprop ffe0 ffe1 ffe2 ffe3 ffe4 ffe5 ffe6 ffe7
U+FFE0 ET Symbol: Currency symbol, common, Other
U+FFE1 ET Symbol: Currency symbol, common, Other
U+FFE2 ON Symbol: Mathematical symbol, common, Other
U+FFE3 ON Symbol: Modifier symbol, common, Other
U+FFE4 ON Symbol: Other symbol, common, Other
U+FFE5 ET Symbol: Currency symbol, common, Other
U+FFE6 ET Symbol: Currency symbol, common, Other
U+FFE0 ET Symbol: Currency symbol, common, Other, [graphemebase]
U+FFE1 ET Symbol: Currency symbol, common, Other, [graphemebase]
U+FFE2 ON Symbol: Mathematical symbol, common, Other, [graphemebase, math]
U+FFE3 ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+FFE4 ON Symbol: Other symbol, common, Other, [graphemebase]
U+FFE5 ET Symbol: Currency symbol, common, Other, [graphemebase]
U+FFE6 ET Symbol: Currency symbol, common, Other, [graphemebase]
U+FFE7 L Control: Unassigned, unknown, Other
findprop ffe8 ffe9 ffea ffeb ffec ffed ffee ffef
U+FFE8 ON Symbol: Other symbol, common, Other
U+FFE9 ON Symbol: Mathematical symbol, common, Other
U+FFEA ON Symbol: Mathematical symbol, common, Other
U+FFEB ON Symbol: Mathematical symbol, common, Other
U+FFEC ON Symbol: Mathematical symbol, common, Other
U+FFED ON Symbol: Other symbol, common, Other
U+FFEE ON Symbol: Other symbol, common, Other
U+FFE8 ON Symbol: Other symbol, common, Other, [graphemebase]
U+FFE9 ON Symbol: Mathematical symbol, common, Other, [graphemebase, math]
U+FFEA ON Symbol: Mathematical symbol, common, Other, [graphemebase, math]
U+FFEB ON Symbol: Mathematical symbol, common, Other, [graphemebase, math]
U+FFEC ON Symbol: Mathematical symbol, common, Other, [graphemebase, math]
U+FFED ON Symbol: Other symbol, common, Other, [graphemebase]
U+FFEE ON Symbol: Other symbol, common, Other, [graphemebase]
U+FFEF L Control: Unassigned, unknown, Other
findprop fff8 fff9 fffa fffb fffc fffd fffe ffff
U+FFF8 BN Control: Unassigned, unknown, Control
U+FFF9 ON Control: Format, common, Control
U+FFFA ON Control: Format, common, Control
U+FFFB ON Control: Format, common, Control
U+FFFC ON Symbol: Other symbol, common, Other
U+FFFD ON Symbol: Other symbol, common, Other
U+FFFE BN Control: Unassigned, unknown, Other
U+FFFF BN Control: Unassigned, unknown, Other
U+FFF8 BN Control: Unassigned, unknown, Control, [defaultignorablecodepoint]
U+FFF9 ON Control: Format, common, Control, [caseignorable]
U+FFFA ON Control: Format, common, Control, [caseignorable]
U+FFFB ON Control: Format, common, Control, [caseignorable]
U+FFFC ON Symbol: Other symbol, common, Other, [graphemebase]
U+FFFD ON Symbol: Other symbol, common, Other, [graphemebase]
U+FFFE BN Control: Unassigned, unknown, Other, [noncharactercodepoint]
U+FFFF BN Control: Unassigned, unknown, Other, [noncharactercodepoint]
findprop 10000 10001 e01ef f0000 100000
U+10000 L Letter: Other letter, linearb, Other
U+10001 L Letter: Other letter, linearb, Other
U+E01EF NSM Mark: Non-spacing mark, inherited, Extend
U+10000 L Letter: Other letter, linearb, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+10001 L Letter: Other letter, linearb, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+E01EF NSM Mark: Non-spacing mark, inherited, Extend, [caseignorable, defaultignorablecodepoint, graphemeextend, idcontinue, variationselector, xidcontinue]
U+F0000 L Control: Private use, unknown, Other
U+100000 L Control: Private use, unknown, Other
findprop 1b00 12000 7c0 a840 10900
U+1B00 NSM Mark: Non-spacing mark, balinese, Extend
U+12000 L Letter: Other letter, cuneiform, Other
U+07C0 R Number: Decimal number, nko, Other
U+A840 L Letter: Other letter, phagspa, Other
U+10900 R Letter: Other letter, phoenician, Other
U+1B00 NSM Mark: Non-spacing mark, balinese, Extend, [alphabetic, caseignorable, graphemeextend, idcontinue, xidcontinue]
U+12000 L Letter: Other letter, cuneiform, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+07C0 R Number: Decimal number, nko, Other, [graphemebase, idcontinue, xidcontinue]
U+A840 L Letter: Other letter, phagspa, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+10900 R Letter: Other letter, phoenician, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
findprop 1d79 a77d
U+1D79 L Letter: Lower case letter, latin, Other, U+A77D
U+A77D L Letter: Upper case letter, latin, Other, U+1D79
U+1D79 L Letter: Lower case letter, latin, Other, U+A77D, [alphabetic, cased, changeswhencasemapped, changeswhentitlecased, changeswhenuppercased, graphemebase, idcontinue, idstart, lowercase, xidcontinue, xidstart]
U+A77D L Letter: Upper case letter, latin, Other, U+1D79, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
findprop 0800 083e a4d0 a4f7 aa80 aadf
U+0800 R Letter: Other letter, samaritan, Other
U+083E R Punctuation: Other punctuation, samaritan, Other
U+A4D0 L Letter: Other letter, lisu, Other
U+A4F7 L Letter: Other letter, lisu, Other
U+AA80 L Letter: Other letter, taiviet, Other
U+AADF L Punctuation: Other punctuation, taiviet, Other
U+0800 R Letter: Other letter, samaritan, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+083E R Punctuation: Other punctuation, samaritan, Other, [graphemebase, sentenceterminal, terminalpunctuation]
U+A4D0 L Letter: Other letter, lisu, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+A4F7 L Letter: Other letter, lisu, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AA80 L Letter: Other letter, taiviet, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AADF L Punctuation: Other punctuation, taiviet, Other, [graphemebase, terminalpunctuation]
findprop 10b00 10b35 13000 1342e 10840 10855
U+10B00 R Letter: Other letter, avestan, Other
U+10B35 R Letter: Other letter, avestan, Other
U+13000 L Letter: Other letter, egyptianhieroglyphs, Other
U+1342E L Letter: Other letter, egyptianhieroglyphs, Other
U+10840 R Letter: Other letter, imperialaramaic, Other
U+10855 R Letter: Other letter, imperialaramaic, Other
U+10B00 R Letter: Other letter, avestan, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+10B35 R Letter: Other letter, avestan, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+13000 L Letter: Other letter, egyptianhieroglyphs, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+1342E L Letter: Other letter, egyptianhieroglyphs, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+10840 R Letter: Other letter, imperialaramaic, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+10855 R Letter: Other letter, imperialaramaic, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
findprop 11100 1113c 11680 116c0
U+11100 NSM Mark: Non-spacing mark, chakma, Extend
U+1113C L Number: Decimal number, chakma, Other
U+11680 L Letter: Other letter, takri, Other
U+116C0 L Number: Decimal number, takri, Other
U+11100 NSM Mark: Non-spacing mark, chakma, Extend, [alphabetic, caseignorable, graphemeextend, idcontinue, xidcontinue]
U+1113C L Number: Decimal number, chakma, Other, [graphemebase, idcontinue, xidcontinue]
U+11680 L Letter: Other letter, takri, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+116C0 L Number: Decimal number, takri, Other, [graphemebase, idcontinue, xidcontinue]
findprop 0d 0a 0e 0711 1b04 1111 1169 11fe ae4c ad89
U+000D B Control: Control, common, CR
U+000A B Control: Control, common, LF
U+000E BN Control: Control, common, Control
U+0711 NSM Mark: Non-spacing mark, syriac, Extend
U+1B04 L Mark: Spacing mark, balinese, SpacingMark
U+1111 L Letter: Other letter, hangul, Hangul syllable type L
U+1169 L Letter: Other letter, hangul, Hangul syllable type V
U+11FE L Letter: Other letter, hangul, Hangul syllable type T
U+AE4C L Letter: Other letter, hangul, Hangul syllable type LV
U+AD89 L Letter: Other letter, hangul, Hangul syllable type LVT
U+000D B Control: Control, common, CR, [ascii, patternwhitespace, whitespace]
U+000A B Control: Control, common, LF, [ascii, patternwhitespace, whitespace]
U+000E BN Control: Control, common, Control, [ascii]
U+0711 NSM Mark: Non-spacing mark, syriac, Extend, [alphabetic, caseignorable, graphemeextend, idcontinue, xidcontinue]
U+1B04 L Mark: Spacing mark, balinese, SpacingMark, [alphabetic, graphemebase, idcontinue, xidcontinue]
U+1111 L Letter: Other letter, hangul, Hangul syllable type L, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+1169 L Letter: Other letter, hangul, Hangul syllable type V, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+11FE L Letter: Other letter, hangul, Hangul syllable type T, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AE4C L Letter: Other letter, hangul, Hangul syllable type LV, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AD89 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
findprop 118a0 11ac7 16ad0
U+118A0 L Letter: Upper case letter, warangciti, Other, U+118C0
U+11AC7 L Letter: Other letter, paucinhau, Other
U+16AD0 L Letter: Other letter, bassavah, Other
U+118A0 L Letter: Upper case letter, warangciti, Other, U+118C0, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+11AC7 L Letter: Other letter, paucinhau, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+16AD0 L Letter: Other letter, bassavah, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
findprop 11700 14400 108e0 11280 1d800
U+11700 L Letter: Other letter, ahom, Other
U+14400 L Letter: Other letter, anatolianhieroglyphs, Other
U+108E0 R Letter: Other letter, hatran, Other
U+11280 L Letter: Other letter, multani, Other
U+1D800 L Symbol: Other symbol, signwriting, Other
U+11700 L Letter: Other letter, ahom, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+14400 L Letter: Other letter, anatolianhieroglyphs, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+108E0 R Letter: Other letter, hatran, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+11280 L Letter: Other letter, multani, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+1D800 L Symbol: Other symbol, signwriting, Other, [graphemebase]
findprop 11800 1e903 11da9 10d27 11ee0 16e48 10f27 10f30
U+11800 L Letter: Other letter, dogra, Other
U+1E903 R Letter: Upper case letter, adlam, Other, U+1E925
U+11DA9 L Number: Decimal number, gunjalagondi, Other
U+10D27 NSM Mark: Non-spacing mark, hanifirohingya, Extend
U+11EE0 L Letter: Other letter, makasar, Other
U+16E48 L Letter: Upper case letter, medefaidrin, Other, U+16E68
U+10F27 R Letter: Other letter, oldsogdian, Other
U+10F30 AL Letter: Other letter, sogdian, Other
U+11800 L Letter: Other letter, dogra, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+1E903 R Letter: Upper case letter, adlam, Other, U+1E925, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+11DA9 L Number: Decimal number, gunjalagondi, Other, [graphemebase, idcontinue, xidcontinue]
U+10D27 NSM Mark: Non-spacing mark, hanifirohingya, Extend, [alphabetic, caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
U+11EE0 L Letter: Other letter, makasar, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+16E48 L Letter: Upper case letter, medefaidrin, Other, U+16E68, [alphabetic, cased, changeswhencasefolded, changeswhencasemapped, changeswhenlowercased, graphemebase, idcontinue, idstart, uppercase, xidcontinue, xidstart]
U+10F27 R Letter: Other letter, oldsogdian, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+10F30 AL Letter: Other letter, sogdian, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
findprop a836 a833 1cf4 20f0 1cd0
U+A836 L Symbol: Other symbol, common, Other, [devanagari, gurmukhi, gujarati, kaithi, takri, khojki, mahajani, modi, khudawadi, tirhuta, dogra]
U+A833 L Number: Other number, common, Other, [devanagari, gurmukhi, gujarati, kannada, kaithi, takri, khojki, mahajani, modi, khudawadi, tirhuta, dogra, nandinagari]
U+1CF4 NSM Mark: Non-spacing mark, inherited, Extend, [devanagari, kannada, grantha]
U+20F0 NSM Mark: Non-spacing mark, inherited, Extend, [latin, devanagari, grantha]
U+1CD0 NSM Mark: Non-spacing mark, inherited, Extend, [devanagari, bengali, kannada, grantha]
U+A836 L Symbol: Other symbol, common, Other, [devanagari, gurmukhi, gujarati, kaithi, takri, khojki, mahajani, modi, khudawadi, tirhuta, dogra], [graphemebase]
U+A833 L Number: Other number, common, Other, [devanagari, gurmukhi, gujarati, kannada, kaithi, takri, khojki, mahajani, modi, khudawadi, tirhuta, dogra, nandinagari], [graphemebase]
U+1CF4 NSM Mark: Non-spacing mark, inherited, Extend, [devanagari, kannada, grantha], [caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
U+20F0 NSM Mark: Non-spacing mark, inherited, Extend, [latin, devanagari, grantha], [caseignorable, graphemeextend, idcontinue, xidcontinue]
U+1CD0 NSM Mark: Non-spacing mark, inherited, Extend, [devanagari, bengali, kannada, grantha], [caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
findprop 32ff
U+32FF L Symbol: Other symbol, common, Other, [han]
U+32FF L Symbol: Other symbol, common, Other, [han], [graphemebase]
findprop 1f16d
U+1F16D ON Symbol: Other symbol, common, Extended Pictographic
U+1F16D ON Symbol: Other symbol, common, Extended Pictographic, [extendedpictographic, graphemebase]
findprop U+10e93 U+10eaa
U+10E93 R Letter: Other letter, yezidi, Other
U+10E93 R Letter: Other letter, yezidi, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+10EAA R Control: Unassigned, unknown, Other
findprop 0602 202a 202b 202c 2068 2069 202d 202e 2067
U+0602 AN Control: Format, arabic, Prepend
U+202A *LRE Control: Format, common, Control
U+202B *RLE Control: Format, common, Control
U+202C *PDF Control: Format, common, Control
U+2068 *FSI Control: Format, common, Control
U+2069 *PDI Control: Format, common, Control
U+202D *LRO Control: Format, common, Control
U+202E *RLO Control: Format, common, Control
U+2067 *RLI Control: Format, common, Control
U+0602 AN Control: Format, arabic, Prepend, [caseignorable, prependedconcatenationmark]
U+202A LRE Control: Format, common, Control, [bidicontrol, caseignorable, defaultignorablecodepoint]
U+202B RLE Control: Format, common, Control, [bidicontrol, caseignorable, defaultignorablecodepoint]
U+202C PDF Control: Format, common, Control, [bidicontrol, caseignorable, defaultignorablecodepoint]
U+2068 FSI Control: Format, common, Control, [bidicontrol, caseignorable, defaultignorablecodepoint]
U+2069 PDI Control: Format, common, Control, [bidicontrol, caseignorable, defaultignorablecodepoint]
U+202D LRO Control: Format, common, Control, [bidicontrol, caseignorable, defaultignorablecodepoint]
U+202E RLO Control: Format, common, Control, [bidicontrol, caseignorable, defaultignorablecodepoint]
U+2067 RLI Control: Format, common, Control, [bidicontrol, caseignorable, defaultignorablecodepoint]

View File

@ -1,224 +1,278 @@
find script Han
** Unrecognized script name "Han"
U+2E80..U+2E99 ON Symbol: Other symbol, han, Other, [graphemebase, radical]
U+2E9B..U+2EF3 ON Symbol: Other symbol, han, Other, [graphemebase, radical]
U+2F00..U+2FD5 ON Symbol: Other symbol, han, Other, [graphemebase, radical]
U+3005 L Letter: Modifier letter, han, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+3007 L Number: Letter number, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, xidcontinue, xidstart]
U+3021..U+3029 L Number: Letter number, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, xidcontinue, xidstart]
U+3038..U+303A L Number: Letter number, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, xidcontinue, xidstart]
U+303B L Letter: Modifier letter, han, Other, [alphabetic, caseignorable, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+3400..U+4DBF L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, unifiedideograph, xidcontinue, xidstart]
U+4E00..U+9FFF L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, unifiedideograph, xidcontinue, xidstart]
U+F900..U+FA0D L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, xidcontinue, xidstart]
U+FA0E..U+FA0F L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, unifiedideograph, xidcontinue, xidstart]
U+FA10 L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, xidcontinue, xidstart]
U+FA11 L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, unifiedideograph, xidcontinue, xidstart]
U+FA12 L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, xidcontinue, xidstart]
U+FA13..U+FA14 L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, unifiedideograph, xidcontinue, xidstart]
U+FA15..U+FA1E L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, xidcontinue, xidstart]
U+FA1F L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, unifiedideograph, xidcontinue, xidstart]
U+FA20 L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, xidcontinue, xidstart]
U+FA21 L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, unifiedideograph, xidcontinue, xidstart]
U+FA22 L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, xidcontinue, xidstart]
U+FA23..U+FA24 L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, unifiedideograph, xidcontinue, xidstart]
U+FA25..U+FA26 L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, xidcontinue, xidstart]
U+FA27..U+FA29 L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, unifiedideograph, xidcontinue, xidstart]
U+FA2A..U+FA6D L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, xidcontinue, xidstart]
U+FA70..U+FAD9 L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, xidcontinue, xidstart]
U+16FE2 ON Punctuation: Other punctuation, han, Other, [graphemebase]
U+16FE3 L Letter: Modifier letter, han, Other, [alphabetic, caseignorable, extender, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+16FF0..U+16FF1 L Mark: Spacing mark, han, SpacingMark, [alphabetic, diacritic, graphemebase, idcontinue, xidcontinue]
U+20000..U+2A6DF L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, unifiedideograph, xidcontinue, xidstart]
U+2A700..U+2B738 L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, unifiedideograph, xidcontinue, xidstart]
U+2B740..U+2B81D L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, unifiedideograph, xidcontinue, xidstart]
U+2B820..U+2CEA1 L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, unifiedideograph, xidcontinue, xidstart]
U+2CEB0..U+2EBE0 L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, unifiedideograph, xidcontinue, xidstart]
U+2F800..U+2FA1D L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, xidcontinue, xidstart]
U+30000..U+3134A L Letter: Other letter, han, Other, [alphabetic, graphemebase, idcontinue, idstart, ideographic, unifiedideograph, xidcontinue, xidstart]
find type Pe script Common scriptx Hangul
** Unrecognized script name "Common"
U+3009 ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [bidimirrored, graphemebase, patternsyntax]
U+300B ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [bidimirrored, graphemebase, patternsyntax]
U+300D ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [bidimirrored, graphemebase, patternsyntax, quotationmark]
U+300F ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [bidimirrored, graphemebase, patternsyntax, quotationmark]
U+3011 ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [bidimirrored, graphemebase, patternsyntax]
U+3015 ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [bidimirrored, graphemebase, patternsyntax]
U+3017 ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [bidimirrored, graphemebase, patternsyntax]
U+3019 ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [bidimirrored, graphemebase, patternsyntax]
U+301B ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [bidimirrored, graphemebase, patternsyntax]
U+301E..U+301F ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han], [graphemebase, patternsyntax, quotationmark]
U+FF63 ON Punctuation: Close punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han, yiii], [bidimirrored, graphemebase, quotationmark]
find type Sk
U+005E ON Symbol: Modifier symbol, common, Other
U+0060 ON Symbol: Modifier symbol, common, Other
U+00A8 ON Symbol: Modifier symbol, common, Other
U+00AF ON Symbol: Modifier symbol, common, Other
U+00B4 ON Symbol: Modifier symbol, common, Other
U+00B8 ON Symbol: Modifier symbol, common, Other
U+02C2..U+02C5 ON Symbol: Modifier symbol, common, Other
U+02D2..U+02DF ON Symbol: Modifier symbol, common, Other
U+02E5..U+02E9 ON Symbol: Modifier symbol, common, Other
U+02EA..U+02EB ON Symbol: Modifier symbol, bopomofo, Other
U+02ED ON Symbol: Modifier symbol, common, Other
U+02EF..U+02FF ON Symbol: Modifier symbol, common, Other
U+0375 ON Symbol: Modifier symbol, greek, Other
U+0384 ON Symbol: Modifier symbol, greek, Other
U+0385 ON Symbol: Modifier symbol, common, Other
U+0888 AL Symbol: Modifier symbol, arabic, Other
U+1FBD ON Symbol: Modifier symbol, greek, Other
U+1FBF..U+1FC1 ON Symbol: Modifier symbol, greek, Other
U+1FCD..U+1FCF ON Symbol: Modifier symbol, greek, Other
U+1FDD..U+1FDF ON Symbol: Modifier symbol, greek, Other
U+1FED..U+1FEF ON Symbol: Modifier symbol, greek, Other
U+1FFD..U+1FFE ON Symbol: Modifier symbol, greek, Other
U+309B..U+309C ON Symbol: Modifier symbol, common, Other, [hiragana, katakana]
U+A700..U+A707 ON Symbol: Modifier symbol, common, Other, [latin, han]
U+A708..U+A716 ON Symbol: Modifier symbol, common, Other
U+A720..U+A721 ON Symbol: Modifier symbol, common, Other
U+A789..U+A78A L Symbol: Modifier symbol, common, Other
U+AB5B L Symbol: Modifier symbol, common, Other
U+AB6A..U+AB6B ON Symbol: Modifier symbol, common, Other
U+FBB2..U+FBC2 AL Symbol: Modifier symbol, arabic, Other
U+FF3E ON Symbol: Modifier symbol, common, Other
U+FF40 ON Symbol: Modifier symbol, common, Other
U+FFE3 ON Symbol: Modifier symbol, common, Other
U+1F3FB..U+1F3FF ON Symbol: Modifier symbol, common, Extend
U+005E ON Symbol: Modifier symbol, common, Other, [ascii, caseignorable, diacritic, graphemebase, math, patternsyntax]
U+0060 ON Symbol: Modifier symbol, common, Other, [ascii, caseignorable, diacritic, graphemebase, patternsyntax]
U+00A8 ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+00AF ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+00B4 ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+00B8 ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+02C2..U+02C5 ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+02D2..U+02DF ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+02E5..U+02E9 ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+02EA..U+02EB ON Symbol: Modifier symbol, bopomofo, Other, [caseignorable, diacritic, graphemebase]
U+02ED ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+02EF..U+02FF ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+0375 ON Symbol: Modifier symbol, greek, Other, [caseignorable, diacritic, graphemebase]
U+0384 ON Symbol: Modifier symbol, greek, Other, [caseignorable, diacritic, graphemebase]
U+0385 ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+0888 AL Symbol: Modifier symbol, arabic, Other, [caseignorable, graphemebase]
U+1FBD ON Symbol: Modifier symbol, greek, Other, [caseignorable, diacritic, graphemebase]
U+1FBF..U+1FC1 ON Symbol: Modifier symbol, greek, Other, [caseignorable, diacritic, graphemebase]
U+1FCD..U+1FCF ON Symbol: Modifier symbol, greek, Other, [caseignorable, diacritic, graphemebase]
U+1FDD..U+1FDF ON Symbol: Modifier symbol, greek, Other, [caseignorable, diacritic, graphemebase]
U+1FED..U+1FEF ON Symbol: Modifier symbol, greek, Other, [caseignorable, diacritic, graphemebase]
U+1FFD..U+1FFE ON Symbol: Modifier symbol, greek, Other, [caseignorable, diacritic, graphemebase]
U+309B..U+309C ON Symbol: Modifier symbol, common, Other, [hiragana, katakana], [caseignorable, diacritic, graphemebase, idcontinue, idstart]
U+A700..U+A707 ON Symbol: Modifier symbol, common, Other, [latin, han], [caseignorable, diacritic, graphemebase]
U+A708..U+A716 ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+A720..U+A721 ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+A789..U+A78A L Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+AB5B L Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+AB6A..U+AB6B ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+FBB2..U+FBC2 AL Symbol: Modifier symbol, arabic, Other, [caseignorable, graphemebase]
U+FF3E ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase, math]
U+FF40 ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+FFE3 ON Symbol: Modifier symbol, common, Other, [caseignorable, diacritic, graphemebase]
U+1F3FB..U+1F3FF ON Symbol: Modifier symbol, common, Extend, [caseignorable, emoji, emojicomponent, emojimodifier, emojipresentation, graphemebase]
find type Pd
U+002D ES Punctuation: Dash punctuation, common, Other
U+058A ON Punctuation: Dash punctuation, armenian, Other
U+05BE R Punctuation: Dash punctuation, hebrew, Other
U+1400 ON Punctuation: Dash punctuation, canadianaboriginal, Other
U+1806 ON Punctuation: Dash punctuation, mongolian, Other
U+2010..U+2015 ON Punctuation: Dash punctuation, common, Other
U+2E17 ON Punctuation: Dash punctuation, common, Other
U+2E1A ON Punctuation: Dash punctuation, common, Other
U+2E3A..U+2E3B ON Punctuation: Dash punctuation, common, Other
U+2E40 ON Punctuation: Dash punctuation, common, Other
U+2E5D ON Punctuation: Dash punctuation, common, Other
U+301C ON Punctuation: Dash punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han]
U+3030 ON Punctuation: Dash punctuation, common, Extended Pictographic, [hangul, hiragana, katakana, bopomofo, han]
U+30A0 ON Punctuation: Dash punctuation, common, Other, [hiragana, katakana]
U+FE31..U+FE32 ON Punctuation: Dash punctuation, common, Other
U+FE58 ON Punctuation: Dash punctuation, common, Other
U+FE63 ES Punctuation: Dash punctuation, common, Other
U+FF0D ES Punctuation: Dash punctuation, common, Other
U+10EAD R Punctuation: Dash punctuation, yezidi, Other
U+002D ES Punctuation: Dash punctuation, common, Other, [ascii, dash, graphemebase, patternsyntax]
U+058A ON Punctuation: Dash punctuation, armenian, Other, [dash, graphemebase]
U+05BE R Punctuation: Dash punctuation, hebrew, Other, [dash, graphemebase]
U+1400 ON Punctuation: Dash punctuation, canadianaboriginal, Other, [dash, graphemebase]
U+1806 ON Punctuation: Dash punctuation, mongolian, Other, [dash, graphemebase]
U+2010..U+2015 ON Punctuation: Dash punctuation, common, Other, [dash, graphemebase, patternsyntax]
U+2E17 ON Punctuation: Dash punctuation, common, Other, [dash, graphemebase, patternsyntax]
U+2E1A ON Punctuation: Dash punctuation, common, Other, [dash, graphemebase, patternsyntax]
U+2E3A..U+2E3B ON Punctuation: Dash punctuation, common, Other, [dash, graphemebase, patternsyntax]
U+2E40 ON Punctuation: Dash punctuation, common, Other, [dash, graphemebase, patternsyntax]
U+2E5D ON Punctuation: Dash punctuation, common, Other, [dash, graphemebase, patternsyntax]
U+301C ON Punctuation: Dash punctuation, common, Other, [hangul, hiragana, katakana, bopomofo, han], [dash, graphemebase, patternsyntax]
U+3030 ON Punctuation: Dash punctuation, common, Extended Pictographic, [hangul, hiragana, katakana, bopomofo, han], [dash, emoji, extendedpictographic, graphemebase, patternsyntax]
U+30A0 ON Punctuation: Dash punctuation, common, Other, [hiragana, katakana], [dash, graphemebase]
U+FE31..U+FE32 ON Punctuation: Dash punctuation, common, Other, [dash, graphemebase]
U+FE58 ON Punctuation: Dash punctuation, common, Other, [dash, graphemebase]
U+FE63 ES Punctuation: Dash punctuation, common, Other, [dash, graphemebase, math]
U+FF0D ES Punctuation: Dash punctuation, common, Other, [dash, graphemebase]
U+10EAD R Punctuation: Dash punctuation, yezidi, Other, [dash, graphemebase]
find gbreak LVT
U+AC01..U+AC1B L Letter: Other letter, hangul, Hangul syllable type LVT
U+AC1D..U+AC37 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AC39..U+AC53 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AC55..U+AC6F L Letter: Other letter, hangul, Hangul syllable type LVT
U+AC71..U+AC8B L Letter: Other letter, hangul, Hangul syllable type LVT
U+AC8D..U+ACA7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+ACA9..U+ACC3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+ACC5..U+ACDF L Letter: Other letter, hangul, Hangul syllable type LVT
U+ACE1..U+ACFB L Letter: Other letter, hangul, Hangul syllable type LVT
U+ACFD..U+AD17 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AD19..U+AD33 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AD35..U+AD4F L Letter: Other letter, hangul, Hangul syllable type LVT
U+AD51..U+AD6B L Letter: Other letter, hangul, Hangul syllable type LVT
U+AD6D..U+AD87 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AD89..U+ADA3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+ADA5..U+ADBF L Letter: Other letter, hangul, Hangul syllable type LVT
U+ADC1..U+ADDB L Letter: Other letter, hangul, Hangul syllable type LVT
U+ADDD..U+ADF7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+ADF9..U+AE13 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AE15..U+AE2F L Letter: Other letter, hangul, Hangul syllable type LVT
U+AE31..U+AE4B L Letter: Other letter, hangul, Hangul syllable type LVT
U+AE4D..U+AE67 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AE69..U+AE83 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AE85..U+AE9F L Letter: Other letter, hangul, Hangul syllable type LVT
U+AEA1..U+AEBB L Letter: Other letter, hangul, Hangul syllable type LVT
U+AEBD..U+AED7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AED9..U+AEF3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AEF5..U+AF0F L Letter: Other letter, hangul, Hangul syllable type LVT
U+AF11..U+AF2B L Letter: Other letter, hangul, Hangul syllable type LVT
U+AF2D..U+AF47 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AF49..U+AF63 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AF65..U+AF7F L Letter: Other letter, hangul, Hangul syllable type LVT
U+AF81..U+AF9B L Letter: Other letter, hangul, Hangul syllable type LVT
U+AF9D..U+AFB7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AFB9..U+AFD3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AFD5..U+AFEF L Letter: Other letter, hangul, Hangul syllable type LVT
U+AFF1..U+B00B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B00D..U+B027 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B029..U+B043 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B045..U+B05F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B061..U+B07B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B07D..U+B097 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B099..U+B0B3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B0B5..U+B0CF L Letter: Other letter, hangul, Hangul syllable type LVT
U+B0D1..U+B0EB L Letter: Other letter, hangul, Hangul syllable type LVT
U+B0ED..U+B107 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B109..U+B123 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B125..U+B13F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B141..U+B15B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B15D..U+B177 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B179..U+B193 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B195..U+B1AF L Letter: Other letter, hangul, Hangul syllable type LVT
U+B1B1..U+B1CB L Letter: Other letter, hangul, Hangul syllable type LVT
U+B1CD..U+B1E7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B1E9..U+B203 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B205..U+B21F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B221..U+B23B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B23D..U+B257 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B259..U+B273 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B275..U+B28F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B291..U+B2AB L Letter: Other letter, hangul, Hangul syllable type LVT
U+B2AD..U+B2C7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B2C9..U+B2E3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B2E5..U+B2FF L Letter: Other letter, hangul, Hangul syllable type LVT
U+B301..U+B31B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B31D..U+B337 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B339..U+B353 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B355..U+B36F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B371..U+B38B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B38D..U+B3A7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B3A9..U+B3C3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B3C5..U+B3DF L Letter: Other letter, hangul, Hangul syllable type LVT
U+B3E1..U+B3FB L Letter: Other letter, hangul, Hangul syllable type LVT
U+B3FD..U+B417 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B419..U+B433 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B435..U+B44F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B451..U+B46B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B46D..U+B487 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B489..U+B4A3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B4A5..U+B4BF L Letter: Other letter, hangul, Hangul syllable type LVT
U+B4C1..U+B4DB L Letter: Other letter, hangul, Hangul syllable type LVT
U+B4DD..U+B4F7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B4F9..U+B513 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B515..U+B52F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B531..U+B54B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B54D..U+B567 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B569..U+B583 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B585..U+B59F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B5A1..U+B5BB L Letter: Other letter, hangul, Hangul syllable type LVT
U+B5BD..U+B5D7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B5D9..U+B5F3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B5F5..U+B60F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B611..U+B62B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B62D..U+B647 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B649..U+B663 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B665..U+B67F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B681..U+B69B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B69D..U+B6B7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B6B9..U+B6D3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B6D5..U+B6EF L Letter: Other letter, hangul, Hangul syllable type LVT
U+AC01..U+AC1B L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AC1D..U+AC37 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AC39..U+AC53 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AC55..U+AC6F L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AC71..U+AC8B L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AC8D..U+ACA7 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+ACA9..U+ACC3 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+ACC5..U+ACDF L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+ACE1..U+ACFB L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+ACFD..U+AD17 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AD19..U+AD33 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AD35..U+AD4F L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AD51..U+AD6B L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AD6D..U+AD87 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AD89..U+ADA3 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+ADA5..U+ADBF L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+ADC1..U+ADDB L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+ADDD..U+ADF7 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+ADF9..U+AE13 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AE15..U+AE2F L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AE31..U+AE4B L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AE4D..U+AE67 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AE69..U+AE83 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AE85..U+AE9F L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AEA1..U+AEBB L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AEBD..U+AED7 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AED9..U+AEF3 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AEF5..U+AF0F L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AF11..U+AF2B L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AF2D..U+AF47 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AF49..U+AF63 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AF65..U+AF7F L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AF81..U+AF9B L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AF9D..U+AFB7 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AFB9..U+AFD3 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AFD5..U+AFEF L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+AFF1..U+B00B L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B00D..U+B027 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B029..U+B043 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B045..U+B05F L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B061..U+B07B L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B07D..U+B097 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B099..U+B0B3 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B0B5..U+B0CF L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B0D1..U+B0EB L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B0ED..U+B107 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B109..U+B123 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B125..U+B13F L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B141..U+B15B L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B15D..U+B177 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B179..U+B193 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B195..U+B1AF L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B1B1..U+B1CB L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B1CD..U+B1E7 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B1E9..U+B203 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B205..U+B21F L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B221..U+B23B L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B23D..U+B257 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B259..U+B273 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B275..U+B28F L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B291..U+B2AB L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B2AD..U+B2C7 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B2C9..U+B2E3 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B2E5..U+B2FF L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B301..U+B31B L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B31D..U+B337 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B339..U+B353 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B355..U+B36F L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B371..U+B38B L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B38D..U+B3A7 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B3A9..U+B3C3 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B3C5..U+B3DF L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B3E1..U+B3FB L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B3FD..U+B417 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B419..U+B433 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B435..U+B44F L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B451..U+B46B L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B46D..U+B487 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B489..U+B4A3 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B4A5..U+B4BF L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B4C1..U+B4DB L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B4DD..U+B4F7 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B4F9..U+B513 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B515..U+B52F L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B531..U+B54B L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B54D..U+B567 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B569..U+B583 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B585..U+B59F L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B5A1..U+B5BB L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B5BD..U+B5D7 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B5D9..U+B5F3 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B5F5..U+B60F L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B611..U+B62B L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B62D..U+B647 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B649..U+B663 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B665..U+B67F L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B681..U+B69B L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B69D..U+B6B7 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B6B9..U+B6D3 L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+B6D5..U+B6EF L Letter: Other letter, hangul, Hangul syllable type LVT, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
...
find script Old_Uyghur
** Unrecognized script name "Old_Uyghur"
U+10F70..U+10F81 R Letter: Other letter, olduyghur, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+10F82..U+10F85 NSM Mark: Non-spacing mark, olduyghur, Extend, [caseignorable, diacritic, graphemeextend, idcontinue, xidcontinue]
U+10F86..U+10F89 R Punctuation: Other punctuation, olduyghur, Other, [graphemebase, sentenceterminal, terminalpunctuation]
find bidi PDF
U+202C *PDF Control: Format, common, Control
U+202C PDF Control: Format, common, Control, [bidicontrol, caseignorable, defaultignorablecodepoint]
find bidi CS
U+002C CS Punctuation: Other punctuation, common, Other
U+002E..U+002F CS Punctuation: Other punctuation, common, Other
U+003A CS Punctuation: Other punctuation, common, Other
U+00A0 CS Separator: Space separator, common, Other
U+060C CS Punctuation: Other punctuation, common, Other, [arabic, syriac, thaana, nko, hanifirohingya, yezidi]
U+202F CS Separator: Space separator, common, Other, [latin, mongolian]
U+2044 CS Symbol: Mathematical symbol, common, Other
U+FE50 CS Punctuation: Other punctuation, common, Other
U+FE52 CS Punctuation: Other punctuation, common, Other
U+FE55 CS Punctuation: Other punctuation, common, Other
U+FF0C CS Punctuation: Other punctuation, common, Other
U+FF0E..U+FF0F CS Punctuation: Other punctuation, common, Other
U+FF1A CS Punctuation: Other punctuation, common, Other
U+002C CS Punctuation: Other punctuation, common, Other, [ascii, graphemebase, patternsyntax, terminalpunctuation]
U+002E CS Punctuation: Other punctuation, common, Other, [ascii, caseignorable, graphemebase, patternsyntax, sentenceterminal, terminalpunctuation]
U+002F CS Punctuation: Other punctuation, common, Other, [ascii, graphemebase, patternsyntax]
U+003A CS Punctuation: Other punctuation, common, Other, [ascii, caseignorable, graphemebase, patternsyntax, terminalpunctuation]
U+00A0 CS Separator: Space separator, common, Other, [graphemebase, whitespace]
U+060C CS Punctuation: Other punctuation, common, Other, [arabic, syriac, thaana, nko, hanifirohingya, yezidi], [graphemebase, terminalpunctuation]
U+202F CS Separator: Space separator, common, Other, [latin, mongolian], [graphemebase, whitespace]
U+2044 CS Symbol: Mathematical symbol, common, Other, [graphemebase, math, patternsyntax]
U+FE50 CS Punctuation: Other punctuation, common, Other, [graphemebase, terminalpunctuation]
U+FE52 CS Punctuation: Other punctuation, common, Other, [caseignorable, graphemebase, sentenceterminal, terminalpunctuation]
U+FE55 CS Punctuation: Other punctuation, common, Other, [caseignorable, graphemebase, terminalpunctuation]
U+FF0C CS Punctuation: Other punctuation, common, Other, [graphemebase, terminalpunctuation]
U+FF0E CS Punctuation: Other punctuation, common, Other, [caseignorable, graphemebase, sentenceterminal, terminalpunctuation]
U+FF0F CS Punctuation: Other punctuation, common, Other, [graphemebase]
U+FF1A CS Punctuation: Other punctuation, common, Other, [caseignorable, graphemebase, terminalpunctuation]
find bidi CS type Sm
U+2044 CS Symbol: Mathematical symbol, common, Other
U+2044 CS Symbol: Mathematical symbol, common, Other, [graphemebase, math, patternsyntax]
find bidi B
U+000A B Control: Control, common, LF
U+000D B Control: Control, common, CR
U+001C..U+001E B Control: Control, common, Control
U+0085 B Control: Control, common, Control
U+2029 B Separator: Paragraph separator, common, Control
U+000A B Control: Control, common, LF, [ascii, patternwhitespace, whitespace]
U+000D B Control: Control, common, CR, [ascii, patternwhitespace, whitespace]
U+001C..U+001E B Control: Control, common, Control, [ascii]
U+0085 B Control: Control, common, Control, [patternwhitespace, whitespace]
U+2029 B Separator: Paragraph separator, common, Control, [patternwhitespace, whitespace]
find bidi FSI
U+2068 *FSI Control: Format, common, Control
U+2068 FSI Control: Format, common, Control, [bidicontrol, caseignorable, defaultignorablecodepoint]
find bidi PDI
U+2069 *PDI Control: Format, common, Control
U+2069 PDI Control: Format, common, Control, [bidicontrol, caseignorable, defaultignorablecodepoint]
find bidi RLI
U+2067 *RLI Control: Format, common, Control
U+2067 RLI Control: Format, common, Control, [bidicontrol, caseignorable, defaultignorablecodepoint]
find bidi RLO
U+202E *RLO Control: Format, common, Control
U+202E RLO Control: Format, common, Control, [bidicontrol, caseignorable, defaultignorablecodepoint]
find bidi S
U+0009 S Control: Control, common, Control
U+000B S Control: Control, common, Control
U+001F S Control: Control, common, Control
U+0009 S Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
U+000B S Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
U+001F S Control: Control, common, Control, [ascii]
find bidi WS
U+000C WS Control: Control, common, Control
U+0020 WS Separator: Space separator, common, Other
U+1680 WS Separator: Space separator, ogham, Other
U+2000..U+200A WS Separator: Space separator, common, Other
U+2028 WS Separator: Line separator, common, Control
U+205F WS Separator: Space separator, common, Other
U+3000 WS Separator: Space separator, common, Other
find bidi_control
U+061C *AL Control: Format, arabic, Control, [arabic, syriac, thaana]
U+200E *L Control: Format, common, Control
U+200F *R Control: Format, common, Control
U+202A *LRE Control: Format, common, Control
U+202B *RLE Control: Format, common, Control
U+202C *PDF Control: Format, common, Control
U+202D *LRO Control: Format, common, Control
U+202E *RLO Control: Format, common, Control
U+2066 *LRI Control: Format, common, Control
U+2067 *RLI Control: Format, common, Control
U+2068 *FSI Control: Format, common, Control
U+2069 *PDI Control: Format, common, Control
U+000C WS Control: Control, common, Control, [ascii, patternwhitespace, whitespace]
U+0020 WS Separator: Space separator, common, Other, [ascii, graphemebase, patternwhitespace, whitespace]
U+1680 WS Separator: Space separator, ogham, Other, [graphemebase, whitespace]
U+2000..U+200A WS Separator: Space separator, common, Other, [graphemebase, whitespace]
U+2028 WS Separator: Line separator, common, Control, [patternwhitespace, whitespace]
U+205F WS Separator: Space separator, common, Other, [graphemebase, whitespace]
U+3000 WS Separator: Space separator, common, Other, [graphemebase, whitespace]
find script bopo
U+02EA..U+02EB ON Symbol: Modifier symbol, bopomofo, Other
U+3105..U+312F L Letter: Other letter, bopomofo, Other
U+31A0..U+31BF L Letter: Other letter, bopomofo, Other
U+02EA..U+02EB ON Symbol: Modifier symbol, bopomofo, Other, [caseignorable, diacritic, graphemebase]
U+3105..U+312F L Letter: Other letter, bopomofo, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
U+31A0..U+31BF L Letter: Other letter, bopomofo, Other, [alphabetic, graphemebase, idcontinue, idstart, xidcontinue, xidstart]
find bool prependedconcatenationmark
U+0600..U+0604 AN Control: Format, arabic, Prepend, [caseignorable, prependedconcatenationmark]
U+0605 AN Control: Format, common, Prepend, [caseignorable, prependedconcatenationmark]
U+06DD AN Control: Format, common, Prepend, [caseignorable, prependedconcatenationmark]
U+070F AL Control: Format, syriac, Prepend, [caseignorable, prependedconcatenationmark]
U+0890..U+0891 AN Control: Format, arabic, Prepend, [caseignorable, prependedconcatenationmark]
U+08E2 AN Control: Format, common, Prepend, [caseignorable, prependedconcatenationmark]
U+110BD L Control: Format, kaithi, Prepend, [caseignorable, prependedconcatenationmark]
U+110CD L Control: Format, kaithi, Prepend, [caseignorable, prependedconcatenationmark]
find bool pcm
U+0600..U+0604 AN Control: Format, arabic, Prepend, [caseignorable, prependedconcatenationmark]
U+0605 AN Control: Format, common, Prepend, [caseignorable, prependedconcatenationmark]
U+06DD AN Control: Format, common, Prepend, [caseignorable, prependedconcatenationmark]
U+070F AL Control: Format, syriac, Prepend, [caseignorable, prependedconcatenationmark]
U+0890..U+0891 AN Control: Format, arabic, Prepend, [caseignorable, prependedconcatenationmark]
U+08E2 AN Control: Format, common, Prepend, [caseignorable, prependedconcatenationmark]
U+110BD L Control: Format, kaithi, Prepend, [caseignorable, prependedconcatenationmark]
U+110CD L Control: Format, kaithi, Prepend, [caseignorable, prependedconcatenationmark]

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2021 University of Cambridge
New API code Copyright (c) 2016-2022 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -123,21 +123,21 @@ opcode is used to select the column. The values are as follows:
*/
static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
/* ANY LAMP GC PC SC SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BIDICO */
/* ANY LAMP GC PC SC SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */
{ 0, 3, 0, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 1 }, /* PT_LAMP */
{ 0, 3, 0, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_LAMP */
{ 0, 0, 2, 4, 0, 0, 9, 10, 10, 11, 0, 0, 0, 0 }, /* PT_GC */
{ 0, 0, 5, 2, 0, 0, 15, 16, 16, 17, 0, 0, 0, 0 }, /* PT_PC */
{ 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SC */
{ 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SCX */
{ 0, 3, 6, 12, 0, 0, 3, 1, 1, 0, 0, 0, 0, 1 }, /* PT_ALNUM */
{ 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 1 }, /* PT_SPACE */
{ 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 1 }, /* PT_PXSPACE */
{ 0, 0, 8, 14, 0, 0, 0, 1, 1, 3, 0, 0, 0, 1 }, /* PT_WORD */
{ 0, 3, 6, 12, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_ALNUM */
{ 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_SPACE */
{ 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_PXSPACE */
{ 0, 0, 8, 14, 0, 0, 0, 1, 1, 3, 0, 0, 0, 0 }, /* PT_WORD */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 }, /* PT_UCNC */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_BIDICL */
{ 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 } /* PT_BIDICO */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* PT_BOOL */
};
/* This table is used to check whether auto-possessification is possible
@ -266,7 +266,7 @@ switch(ptype)
case PT_BIDICL:
return FALSE;
case PT_BIDICO:
case PT_BOOL:
return FALSE;
}

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2021 University of Cambridge
New API code Copyright (c) 2016-2022 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2021 University of Cambridge
New API code Copyright (c) 2016-2022 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -1245,14 +1245,15 @@ for (;;)
c >= 0xe000;
break;
case PT_BIDICO:
OK = UCD_BIDICONTROL(c) != 0;
break;
case PT_BIDICL:
OK = UCD_BIDICLASS(c) == code[2];
break;
case PT_BOOL:
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
prop->bprops * ucd_boolprop_sets_item_size, code[2]) != 0;
break;
/* Should never occur, but keep compilers from grumbling. */
default:
@ -1516,14 +1517,15 @@ for (;;)
c >= 0xe000;
break;
case PT_BIDICO:
OK = UCD_BIDICONTROL(c) != 0;
break;
case PT_BIDICL:
OK = UCD_BIDICLASS(c) == code[3];
break;
case PT_BOOL:
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
prop->bprops * ucd_boolprop_sets_item_size, code[3]) != 0;
break;
/* Should never occur, but keep compilers from grumbling. */
default:
@ -1770,14 +1772,15 @@ for (;;)
c >= 0xe000;
break;
case PT_BIDICO:
OK = UCD_BIDICONTROL(c) != 0;
break;
case PT_BIDICL:
OK = UCD_BIDICLASS(c) == code[3];
break;
case PT_BOOL:
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
prop->bprops * ucd_boolprop_sets_item_size, code[3]) != 0;
break;
/* Should never occur, but keep compilers from grumbling. */
default:
@ -2050,14 +2053,16 @@ for (;;)
c >= 0xe000;
break;
case PT_BIDICO:
OK = UCD_BIDICONTROL(c) != 0;
break;
case PT_BIDICL:
OK = UCD_BIDICLASS(c) == code[1 + IMM2_SIZE + 2];
break;
case PT_BOOL:
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
prop->bprops * ucd_boolprop_sets_item_size,
code[1 + IMM2_SIZE + 2]) != 0;
break;
/* Should never occur, but keep compilers from grumbling. */
default:

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2021 University of Cambridge
New API code Copyright (c) 2016-2022 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -1286,7 +1286,7 @@ match. */
#define PT_CLIST 10 /* Pseudo-property: match character list */
#define PT_UCNC 11 /* Universal Character nameable character */
#define PT_BIDICL 12 /* Specified bidi class */
#define PT_BIDICO 13 /* Bidi control character */
#define PT_BOOL 13 /* Boolean property */
#define PT_TABSIZE 14 /* Size of square table for autopossessify tests */
/* The following special properties are used only in XCLASS items, when POSIX
@ -1824,7 +1824,8 @@ typedef struct {
int32_t other_case; /* offset to other case, or zero if none */
uint8_t scriptx; /* script extension value */
uint8_t bidi; /* bidi class and control flag */
uint16_t dummy; /* spare - to round to multiple of 4 bytes */
uint8_t bprops; /* binary properties offset */
uint8_t dummy; /* spare - to round to multiple of 4 bytes */
} ucd_record;
/* UCD access macros */
@ -1848,13 +1849,14 @@ typedef struct {
#define UCD_CASESET(ch) GET_UCD(ch)->caseset
#define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case)))
#define UCD_SCRIPTX(ch) GET_UCD(ch)->scriptx
#define UCD_BPROPS(ch) GET_UCD(ch)->bprops
/* The "scriptx" field gives an offset into a vector of 32-bit words that
form a bitmap representing a list of scripts. These macros test or set the bit
for a script in the map by number. */
/* The "scriptx" and "bprops" fields contain offsets into vectors of 32-bit
words that form bitmaps representing, respectively, a list of scripts and a
list of boolean properties. These macros test or set a bit in a map, where the
bit is identified by number. */
#define MAPBIT(map,script) ((map)[(script)/32]&(1u<<((script)%32)))
#define MAPSET(map,script) ((map)[(script)/32]|=(1u<<((script)%32)))
#define MAPBIT(map,n) ((map)[(n)/32]&(1u<<((n)%32)))
#define MAPSET(map,n) ((map)[(n)/32]|=(1u<<((n)%32)))
/* The "bidi" field has the 0x80 bit set if the character has the Bidi_Control
property. The remaining bits hold the bidi class, but as there are only 23
@ -1921,6 +1923,7 @@ extern const uint8_t PRIV(utf8_table4)[];
#endif
#define _pcre2_hspace_list PCRE2_SUFFIX(_pcre2_hspace_list_)
#define _pcre2_vspace_list PCRE2_SUFFIX(_pcre2_vspace_list_)
#define _pcre2_ucd_boolprop_sets PCRE2_SUFFIX(_pcre2_ucd_boolprop_sets_)
#define _pcre2_ucd_caseless_sets PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_)
#define _pcre2_ucd_digit_sets PCRE2_SUFFIX(_pcre2_ucd_digit_sets_)
#define _pcre2_ucd_script_sets PCRE2_SUFFIX(_pcre2_ucd_script_sets_)
@ -1944,6 +1947,7 @@ extern const pcre2_match_context PRIV(default_match_context);
extern const uint8_t PRIV(default_tables)[];
extern const uint32_t PRIV(hspace_list)[];
extern const uint32_t PRIV(vspace_list)[];
extern const uint32_t PRIV(ucd_boolprop_sets)[];
extern const uint32_t PRIV(ucd_caseless_sets)[];
extern const uint32_t PRIV(ucd_digit_sets)[];
extern const uint32_t PRIV(ucd_script_sets)[];

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2015-2021 University of Cambridge
New API code Copyright (c) 2015-2022 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -2513,16 +2513,19 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
RRETURN(MATCH_NOMATCH);
break;
case PT_BIDICO:
if (((prop->bidi & UCD_BIDICONTROL_BIT) != 0) == notmatch)
RRETURN(MATCH_NOMATCH);
break;
case PT_BIDICL:
if (((prop->bidi & UCD_BIDICLASS_MASK) == Fecode[2]) == notmatch)
RRETURN(MATCH_NOMATCH);
break;
case PT_BOOL:
{
BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) +
prop->bprops * ucd_boolprop_sets_item_size, Fecode[2]) != 0;
if (ok == notmatch) RRETURN(MATCH_NOMATCH);
}
break;
/* This should never occur */
default:
@ -2845,20 +2848,6 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
break;
case PT_BIDICO:
for (i = 1; i <= Lmin; i++)
{
if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(fc, Feptr);
if ((UCD_BIDICONTROL(fc) != 0) == notmatch)
RRETURN(MATCH_NOMATCH);
}
break;
case PT_BIDICL:
for (i = 1; i <= Lmin; i++)
{
@ -2873,6 +2862,25 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
break;
case PT_BOOL:
for (i = 1; i <= Lmin; i++)
{
BOOL ok;
const ucd_record *prop;
if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(fc, Feptr);
prop = GET_UCD(fc);
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
prop->bprops * ucd_boolprop_sets_item_size, Lpropvalue) != 0;
if (ok == notmatch)
RRETURN(MATCH_NOMATCH);
}
break;
/* This should not occur */
default:
@ -3654,23 +3662,6 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
/* Control never gets here */
case PT_BIDICO:
for (;;)
{
RMATCH(Fecode, RM223);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(fc, Feptr);
if ((UCD_BIDICONTROL(fc) != 0) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
case PT_BIDICL:
for (;;)
{
@ -3688,6 +3679,28 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
/* Control never gets here */
case PT_BOOL:
for (;;)
{
BOOL ok;
const ucd_record *prop;
RMATCH(Fecode, RM223);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(fc, Feptr);
prop = GET_UCD(fc);
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
prop->bprops * ucd_boolprop_sets_item_size, Lpropvalue) != 0;
if (ok == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
/* This should never occur */
default:
return PCRE2_ERROR_INTERNAL;
@ -4221,21 +4234,6 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
break;
case PT_BIDICO:
for (i = Lmin; i < Lmax; i++)
{
int len = 1;
if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
GETCHARLENTEST(fc, Feptr, len);
if ((UCD_BIDICONTROL(fc) != 0) == notmatch) break;
Feptr+= len;
}
break;
case PT_BIDICL:
for (i = Lmin; i < Lmax; i++)
{
@ -4251,6 +4249,26 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
break;
case PT_BOOL:
for (i = Lmin; i < Lmax; i++)
{
BOOL ok;
const ucd_record *prop;
int len = 1;
if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
GETCHARLENTEST(fc, Feptr, len);
prop = GET_UCD(fc);
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
prop->bprops * ucd_boolprop_sets_item_size, Lpropvalue) != 0;
if (ok == notmatch) break;
Feptr+= len;
}
break;
default:
return PCRE2_ERROR_INTERNAL;
}

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2021 University of Cambridge
New API code Copyright (c) 2016-2022 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -232,9 +232,9 @@ function should not be called in such configurations, because a pattern that
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
into the main code, however, we just put one into this function.
Now that the table contains both full script names and their 4-character
abbreviations, we do some fiddling to try to get the full name, which is either
the longer of two found names, or a 3-character name. */
Now that the table contains both full names and their abbreviations, we do some
fiddling to try to get the full name, which is either the longer of two found
names, or a 3-character script name. */
static const char *
get_ucpname(unsigned int ptype, unsigned int pvalue)
@ -243,19 +243,18 @@ get_ucpname(unsigned int ptype, unsigned int pvalue)
int count = 0;
const char *yield = "??";
size_t len = 0;
if (ptype == PT_SC) ptype = PT_SCX; /* Table has scx values */
unsigned int ptypex = (ptype == PT_SC)? PT_SCX : ptype;
for (int i = PRIV(utt_size) - 1; i >= 0; i--)
{
const ucp_type_table *u = PRIV(utt) + i;
if (ptype == u->type && pvalue == u->value)
if ((ptype == u->type || ptypex == u->type) && pvalue == u->value)
{
const char *s = PRIV(utt_names) + u->name_offset;
size_t sl = strlen(s);
if (sl == 3)
if (sl == 3 && (u->type == PT_SC || u->type == PT_SCX))
{
yield = s;
break;

File diff suppressed because it is too large Load Diff

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2021 University of Cambridge
New API code Copyright (c) 2016-2022 University of Cambridge
This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
Instead, modify the maint/GenerateUcpHeader.py script and run it to generate
@ -100,6 +100,69 @@ enum {
ucp_Zs, /* Space separator */
};
/* These are the Boolean properties, one enumerator per property. This header
is auto-generated and the names appear in the generator's sorted order, so do
not reorder or edit the list by hand. NOTE(review): the values appear to be the
bit numbers that MAPBIT() tests within an entry of ucd_boolprop_sets[] --
confirm against the MAPBIT definition and the generating script. */
enum {
ucp_ASCII,
ucp_ASCII_Hex_Digit,
ucp_Alphabetic,
ucp_Bidi_Control,
ucp_Bidi_Mirrored,
ucp_Case_Ignorable,
ucp_Cased,
ucp_Changes_When_Casefolded,
ucp_Changes_When_Casemapped,
ucp_Changes_When_Lowercased,
ucp_Changes_When_Titlecased,
ucp_Changes_When_Uppercased,
ucp_Dash,
ucp_Default_Ignorable_Code_Point,
ucp_Deprecated,
ucp_Diacritic,
ucp_Emoji,
ucp_Emoji_Component,
ucp_Emoji_Modifier,
ucp_Emoji_Modifier_Base,
ucp_Emoji_Presentation,
ucp_Extended_Pictographic,
ucp_Extender,
ucp_Grapheme_Base,
ucp_Grapheme_Extend,
ucp_Grapheme_Link,
ucp_Hex_Digit,
ucp_IDS_Binary_Operator,
ucp_IDS_Trinary_Operator,
ucp_ID_Continue,
ucp_ID_Start,
ucp_Ideographic,
ucp_Join_Control,
ucp_Logical_Order_Exception,
ucp_Lowercase,
ucp_Math,
ucp_Noncharacter_Code_Point,
ucp_Pattern_Syntax,
ucp_Pattern_White_Space,
ucp_Prepended_Concatenation_Mark,
ucp_Quotation_Mark,
ucp_Radical,
ucp_Regional_Indicator,
ucp_Sentence_Terminal,
ucp_Soft_Dotted,
ucp_Terminal_Punctuation,
ucp_Unified_Ideograph,
ucp_Uppercase,
ucp_Variation_Selector,
ucp_White_Space,
ucp_XID_Continue,
ucp_XID_Start,
/* This must be last: as the final enumerator its value is the number of
Boolean properties listed above. */
ucp_Bprop_Count
};
/* Size of each entry in ucd_boolprop_sets[], in 32-bit words: one bit per
Boolean property, rounded up to a whole number of words. */
#define ucd_boolprop_sets_item_size 2
/* These are the bidi class values. */
enum {
@ -322,6 +385,10 @@ enum {
ucp_Script_Count
};
/* Size of entries in ucd_script_sets[] */
#define ucd_script_sets_item_size 3
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
/* End of pcre2_ucp.h */

File diff suppressed because it is too large Load Diff

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2021 University of Cambridge
New API code Copyright (c) 2016-2022 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -214,16 +214,17 @@ while ((t = *data++) != XCL_END)
}
break;
case PT_BIDICO:
if (((prop->bidi & UCD_BIDICONTROL_BIT) != 0) == isprop)
return !negated;
break;
case PT_BIDICL:
if (((prop->bidi & UCD_BIDICLASS_MASK) == data[1]) == isprop)
return !negated;
break;
case PT_BOOL:
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
prop->bprops * ucd_boolprop_sets_item_size, data[1]) != 0;
if (ok == isprop) return !negated;
break;
/* The following three properties can occur only in an XCLASS, as there
is no \p or \P coding for them. */

View File

@ -4937,7 +4937,7 @@ Subject length lower bound = 3
/\p{L&}+\p{bidi_control}/B
------------------------------------------------------------------
Bra
prop Lc ++
prop Lc +
prop Bidicontrol
Ket
End
@ -4946,7 +4946,7 @@ Subject length lower bound = 3
/\p{bidi_control}+\p{L&}/B
------------------------------------------------------------------
Bra
prop Bidicontrol ++
prop Bidicontrol +
prop Lc
Ket
End