Add support for Bidi_Control and Bidi_Class properties

This commit is contained in:
Philip Hazel 2021-12-08 15:34:27 +00:00
parent 823d4ac956
commit 0246c6bf64
21 changed files with 2210 additions and 1316 deletions

View File

@ -546,8 +546,9 @@ Each is followed by two code units that encode the desired property as a type
and a value. The types are a set of #defines of the form PT_xxx, and the values
are enumerations of the form ucp_xx, defined in the pcre2_ucp.h source file.
The value is relevant only for PT_GC (General Category), PT_PC (Particular
Category), PT_SC (Script), and the pseudo-property PT_CLIST, which is used to
identify a list of case-equivalent characters when there are three or more.
Category), PT_SC (Script), PT_BIDICL (Bidi Class), and the pseudo-property
PT_CLIST, which is used to identify a list of case-equivalent characters when
there are three or more.
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
three code units: OP_PROP or OP_NOTPROP, and then the desired property type and
@ -827,4 +828,4 @@ not a real opcode, but is used to check at compile time that tables indexed by
opcode are the correct length, in order to catch updating errors.
Philip Hazel
12 July 2019
December 2021

View File

@ -29,6 +29,9 @@
# Added script names for Unicode 12.1.0, 27-July-2019.
# Added script names for Unicode 13.0.0, 10-March-2020.
# Added Script names for Unicode 14.0.0, PCRE2-10.39
# Added support for bidi class and bidi control, 06-December-2021
# This also involved lower casing strings and removing underscores, in
# accordance with Unicode's "loose matching" rules, which Perl observes.
script_names = ['Unknown', 'Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \
'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \
@ -78,21 +81,46 @@ category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
general_category_names = ['C', 'L', 'M', 'N', 'P', 'S', 'Z']
# First add the Unicode script and category names.
bidiclass_names = ['bidiAL', 'bidiAN', 'bidiB', 'bidiBN', 'bidiCS', 'bidiEN',
'bidiES', 'bidiET', 'bidiFSI', 'bidiL', 'bidiLRE', 'bidiLRI', 'bidiLRO',
'bidiNSM', 'bidiON', 'bidiPDF', 'bidiPDI', 'bidiR', 'bidiRLE', 'bidiRLI',
'bidiRLO', 'bidiS', 'bidiWS' ]
utt_table = list(zip(script_names, ['PT_SC'] * len(script_names)))
utt_table += list(zip(category_names, ['PT_PC'] * len(category_names)))
utt_table += list(zip(general_category_names, ['PT_GC'] * len(general_category_names)))
# Create standardized versions of the names by lowercasing and removing
# underscores.
# Now add our own specials.
def stdnames(x):
    """Return the "loose matching" forms of the property names in *x*.

    Each name is converted to lower case and has every underscore removed,
    so that for example 'Canadian_Aboriginal' becomes 'canadianaboriginal'.
    Other characters, such as the ampersand in 'L&', are left untouched.
    The result is a new list in the same order as *x*; *x* itself is not
    modified.
    """
    return [name.lower().replace('_', '') for name in x]
utt_table.append(('Any', 'PT_ANY'))
utt_table.append(('L&', 'PT_LAMP'))
utt_table.append(('Xan', 'PT_ALNUM'))
utt_table.append(('Xps', 'PT_PXSPACE'))
utt_table.append(('Xsp', 'PT_SPACE'))
utt_table.append(('Xuc', 'PT_UCNC'))
utt_table.append(('Xwd', 'PT_WORD'))
std_script_names = stdnames(script_names)
std_category_names = stdnames(category_names)
std_general_category_names = stdnames(general_category_names)
std_bidiclass_names = stdnames(bidiclass_names)
# Create the table, starting with the Unicode script, category and bidi class
# names. We keep both the standardized name and the original, because the
# latter is used for the ucp_xx names.
utt_table = list(zip(std_script_names, script_names, ['PT_SC'] * len(script_names)))
utt_table += list(zip(std_category_names, category_names, ['PT_PC'] * len(category_names)))
utt_table += list(zip(std_general_category_names, general_category_names, ['PT_GC'] * len(general_category_names)))
utt_table += list(zip(std_bidiclass_names, bidiclass_names, ['PT_BIDICL'] * len(bidiclass_names)))
# Now add our own specials. Note both the standardized and capitalized forms
# are needed.
utt_table.append(('any', 'Any', 'PT_ANY'))
utt_table.append(('bidicontrol', 'Bidi_Control', 'PT_BIDICO'))
utt_table.append(('l&', 'L&', 'PT_LAMP'))
utt_table.append(('lc', 'LC', 'PT_LAMP'))
utt_table.append(('xan', 'Xan', 'PT_ALNUM'))
utt_table.append(('xps', 'Xps', 'PT_PXSPACE'))
utt_table.append(('xsp', 'Xsp', 'PT_SPACE'))
utt_table.append(('xuc', 'Xuc', 'PT_UCNC'))
utt_table.append(('xwd', 'Xwd', 'PT_WORD'))
# Sort the table.
@ -104,9 +132,7 @@ utt_table.sort()
for utt in utt_table:
print('#define STRING_%s0' % (utt[0].replace('&', '_AMPERSAND')), end=' ')
for c in utt[0]:
if c == '_':
print('STR_UNDERSCORE', end=' ')
elif c == '&':
if c == '&':
print('STR_AMPERSAND', end=' ')
else:
print('STR_%s' % c, end=' ');
@ -121,20 +147,18 @@ for utt in utt_table:
if utt == utt_table[-1]:
last = ';'
print(' STRING_%s0%s' % (utt[0].replace('&', '_AMPERSAND'), last))
# This was how it was done before the EBCDIC-compatible modification.
# print ' "%s\\0"%s' % (utt[0], last)
print('\nconst ucp_type_table PRIV(utt)[] = {')
offset = 0
last = ','
for utt in utt_table:
if utt[1] in ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE',
'PT_SPACE', 'PT_UCNC', 'PT_WORD'):
if utt[2] in ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE',
'PT_SPACE', 'PT_UCNC', 'PT_WORD', 'PT_BIDICO'):
value = '0'
else:
value = 'ucp_' + utt[0]
value = 'ucp_' + utt[1]
if utt == utt_table[-1]:
last = ''
print(' { %3d, %s, %s }%s' % (offset, utt[1], value, last))
print(' { %3d, %s, %s }%s' % (offset, utt[2], value, last))
offset += len(utt[0]) + 1
print('};')

View File

@ -34,11 +34,15 @@ return code is always zero.
There are three commands:
"findprop" must be followed by a space-separated list of Unicode code points as
hex numbers, either without any prefix or starting with "U+". The output is one
line per character, giving its Unicode properties followed by its other case or
cases if one or more exist, followed by its Script Extension list if it is not
just the same as the base script. This list is in square brackets. The
properties are:
hex numbers, either without any prefix or starting with "U+", or as individual
UTF-8 characters preceded by '+'. For example:
findprop U+1234 5Abc +?
The output is one line per character, giving its Unicode properties followed by
its other case or cases if one or more exist, followed by its Script Extension
list if it is not just the same as the base script. This list is in square
brackets. The properties are:
Bidi control shown as '*' if true
Bidi class e.g. NSM (most common is L)
@ -47,9 +51,13 @@ Specific type e.g. Upper case letter
Script e.g. Medefaidrin
Grapheme break type e.g. Extend (most common is Other)
The script names are all in lower case, with underscores removed, because
that's how they are stored for "loose" matching.
"find" must be followed by a list of property names and their values. The
values are case-sensitive. This finds characters that have those properties. If
multiple properties are listed, they must all be matched. Currently supported:
values are case-sensitive, except for bidi class. This finds characters that
have those properties. If multiple properties are listed, they must all be
matched. Currently supported:
script <name> The character must have this script property. Only one
such script may be given.
@ -202,6 +210,41 @@ static const unsigned int utf8_table1[] = {
static const int utf8_table2[] = {
0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
/* Macro to pick up the remaining bytes of a UTF-8 character, advancing
the pointer. On entry, c must already hold the first (lead) byte of a
multibyte character and eptr must point at the byte that follows it. The
length of the sequence is deduced from the high-order bits of c. On exit, c
holds the assembled code point and eptr has been advanced past the 1 to 5
following bytes that were consumed, so the obsolete 5- and 6-byte forms are
decoded as well as the 2-, 3- and 4-byte ones. Each following byte is simply
masked with 0x3f; the macro does not check that it really is a continuation
byte, nor that it lies within the buffer. */
#define GETUTF8INC(c, eptr) \
{ \
if ((c & 0x20u) == 0) \
c = ((c & 0x1fu) << 6) | (*eptr++ & 0x3fu); \
else if ((c & 0x10u) == 0) \
{ \
c = ((c & 0x0fu) << 12) | ((*eptr & 0x3fu) << 6) | (eptr[1] & 0x3fu); \
eptr += 2; \
} \
else if ((c & 0x08u) == 0) \
{ \
c = ((c & 0x07u) << 18) | ((*eptr & 0x3fu) << 12) | \
((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \
eptr += 3; \
} \
else if ((c & 0x04u) == 0) \
{ \
c = ((c & 0x03u) << 24) | ((*eptr & 0x3fu) << 18) | \
((eptr[1] & 0x3fu) << 12) | ((eptr[2] & 0x3fu) << 6) | \
(eptr[3] & 0x3fu); \
eptr += 4; \
} \
else \
{ \
c = ((c & 0x01u) << 30) | ((*eptr & 0x3fu) << 24) | \
((eptr[1] & 0x3fu) << 18) | ((eptr[2] & 0x3fu) << 12) | \
((eptr[3] & 0x3fu) << 6) | (eptr[4] & 0x3fu); \
eptr += 5; \
} \
}
/*************************************************
* Convert character value to UTF-8 *
@ -267,6 +310,7 @@ for (i = 0; i < PRIV(utt_size); i++)
u = PRIV(utt) + i;
if (u->type == PT_SC && u->value == script) break;
}
if (i < PRIV(utt_size))
return PRIV(utt_names) + u->name_offset;
@ -601,7 +645,7 @@ while (*s != 0)
}
for (i = 0; i < sizeof(bd_names)/sizeof(char *); i += 2)
{
if (strcmp(CS (value + offset), CS bd_names[i]) == 0)
if (strcasecmp(CS (value + offset), CS bd_names[i]) == 0)
{
bidiclass = i/2;
break;
@ -787,12 +831,26 @@ if (strcmp(CS name, "findprop") == 0)
unsigned int c;
unsigned char *endptr;
t = s;
if (*t == '+')
{
c = *(++t);
if (c > 0x7fu)
{
GETCHARINC(c, t);
}
endptr = t+1;
}
else
{
if (strncmp(CS t, "U+", 2) == 0) t += 2;
c = strtoul(CS t, CSS(&endptr), 16);
}
if (*endptr != 0 && !isspace(*endptr))
{
while (*endptr != 0 && !isspace(*endptr)) endptr++;
printf("** Invalid hex number: ignored \"%.*s\"\n", (int)(endptr-s), s);
printf("** Invalid character specifier: ignored \"%.*s\"\n", (int)(endptr-s), s);
}
else
{
@ -884,19 +942,19 @@ if (argc > 1 && strcmp(argv[1], "-s") == 0)
if (argc > first_arg)
{
int i;
BOOL hexfirst = TRUE;
BOOL datafirst = TRUE;
char *arg = argv[first_arg];
unsigned char *s = buffer;
if (strncmp(arg, "U+", 2) != 0 && !isdigit(*arg))
if (*arg != '+' && strncmp(arg, "U+", 2) != 0 && !isdigit(*arg))
{
while (*arg != 0)
{
if (!isxdigit(*arg++)) { hexfirst = FALSE; break; }
if (!isxdigit(*arg++)) { datafirst = FALSE; break; }
}
}
if (hexfirst)
if (datafirst)
{
strcpy(CS s, "findprop ");
s += 9;

View File

@ -1,409 +1,409 @@
findprop 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
U+0000 BN Control: Control, Common, Control
U+0001 BN Control: Control, Common, Control
U+0002 BN Control: Control, Common, Control
U+0003 BN Control: Control, Common, Control
U+0004 BN Control: Control, Common, Control
U+0005 BN Control: Control, Common, Control
U+0006 BN Control: Control, Common, Control
U+0007 BN Control: Control, Common, Control
U+0008 BN Control: Control, Common, Control
U+0009 S Control: Control, Common, Control
U+000A B Control: Control, Common, LF
U+000B S Control: Control, Common, Control
U+000C WS Control: Control, Common, Control
U+000D B Control: Control, Common, CR
U+000E BN Control: Control, Common, Control
U+000F BN Control: Control, Common, Control
U+0000 BN Control: Control, common, Control
U+0001 BN Control: Control, common, Control
U+0002 BN Control: Control, common, Control
U+0003 BN Control: Control, common, Control
U+0004 BN Control: Control, common, Control
U+0005 BN Control: Control, common, Control
U+0006 BN Control: Control, common, Control
U+0007 BN Control: Control, common, Control
U+0008 BN Control: Control, common, Control
U+0009 S Control: Control, common, Control
U+000A B Control: Control, common, LF
U+000B S Control: Control, common, Control
U+000C WS Control: Control, common, Control
U+000D B Control: Control, common, CR
U+000E BN Control: Control, common, Control
U+000F BN Control: Control, common, Control
findprop 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
U+0010 BN Control: Control, Common, Control
U+0011 BN Control: Control, Common, Control
U+0012 BN Control: Control, Common, Control
U+0013 BN Control: Control, Common, Control
U+0014 BN Control: Control, Common, Control
U+0015 BN Control: Control, Common, Control
U+0016 BN Control: Control, Common, Control
U+0017 BN Control: Control, Common, Control
U+0018 BN Control: Control, Common, Control
U+0019 BN Control: Control, Common, Control
U+001A BN Control: Control, Common, Control
U+001B BN Control: Control, Common, Control
U+001C B Control: Control, Common, Control
U+001D B Control: Control, Common, Control
U+001E B Control: Control, Common, Control
U+001F S Control: Control, Common, Control
U+0010 BN Control: Control, common, Control
U+0011 BN Control: Control, common, Control
U+0012 BN Control: Control, common, Control
U+0013 BN Control: Control, common, Control
U+0014 BN Control: Control, common, Control
U+0015 BN Control: Control, common, Control
U+0016 BN Control: Control, common, Control
U+0017 BN Control: Control, common, Control
U+0018 BN Control: Control, common, Control
U+0019 BN Control: Control, common, Control
U+001A BN Control: Control, common, Control
U+001B BN Control: Control, common, Control
U+001C B Control: Control, common, Control
U+001D B Control: Control, common, Control
U+001E B Control: Control, common, Control
U+001F S Control: Control, common, Control
findprop 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
U+0020 WS Separator: Space separator, Common, Other
U+0021 ON Punctuation: Other punctuation, Common, Other
U+0022 ON Punctuation: Other punctuation, Common, Other
U+0023 ET Punctuation: Other punctuation, Common, Other
U+0024 ET Symbol: Currency symbol, Common, Other
U+0025 ET Punctuation: Other punctuation, Common, Other
U+0026 ON Punctuation: Other punctuation, Common, Other
U+0027 ON Punctuation: Other punctuation, Common, Other
U+0028 ON Punctuation: Open punctuation, Common, Other
U+0029 ON Punctuation: Close punctuation, Common, Other
U+002A ON Punctuation: Other punctuation, Common, Other
U+002B ES Symbol: Mathematical symbol, Common, Other
U+002C CS Punctuation: Other punctuation, Common, Other
U+002D ES Punctuation: Dash punctuation, Common, Other
U+002E CS Punctuation: Other punctuation, Common, Other
U+002F CS Punctuation: Other punctuation, Common, Other
U+0020 WS Separator: Space separator, common, Other
U+0021 ON Punctuation: Other punctuation, common, Other
U+0022 ON Punctuation: Other punctuation, common, Other
U+0023 ET Punctuation: Other punctuation, common, Other
U+0024 ET Symbol: Currency symbol, common, Other
U+0025 ET Punctuation: Other punctuation, common, Other
U+0026 ON Punctuation: Other punctuation, common, Other
U+0027 ON Punctuation: Other punctuation, common, Other
U+0028 ON Punctuation: Open punctuation, common, Other
U+0029 ON Punctuation: Close punctuation, common, Other
U+002A ON Punctuation: Other punctuation, common, Other
U+002B ES Symbol: Mathematical symbol, common, Other
U+002C CS Punctuation: Other punctuation, common, Other
U+002D ES Punctuation: Dash punctuation, common, Other
U+002E CS Punctuation: Other punctuation, common, Other
U+002F CS Punctuation: Other punctuation, common, Other
findprop 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
U+0030 EN Number: Decimal number, Common, Other
U+0031 EN Number: Decimal number, Common, Other
U+0032 EN Number: Decimal number, Common, Other
U+0033 EN Number: Decimal number, Common, Other
U+0034 EN Number: Decimal number, Common, Other
U+0035 EN Number: Decimal number, Common, Other
U+0036 EN Number: Decimal number, Common, Other
U+0037 EN Number: Decimal number, Common, Other
U+0038 EN Number: Decimal number, Common, Other
U+0039 EN Number: Decimal number, Common, Other
U+003A CS Punctuation: Other punctuation, Common, Other
U+003B ON Punctuation: Other punctuation, Common, Other
U+003C ON Symbol: Mathematical symbol, Common, Other
U+003D ON Symbol: Mathematical symbol, Common, Other
U+003E ON Symbol: Mathematical symbol, Common, Other
U+003F ON Punctuation: Other punctuation, Common, Other
U+0030 EN Number: Decimal number, common, Other
U+0031 EN Number: Decimal number, common, Other
U+0032 EN Number: Decimal number, common, Other
U+0033 EN Number: Decimal number, common, Other
U+0034 EN Number: Decimal number, common, Other
U+0035 EN Number: Decimal number, common, Other
U+0036 EN Number: Decimal number, common, Other
U+0037 EN Number: Decimal number, common, Other
U+0038 EN Number: Decimal number, common, Other
U+0039 EN Number: Decimal number, common, Other
U+003A CS Punctuation: Other punctuation, common, Other
U+003B ON Punctuation: Other punctuation, common, Other
U+003C ON Symbol: Mathematical symbol, common, Other
U+003D ON Symbol: Mathematical symbol, common, Other
U+003E ON Symbol: Mathematical symbol, common, Other
U+003F ON Punctuation: Other punctuation, common, Other
findprop 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
U+0040 ON Punctuation: Other punctuation, Common, Other
U+0041 L Letter: Upper case letter, Latin, Other, U+0061
U+0042 L Letter: Upper case letter, Latin, Other, U+0062
U+0043 L Letter: Upper case letter, Latin, Other, U+0063
U+0044 L Letter: Upper case letter, Latin, Other, U+0064
U+0045 L Letter: Upper case letter, Latin, Other, U+0065
U+0046 L Letter: Upper case letter, Latin, Other, U+0066
U+0047 L Letter: Upper case letter, Latin, Other, U+0067
U+0048 L Letter: Upper case letter, Latin, Other, U+0068
U+0049 L Letter: Upper case letter, Latin, Other, U+0069
U+004A L Letter: Upper case letter, Latin, Other, U+006A
U+004B L Letter: Upper case letter, Latin, Other, U+006B, U+212A
U+004C L Letter: Upper case letter, Latin, Other, U+006C
U+004D L Letter: Upper case letter, Latin, Other, U+006D
U+004E L Letter: Upper case letter, Latin, Other, U+006E
U+004F L Letter: Upper case letter, Latin, Other, U+006F
U+0040 ON Punctuation: Other punctuation, common, Other
U+0041 L Letter: Upper case letter, latin, Other, U+0061
U+0042 L Letter: Upper case letter, latin, Other, U+0062
U+0043 L Letter: Upper case letter, latin, Other, U+0063
U+0044 L Letter: Upper case letter, latin, Other, U+0064
U+0045 L Letter: Upper case letter, latin, Other, U+0065
U+0046 L Letter: Upper case letter, latin, Other, U+0066
U+0047 L Letter: Upper case letter, latin, Other, U+0067
U+0048 L Letter: Upper case letter, latin, Other, U+0068
U+0049 L Letter: Upper case letter, latin, Other, U+0069
U+004A L Letter: Upper case letter, latin, Other, U+006A
U+004B L Letter: Upper case letter, latin, Other, U+006B, U+212A
U+004C L Letter: Upper case letter, latin, Other, U+006C
U+004D L Letter: Upper case letter, latin, Other, U+006D
U+004E L Letter: Upper case letter, latin, Other, U+006E
U+004F L Letter: Upper case letter, latin, Other, U+006F
findprop 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
U+0050 L Letter: Upper case letter, Latin, Other, U+0070
U+0051 L Letter: Upper case letter, Latin, Other, U+0071
U+0052 L Letter: Upper case letter, Latin, Other, U+0072
U+0053 L Letter: Upper case letter, Latin, Other, U+0073, U+017F
U+0054 L Letter: Upper case letter, Latin, Other, U+0074
U+0055 L Letter: Upper case letter, Latin, Other, U+0075
U+0056 L Letter: Upper case letter, Latin, Other, U+0076
U+0057 L Letter: Upper case letter, Latin, Other, U+0077
U+0058 L Letter: Upper case letter, Latin, Other, U+0078
U+0059 L Letter: Upper case letter, Latin, Other, U+0079
U+005A L Letter: Upper case letter, Latin, Other, U+007A
U+005B ON Punctuation: Open punctuation, Common, Other
U+005C ON Punctuation: Other punctuation, Common, Other
U+005D ON Punctuation: Close punctuation, Common, Other
U+005E ON Symbol: Modifier symbol, Common, Other
U+005F ON Punctuation: Connector punctuation, Common, Other
U+0050 L Letter: Upper case letter, latin, Other, U+0070
U+0051 L Letter: Upper case letter, latin, Other, U+0071
U+0052 L Letter: Upper case letter, latin, Other, U+0072
U+0053 L Letter: Upper case letter, latin, Other, U+0073, U+017F
U+0054 L Letter: Upper case letter, latin, Other, U+0074
U+0055 L Letter: Upper case letter, latin, Other, U+0075
U+0056 L Letter: Upper case letter, latin, Other, U+0076
U+0057 L Letter: Upper case letter, latin, Other, U+0077
U+0058 L Letter: Upper case letter, latin, Other, U+0078
U+0059 L Letter: Upper case letter, latin, Other, U+0079
U+005A L Letter: Upper case letter, latin, Other, U+007A
U+005B ON Punctuation: Open punctuation, common, Other
U+005C ON Punctuation: Other punctuation, common, Other
U+005D ON Punctuation: Close punctuation, common, Other
U+005E ON Symbol: Modifier symbol, common, Other
U+005F ON Punctuation: Connector punctuation, common, Other
findprop 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
U+0060 ON Symbol: Modifier symbol, Common, Other
U+0061 L Letter: Lower case letter, Latin, Other, U+0041
U+0062 L Letter: Lower case letter, Latin, Other, U+0042
U+0063 L Letter: Lower case letter, Latin, Other, U+0043
U+0064 L Letter: Lower case letter, Latin, Other, U+0044
U+0065 L Letter: Lower case letter, Latin, Other, U+0045
U+0066 L Letter: Lower case letter, Latin, Other, U+0046
U+0067 L Letter: Lower case letter, Latin, Other, U+0047
U+0068 L Letter: Lower case letter, Latin, Other, U+0048
U+0069 L Letter: Lower case letter, Latin, Other, U+0049
U+006A L Letter: Lower case letter, Latin, Other, U+004A
U+006B L Letter: Lower case letter, Latin, Other, U+004B, U+212A
U+006C L Letter: Lower case letter, Latin, Other, U+004C
U+006D L Letter: Lower case letter, Latin, Other, U+004D
U+006E L Letter: Lower case letter, Latin, Other, U+004E
U+006F L Letter: Lower case letter, Latin, Other, U+004F
U+0060 ON Symbol: Modifier symbol, common, Other
U+0061 L Letter: Lower case letter, latin, Other, U+0041
U+0062 L Letter: Lower case letter, latin, Other, U+0042
U+0063 L Letter: Lower case letter, latin, Other, U+0043
U+0064 L Letter: Lower case letter, latin, Other, U+0044
U+0065 L Letter: Lower case letter, latin, Other, U+0045
U+0066 L Letter: Lower case letter, latin, Other, U+0046
U+0067 L Letter: Lower case letter, latin, Other, U+0047
U+0068 L Letter: Lower case letter, latin, Other, U+0048
U+0069 L Letter: Lower case letter, latin, Other, U+0049
U+006A L Letter: Lower case letter, latin, Other, U+004A
U+006B L Letter: Lower case letter, latin, Other, U+004B, U+212A
U+006C L Letter: Lower case letter, latin, Other, U+004C
U+006D L Letter: Lower case letter, latin, Other, U+004D
U+006E L Letter: Lower case letter, latin, Other, U+004E
U+006F L Letter: Lower case letter, latin, Other, U+004F
findprop 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
U+0070 L Letter: Lower case letter, Latin, Other, U+0050
U+0071 L Letter: Lower case letter, Latin, Other, U+0051
U+0072 L Letter: Lower case letter, Latin, Other, U+0052
U+0073 L Letter: Lower case letter, Latin, Other, U+0053, U+017F
U+0074 L Letter: Lower case letter, Latin, Other, U+0054
U+0075 L Letter: Lower case letter, Latin, Other, U+0055
U+0076 L Letter: Lower case letter, Latin, Other, U+0056
U+0077 L Letter: Lower case letter, Latin, Other, U+0057
U+0078 L Letter: Lower case letter, Latin, Other, U+0058
U+0079 L Letter: Lower case letter, Latin, Other, U+0059
U+007A L Letter: Lower case letter, Latin, Other, U+005A
U+007B ON Punctuation: Open punctuation, Common, Other
U+007C ON Symbol: Mathematical symbol, Common, Other
U+007D ON Punctuation: Close punctuation, Common, Other
U+007E ON Symbol: Mathematical symbol, Common, Other
U+007F BN Control: Control, Common, Control
U+0070 L Letter: Lower case letter, latin, Other, U+0050
U+0071 L Letter: Lower case letter, latin, Other, U+0051
U+0072 L Letter: Lower case letter, latin, Other, U+0052
U+0073 L Letter: Lower case letter, latin, Other, U+0053, U+017F
U+0074 L Letter: Lower case letter, latin, Other, U+0054
U+0075 L Letter: Lower case letter, latin, Other, U+0055
U+0076 L Letter: Lower case letter, latin, Other, U+0056
U+0077 L Letter: Lower case letter, latin, Other, U+0057
U+0078 L Letter: Lower case letter, latin, Other, U+0058
U+0079 L Letter: Lower case letter, latin, Other, U+0059
U+007A L Letter: Lower case letter, latin, Other, U+005A
U+007B ON Punctuation: Open punctuation, common, Other
U+007C ON Symbol: Mathematical symbol, common, Other
U+007D ON Punctuation: Close punctuation, common, Other
U+007E ON Symbol: Mathematical symbol, common, Other
U+007F BN Control: Control, common, Control
findprop 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f
U+0080 BN Control: Control, Common, Control
U+0081 BN Control: Control, Common, Control
U+0082 BN Control: Control, Common, Control
U+0083 BN Control: Control, Common, Control
U+0084 BN Control: Control, Common, Control
U+0085 B Control: Control, Common, Control
U+0086 BN Control: Control, Common, Control
U+0087 BN Control: Control, Common, Control
U+0088 BN Control: Control, Common, Control
U+0089 BN Control: Control, Common, Control
U+008A BN Control: Control, Common, Control
U+008B BN Control: Control, Common, Control
U+008C BN Control: Control, Common, Control
U+008D BN Control: Control, Common, Control
U+008E BN Control: Control, Common, Control
U+008F BN Control: Control, Common, Control
U+0080 BN Control: Control, common, Control
U+0081 BN Control: Control, common, Control
U+0082 BN Control: Control, common, Control
U+0083 BN Control: Control, common, Control
U+0084 BN Control: Control, common, Control
U+0085 B Control: Control, common, Control
U+0086 BN Control: Control, common, Control
U+0087 BN Control: Control, common, Control
U+0088 BN Control: Control, common, Control
U+0089 BN Control: Control, common, Control
U+008A BN Control: Control, common, Control
U+008B BN Control: Control, common, Control
U+008C BN Control: Control, common, Control
U+008D BN Control: Control, common, Control
U+008E BN Control: Control, common, Control
U+008F BN Control: Control, common, Control
findprop 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f
U+0090 BN Control: Control, Common, Control
U+0091 BN Control: Control, Common, Control
U+0092 BN Control: Control, Common, Control
U+0093 BN Control: Control, Common, Control
U+0094 BN Control: Control, Common, Control
U+0095 BN Control: Control, Common, Control
U+0096 BN Control: Control, Common, Control
U+0097 BN Control: Control, Common, Control
U+0098 BN Control: Control, Common, Control
U+0099 BN Control: Control, Common, Control
U+009A BN Control: Control, Common, Control
U+009B BN Control: Control, Common, Control
U+009C BN Control: Control, Common, Control
U+009D BN Control: Control, Common, Control
U+009E BN Control: Control, Common, Control
U+009F BN Control: Control, Common, Control
U+0090 BN Control: Control, common, Control
U+0091 BN Control: Control, common, Control
U+0092 BN Control: Control, common, Control
U+0093 BN Control: Control, common, Control
U+0094 BN Control: Control, common, Control
U+0095 BN Control: Control, common, Control
U+0096 BN Control: Control, common, Control
U+0097 BN Control: Control, common, Control
U+0098 BN Control: Control, common, Control
U+0099 BN Control: Control, common, Control
U+009A BN Control: Control, common, Control
U+009B BN Control: Control, common, Control
U+009C BN Control: Control, common, Control
U+009D BN Control: Control, common, Control
U+009E BN Control: Control, common, Control
U+009F BN Control: Control, common, Control
findprop a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aa ab ac ad ae af
U+00A0 CS Separator: Space separator, Common, Other
U+00A1 ON Punctuation: Other punctuation, Common, Other
U+00A2 ET Symbol: Currency symbol, Common, Other
U+00A3 ET Symbol: Currency symbol, Common, Other
U+00A4 ET Symbol: Currency symbol, Common, Other
U+00A5 ET Symbol: Currency symbol, Common, Other
U+00A6 ON Symbol: Other symbol, Common, Other
U+00A7 ON Punctuation: Other punctuation, Common, Other
U+00A8 ON Symbol: Modifier symbol, Common, Other
U+00A9 ON Symbol: Other symbol, Common, Extended Pictographic
U+00AA L Letter: Other letter, Latin, Other
U+00AB ON Punctuation: Initial punctuation, Common, Other
U+00AC ON Symbol: Mathematical symbol, Common, Other
U+00AD BN Control: Format, Common, Control
U+00AE ON Symbol: Other symbol, Common, Extended Pictographic
U+00AF ON Symbol: Modifier symbol, Common, Other
U+00A0 CS Separator: Space separator, common, Other
U+00A1 ON Punctuation: Other punctuation, common, Other
U+00A2 ET Symbol: Currency symbol, common, Other
U+00A3 ET Symbol: Currency symbol, common, Other
U+00A4 ET Symbol: Currency symbol, common, Other
U+00A5 ET Symbol: Currency symbol, common, Other
U+00A6 ON Symbol: Other symbol, common, Other
U+00A7 ON Punctuation: Other punctuation, common, Other
U+00A8 ON Symbol: Modifier symbol, common, Other
U+00A9 ON Symbol: Other symbol, common, Extended Pictographic
U+00AA L Letter: Other letter, latin, Other
U+00AB ON Punctuation: Initial punctuation, common, Other
U+00AC ON Symbol: Mathematical symbol, common, Other
U+00AD BN Control: Format, common, Control
U+00AE ON Symbol: Other symbol, common, Extended Pictographic
U+00AF ON Symbol: Modifier symbol, common, Other
findprop b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf
U+00B0 ET Symbol: Other symbol, Common, Other
U+00B1 ET Symbol: Mathematical symbol, Common, Other
U+00B2 EN Number: Other number, Common, Other
U+00B3 EN Number: Other number, Common, Other
U+00B4 ON Symbol: Modifier symbol, Common, Other
U+00B5 L Letter: Lower case letter, Common, Other, U+03BC, U+039C
U+00B6 ON Punctuation: Other punctuation, Common, Other
U+00B7 ON Punctuation: Other punctuation, Common, Other
U+00B8 ON Symbol: Modifier symbol, Common, Other
U+00B9 EN Number: Other number, Common, Other
U+00BA L Letter: Other letter, Latin, Other
U+00BB ON Punctuation: Final punctuation, Common, Other
U+00BC ON Number: Other number, Common, Other
U+00BD ON Number: Other number, Common, Other
U+00BE ON Number: Other number, Common, Other
U+00BF ON Punctuation: Other punctuation, Common, Other
U+00B0 ET Symbol: Other symbol, common, Other
U+00B1 ET Symbol: Mathematical symbol, common, Other
U+00B2 EN Number: Other number, common, Other
U+00B3 EN Number: Other number, common, Other
U+00B4 ON Symbol: Modifier symbol, common, Other
U+00B5 L Letter: Lower case letter, common, Other, U+03BC, U+039C
U+00B6 ON Punctuation: Other punctuation, common, Other
U+00B7 ON Punctuation: Other punctuation, common, Other
U+00B8 ON Symbol: Modifier symbol, common, Other
U+00B9 EN Number: Other number, common, Other
U+00BA L Letter: Other letter, latin, Other
U+00BB ON Punctuation: Final punctuation, common, Other
U+00BC ON Number: Other number, common, Other
U+00BD ON Number: Other number, common, Other
U+00BE ON Number: Other number, common, Other
U+00BF ON Punctuation: Other punctuation, common, Other
findprop c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 ca cb cc cd ce cf
U+00C0 L Letter: Upper case letter, Latin, Other, U+00E0
U+00C1 L Letter: Upper case letter, Latin, Other, U+00E1
U+00C2 L Letter: Upper case letter, Latin, Other, U+00E2
U+00C3 L Letter: Upper case letter, Latin, Other, U+00E3
U+00C4 L Letter: Upper case letter, Latin, Other, U+00E4
U+00C5 L Letter: Upper case letter, Latin, Other, U+00E5, U+212B
U+00C6 L Letter: Upper case letter, Latin, Other, U+00E6
U+00C7 L Letter: Upper case letter, Latin, Other, U+00E7
U+00C8 L Letter: Upper case letter, Latin, Other, U+00E8
U+00C9 L Letter: Upper case letter, Latin, Other, U+00E9
U+00CA L Letter: Upper case letter, Latin, Other, U+00EA
U+00CB L Letter: Upper case letter, Latin, Other, U+00EB
U+00CC L Letter: Upper case letter, Latin, Other, U+00EC
U+00CD L Letter: Upper case letter, Latin, Other, U+00ED
U+00CE L Letter: Upper case letter, Latin, Other, U+00EE
U+00CF L Letter: Upper case letter, Latin, Other, U+00EF
U+00C0 L Letter: Upper case letter, latin, Other, U+00E0
U+00C1 L Letter: Upper case letter, latin, Other, U+00E1
U+00C2 L Letter: Upper case letter, latin, Other, U+00E2
U+00C3 L Letter: Upper case letter, latin, Other, U+00E3
U+00C4 L Letter: Upper case letter, latin, Other, U+00E4
U+00C5 L Letter: Upper case letter, latin, Other, U+00E5, U+212B
U+00C6 L Letter: Upper case letter, latin, Other, U+00E6
U+00C7 L Letter: Upper case letter, latin, Other, U+00E7
U+00C8 L Letter: Upper case letter, latin, Other, U+00E8
U+00C9 L Letter: Upper case letter, latin, Other, U+00E9
U+00CA L Letter: Upper case letter, latin, Other, U+00EA
U+00CB L Letter: Upper case letter, latin, Other, U+00EB
U+00CC L Letter: Upper case letter, latin, Other, U+00EC
U+00CD L Letter: Upper case letter, latin, Other, U+00ED
U+00CE L Letter: Upper case letter, latin, Other, U+00EE
U+00CF L Letter: Upper case letter, latin, Other, U+00EF
findprop d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 da db dc dd de df
U+00D0 L Letter: Upper case letter, Latin, Other, U+00F0
U+00D1 L Letter: Upper case letter, Latin, Other, U+00F1
U+00D2 L Letter: Upper case letter, Latin, Other, U+00F2
U+00D3 L Letter: Upper case letter, Latin, Other, U+00F3
U+00D4 L Letter: Upper case letter, Latin, Other, U+00F4
U+00D5 L Letter: Upper case letter, Latin, Other, U+00F5
U+00D6 L Letter: Upper case letter, Latin, Other, U+00F6
U+00D7 ON Symbol: Mathematical symbol, Common, Other
U+00D8 L Letter: Upper case letter, Latin, Other, U+00F8
U+00D9 L Letter: Upper case letter, Latin, Other, U+00F9
U+00DA L Letter: Upper case letter, Latin, Other, U+00FA
U+00DB L Letter: Upper case letter, Latin, Other, U+00FB
U+00DC L Letter: Upper case letter, Latin, Other, U+00FC
U+00DD L Letter: Upper case letter, Latin, Other, U+00FD
U+00DE L Letter: Upper case letter, Latin, Other, U+00FE
U+00DF L Letter: Lower case letter, Latin, Other, U+1E9E
U+00D0 L Letter: Upper case letter, latin, Other, U+00F0
U+00D1 L Letter: Upper case letter, latin, Other, U+00F1
U+00D2 L Letter: Upper case letter, latin, Other, U+00F2
U+00D3 L Letter: Upper case letter, latin, Other, U+00F3
U+00D4 L Letter: Upper case letter, latin, Other, U+00F4
U+00D5 L Letter: Upper case letter, latin, Other, U+00F5
U+00D6 L Letter: Upper case letter, latin, Other, U+00F6
U+00D7 ON Symbol: Mathematical symbol, common, Other
U+00D8 L Letter: Upper case letter, latin, Other, U+00F8
U+00D9 L Letter: Upper case letter, latin, Other, U+00F9
U+00DA L Letter: Upper case letter, latin, Other, U+00FA
U+00DB L Letter: Upper case letter, latin, Other, U+00FB
U+00DC L Letter: Upper case letter, latin, Other, U+00FC
U+00DD L Letter: Upper case letter, latin, Other, U+00FD
U+00DE L Letter: Upper case letter, latin, Other, U+00FE
U+00DF L Letter: Lower case letter, latin, Other, U+1E9E
findprop e0 e1 e2 e3 e4 e5 e6 e7 e8 e9 ea eb ec ed ee ef
U+00E0 L Letter: Lower case letter, Latin, Other, U+00C0
U+00E1 L Letter: Lower case letter, Latin, Other, U+00C1
U+00E2 L Letter: Lower case letter, Latin, Other, U+00C2
U+00E3 L Letter: Lower case letter, Latin, Other, U+00C3
U+00E4 L Letter: Lower case letter, Latin, Other, U+00C4
U+00E5 L Letter: Lower case letter, Latin, Other, U+00C5, U+212B
U+00E6 L Letter: Lower case letter, Latin, Other, U+00C6
U+00E7 L Letter: Lower case letter, Latin, Other, U+00C7
U+00E8 L Letter: Lower case letter, Latin, Other, U+00C8
U+00E9 L Letter: Lower case letter, Latin, Other, U+00C9
U+00EA L Letter: Lower case letter, Latin, Other, U+00CA
U+00EB L Letter: Lower case letter, Latin, Other, U+00CB
U+00EC L Letter: Lower case letter, Latin, Other, U+00CC
U+00ED L Letter: Lower case letter, Latin, Other, U+00CD
U+00EE L Letter: Lower case letter, Latin, Other, U+00CE
U+00EF L Letter: Lower case letter, Latin, Other, U+00CF
U+00E0 L Letter: Lower case letter, latin, Other, U+00C0
U+00E1 L Letter: Lower case letter, latin, Other, U+00C1
U+00E2 L Letter: Lower case letter, latin, Other, U+00C2
U+00E3 L Letter: Lower case letter, latin, Other, U+00C3
U+00E4 L Letter: Lower case letter, latin, Other, U+00C4
U+00E5 L Letter: Lower case letter, latin, Other, U+00C5, U+212B
U+00E6 L Letter: Lower case letter, latin, Other, U+00C6
U+00E7 L Letter: Lower case letter, latin, Other, U+00C7
U+00E8 L Letter: Lower case letter, latin, Other, U+00C8
U+00E9 L Letter: Lower case letter, latin, Other, U+00C9
U+00EA L Letter: Lower case letter, latin, Other, U+00CA
U+00EB L Letter: Lower case letter, latin, Other, U+00CB
U+00EC L Letter: Lower case letter, latin, Other, U+00CC
U+00ED L Letter: Lower case letter, latin, Other, U+00CD
U+00EE L Letter: Lower case letter, latin, Other, U+00CE
U+00EF L Letter: Lower case letter, latin, Other, U+00CF
findprop f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff
U+00F0 L Letter: Lower case letter, Latin, Other, U+00D0
U+00F1 L Letter: Lower case letter, Latin, Other, U+00D1
U+00F2 L Letter: Lower case letter, Latin, Other, U+00D2
U+00F3 L Letter: Lower case letter, Latin, Other, U+00D3
U+00F4 L Letter: Lower case letter, Latin, Other, U+00D4
U+00F5 L Letter: Lower case letter, Latin, Other, U+00D5
U+00F6 L Letter: Lower case letter, Latin, Other, U+00D6
U+00F7 ON Symbol: Mathematical symbol, Common, Other
U+00F8 L Letter: Lower case letter, Latin, Other, U+00D8
U+00F9 L Letter: Lower case letter, Latin, Other, U+00D9
U+00FA L Letter: Lower case letter, Latin, Other, U+00DA
U+00FB L Letter: Lower case letter, Latin, Other, U+00DB
U+00FC L Letter: Lower case letter, Latin, Other, U+00DC
U+00FD L Letter: Lower case letter, Latin, Other, U+00DD
U+00FE L Letter: Lower case letter, Latin, Other, U+00DE
U+00FF L Letter: Lower case letter, Latin, Other, U+0178
U+00F0 L Letter: Lower case letter, latin, Other, U+00D0
U+00F1 L Letter: Lower case letter, latin, Other, U+00D1
U+00F2 L Letter: Lower case letter, latin, Other, U+00D2
U+00F3 L Letter: Lower case letter, latin, Other, U+00D3
U+00F4 L Letter: Lower case letter, latin, Other, U+00D4
U+00F5 L Letter: Lower case letter, latin, Other, U+00D5
U+00F6 L Letter: Lower case letter, latin, Other, U+00D6
U+00F7 ON Symbol: Mathematical symbol, common, Other
U+00F8 L Letter: Lower case letter, latin, Other, U+00D8
U+00F9 L Letter: Lower case letter, latin, Other, U+00D9
U+00FA L Letter: Lower case letter, latin, Other, U+00DA
U+00FB L Letter: Lower case letter, latin, Other, U+00DB
U+00FC L Letter: Lower case letter, latin, Other, U+00DC
U+00FD L Letter: Lower case letter, latin, Other, U+00DD
U+00FE L Letter: Lower case letter, latin, Other, U+00DE
U+00FF L Letter: Lower case letter, latin, Other, U+0178
findprop 0100 0101 0102 0103 0104 0105 0106
U+0100 L Letter: Upper case letter, Latin, Other, U+0101
U+0101 L Letter: Lower case letter, Latin, Other, U+0100
U+0102 L Letter: Upper case letter, Latin, Other, U+0103
U+0103 L Letter: Lower case letter, Latin, Other, U+0102
U+0104 L Letter: Upper case letter, Latin, Other, U+0105
U+0105 L Letter: Lower case letter, Latin, Other, U+0104
U+0106 L Letter: Upper case letter, Latin, Other, U+0107
U+0100 L Letter: Upper case letter, latin, Other, U+0101
U+0101 L Letter: Lower case letter, latin, Other, U+0100
U+0102 L Letter: Upper case letter, latin, Other, U+0103
U+0103 L Letter: Lower case letter, latin, Other, U+0102
U+0104 L Letter: Upper case letter, latin, Other, U+0105
U+0105 L Letter: Lower case letter, latin, Other, U+0104
U+0106 L Letter: Upper case letter, latin, Other, U+0107
findprop ffe0 ffe1 ffe2 ffe3 ffe4 ffe5 ffe6 ffe7
U+FFE0 ET Symbol: Currency symbol, Common, Other
U+FFE1 ET Symbol: Currency symbol, Common, Other
U+FFE2 ON Symbol: Mathematical symbol, Common, Other
U+FFE3 ON Symbol: Modifier symbol, Common, Other
U+FFE4 ON Symbol: Other symbol, Common, Other
U+FFE5 ET Symbol: Currency symbol, Common, Other
U+FFE6 ET Symbol: Currency symbol, Common, Other
U+FFE7 L Control: Unassigned, Unknown, Other
U+FFE0 ET Symbol: Currency symbol, common, Other
U+FFE1 ET Symbol: Currency symbol, common, Other
U+FFE2 ON Symbol: Mathematical symbol, common, Other
U+FFE3 ON Symbol: Modifier symbol, common, Other
U+FFE4 ON Symbol: Other symbol, common, Other
U+FFE5 ET Symbol: Currency symbol, common, Other
U+FFE6 ET Symbol: Currency symbol, common, Other
U+FFE7 L Control: Unassigned, unknown, Other
findprop ffe8 ffe9 ffea ffeb ffec ffed ffee ffef
U+FFE8 ON Symbol: Other symbol, Common, Other
U+FFE9 ON Symbol: Mathematical symbol, Common, Other
U+FFEA ON Symbol: Mathematical symbol, Common, Other
U+FFEB ON Symbol: Mathematical symbol, Common, Other
U+FFEC ON Symbol: Mathematical symbol, Common, Other
U+FFED ON Symbol: Other symbol, Common, Other
U+FFEE ON Symbol: Other symbol, Common, Other
U+FFEF L Control: Unassigned, Unknown, Other
U+FFE8 ON Symbol: Other symbol, common, Other
U+FFE9 ON Symbol: Mathematical symbol, common, Other
U+FFEA ON Symbol: Mathematical symbol, common, Other
U+FFEB ON Symbol: Mathematical symbol, common, Other
U+FFEC ON Symbol: Mathematical symbol, common, Other
U+FFED ON Symbol: Other symbol, common, Other
U+FFEE ON Symbol: Other symbol, common, Other
U+FFEF L Control: Unassigned, unknown, Other
findprop fff8 fff9 fffa fffb fffc fffd fffe ffff
U+FFF8 BN Control: Unassigned, Unknown, Control
U+FFF9 ON Control: Format, Common, Control
U+FFFA ON Control: Format, Common, Control
U+FFFB ON Control: Format, Common, Control
U+FFFC ON Symbol: Other symbol, Common, Other
U+FFFD ON Symbol: Other symbol, Common, Other
U+FFFE BN Control: Unassigned, Unknown, Other
U+FFFF BN Control: Unassigned, Unknown, Other
U+FFF8 BN Control: Unassigned, unknown, Control
U+FFF9 ON Control: Format, common, Control
U+FFFA ON Control: Format, common, Control
U+FFFB ON Control: Format, common, Control
U+FFFC ON Symbol: Other symbol, common, Other
U+FFFD ON Symbol: Other symbol, common, Other
U+FFFE BN Control: Unassigned, unknown, Other
U+FFFF BN Control: Unassigned, unknown, Other
findprop 10000 10001 e01ef f0000 100000
U+10000 L Letter: Other letter, Linear_B, Other
U+10001 L Letter: Other letter, Linear_B, Other
U+E01EF NSM Mark: Non-spacing mark, Inherited, Extend
U+F0000 L Control: Private use, Unknown, Other
U+100000 L Control: Private use, Unknown, Other
U+10000 L Letter: Other letter, linearb, Other
U+10001 L Letter: Other letter, linearb, Other
U+E01EF NSM Mark: Non-spacing mark, inherited, Extend
U+F0000 L Control: Private use, unknown, Other
U+100000 L Control: Private use, unknown, Other
findprop 1b00 12000 7c0 a840 10900
U+1B00 NSM Mark: Non-spacing mark, Balinese, Extend
U+12000 L Letter: Other letter, Cuneiform, Other
U+07C0 R Number: Decimal number, Nko, Other
U+A840 L Letter: Other letter, Phags_Pa, Other
U+10900 R Letter: Other letter, Phoenician, Other
U+1B00 NSM Mark: Non-spacing mark, balinese, Extend
U+12000 L Letter: Other letter, cuneiform, Other
U+07C0 R Number: Decimal number, nko, Other
U+A840 L Letter: Other letter, phagspa, Other
U+10900 R Letter: Other letter, phoenician, Other
findprop 1d79 a77d
U+1D79 L Letter: Lower case letter, Latin, Other, U+A77D
U+A77D L Letter: Upper case letter, Latin, Other, U+1D79
U+1D79 L Letter: Lower case letter, latin, Other, U+A77D
U+A77D L Letter: Upper case letter, latin, Other, U+1D79
findprop 0800 083e a4d0 a4f7 aa80 aadf
U+0800 R Letter: Other letter, Samaritan, Other
U+083E R Punctuation: Other punctuation, Samaritan, Other
U+A4D0 L Letter: Other letter, Lisu, Other
U+A4F7 L Letter: Other letter, Lisu, Other
U+AA80 L Letter: Other letter, Tai_Viet, Other
U+AADF L Punctuation: Other punctuation, Tai_Viet, Other
U+0800 R Letter: Other letter, samaritan, Other
U+083E R Punctuation: Other punctuation, samaritan, Other
U+A4D0 L Letter: Other letter, lisu, Other
U+A4F7 L Letter: Other letter, lisu, Other
U+AA80 L Letter: Other letter, taiviet, Other
U+AADF L Punctuation: Other punctuation, taiviet, Other
findprop 10b00 10b35 13000 1342e 10840 10855
U+10B00 R Letter: Other letter, Avestan, Other
U+10B35 R Letter: Other letter, Avestan, Other
U+13000 L Letter: Other letter, Egyptian_Hieroglyphs, Other
U+1342E L Letter: Other letter, Egyptian_Hieroglyphs, Other
U+10840 R Letter: Other letter, Imperial_Aramaic, Other
U+10855 R Letter: Other letter, Imperial_Aramaic, Other
U+10B00 R Letter: Other letter, avestan, Other
U+10B35 R Letter: Other letter, avestan, Other
U+13000 L Letter: Other letter, egyptianhieroglyphs, Other
U+1342E L Letter: Other letter, egyptianhieroglyphs, Other
U+10840 R Letter: Other letter, imperialaramaic, Other
U+10855 R Letter: Other letter, imperialaramaic, Other
findprop 11100 1113c 11680 116c0
U+11100 NSM Mark: Non-spacing mark, Chakma, Extend
U+1113C L Number: Decimal number, Chakma, Other
U+11680 L Letter: Other letter, Takri, Other
U+116C0 L Number: Decimal number, Takri, Other
U+11100 NSM Mark: Non-spacing mark, chakma, Extend
U+1113C L Number: Decimal number, chakma, Other
U+11680 L Letter: Other letter, takri, Other
U+116C0 L Number: Decimal number, takri, Other
findprop 0d 0a 0e 0711 1b04 1111 1169 11fe ae4c ad89
U+000D B Control: Control, Common, CR
U+000A B Control: Control, Common, LF
U+000E BN Control: Control, Common, Control
U+0711 NSM Mark: Non-spacing mark, Syriac, Extend
U+1B04 L Mark: Spacing mark, Balinese, SpacingMark
U+1111 L Letter: Other letter, Hangul, Hangul syllable type L
U+1169 L Letter: Other letter, Hangul, Hangul syllable type V
U+11FE L Letter: Other letter, Hangul, Hangul syllable type T
U+AE4C L Letter: Other letter, Hangul, Hangul syllable type LV
U+AD89 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+000D B Control: Control, common, CR
U+000A B Control: Control, common, LF
U+000E BN Control: Control, common, Control
U+0711 NSM Mark: Non-spacing mark, syriac, Extend
U+1B04 L Mark: Spacing mark, balinese, SpacingMark
U+1111 L Letter: Other letter, hangul, Hangul syllable type L
U+1169 L Letter: Other letter, hangul, Hangul syllable type V
U+11FE L Letter: Other letter, hangul, Hangul syllable type T
U+AE4C L Letter: Other letter, hangul, Hangul syllable type LV
U+AD89 L Letter: Other letter, hangul, Hangul syllable type LVT
findprop 118a0 11ac7 16ad0
U+118A0 L Letter: Upper case letter, Warang_Citi, Other, U+118C0
U+11AC7 L Letter: Other letter, Pau_Cin_Hau, Other
U+16AD0 L Letter: Other letter, Bassa_Vah, Other
U+118A0 L Letter: Upper case letter, warangciti, Other, U+118C0
U+11AC7 L Letter: Other letter, paucinhau, Other
U+16AD0 L Letter: Other letter, bassavah, Other
findprop 11700 14400 108e0 11280 1d800
U+11700 L Letter: Other letter, Ahom, Other
U+14400 L Letter: Other letter, Anatolian_Hieroglyphs, Other
U+108E0 R Letter: Other letter, Hatran, Other
U+11280 L Letter: Other letter, Multani, Other
U+1D800 L Symbol: Other symbol, SignWriting, Other
U+11700 L Letter: Other letter, ahom, Other
U+14400 L Letter: Other letter, anatolianhieroglyphs, Other
U+108E0 R Letter: Other letter, hatran, Other
U+11280 L Letter: Other letter, multani, Other
U+1D800 L Symbol: Other symbol, signwriting, Other
findprop 11800 1e903 11da9 10d27 11ee0 16e48 10f27 10f30
U+11800 L Letter: Other letter, Dogra, Other
U+1E903 R Letter: Upper case letter, Adlam, Other, U+1E925
U+11DA9 L Number: Decimal number, Gunjala_Gondi, Other
U+10D27 NSM Mark: Non-spacing mark, Hanifi_Rohingya, Extend
U+11EE0 L Letter: Other letter, Makasar, Other
U+16E48 L Letter: Upper case letter, Medefaidrin, Other, U+16E68
U+10F27 R Letter: Other letter, Old_Sogdian, Other
U+10F30 AL Letter: Other letter, Sogdian, Other
U+11800 L Letter: Other letter, dogra, Other
U+1E903 R Letter: Upper case letter, adlam, Other, U+1E925
U+11DA9 L Number: Decimal number, gunjalagondi, Other
U+10D27 NSM Mark: Non-spacing mark, hanifirohingya, Extend
U+11EE0 L Letter: Other letter, makasar, Other
U+16E48 L Letter: Upper case letter, medefaidrin, Other, U+16E68
U+10F27 R Letter: Other letter, oldsogdian, Other
U+10F30 AL Letter: Other letter, sogdian, Other
findprop a836 a833 1cf4 20f0 1cd0
U+A836 L Symbol: Other symbol, Common, Other, [Devanagari, Dogra, Gujarati, Gurmukhi, Khojki, Kaithi, Mahajani, Modi, Khudawadi, Takri, Tirhuta]
U+A833 L Number: Other number, Common, Other, [Devanagari, Dogra, Gujarati, Gurmukhi, Khojki, Kannada, Kaithi, Mahajani, Modi, Nandinagari, Khudawadi, Takri, Tirhuta]
U+1CF4 NSM Mark: Non-spacing mark, Inherited, Extend, [Devanagari, Grantha, Kannada]
U+20F0 NSM Mark: Non-spacing mark, Inherited, Extend, [Devanagari, Grantha, Latin]
U+1CD0 NSM Mark: Non-spacing mark, Inherited, Extend, [Bengali, Devanagari, Grantha, Kannada]
U+A836 L Symbol: Other symbol, common, Other, [devanagari, dogra, gujarati, gurmukhi, khojki, kaithi, mahajani, modi, khudawadi, takri, tirhuta]
U+A833 L Number: Other number, common, Other, [devanagari, dogra, gujarati, gurmukhi, khojki, kannada, kaithi, mahajani, modi, nandinagari, khudawadi, takri, tirhuta]
U+1CF4 NSM Mark: Non-spacing mark, inherited, Extend, [devanagari, grantha, kannada]
U+20F0 NSM Mark: Non-spacing mark, inherited, Extend, [devanagari, grantha, latin]
U+1CD0 NSM Mark: Non-spacing mark, inherited, Extend, [bengali, devanagari, grantha, kannada]
findprop 32ff
U+32FF L Symbol: Other symbol, Common, Other, [Han]
U+32FF L Symbol: Other symbol, common, Other, [han]
findprop 1f16d
U+1F16D ON Symbol: Other symbol, Common, Extended Pictographic
U+1F16D ON Symbol: Other symbol, common, Extended Pictographic
findprop U+10e93 U+10eaa
U+10E93 R Letter: Other letter, Yezidi, Other
U+10EAA R Control: Unassigned, Unknown, Other
U+10E93 R Letter: Other letter, yezidi, Other
U+10EAA R Control: Unassigned, unknown, Other
findprop 0602 202a 202b 202c 2068 2069 202d 202e 2067
U+0602 AN Control: Format, Arabic, Prepend
U+202A *LRE Control: Format, Common, Control
U+202B *RLE Control: Format, Common, Control
U+202C *PDF Control: Format, Common, Control
U+2068 *FSI Control: Format, Common, Control
U+2069 *PDI Control: Format, Common, Control
U+202D *LRO Control: Format, Common, Control
U+202E *RLO Control: Format, Common, Control
U+2067 *RLI Control: Format, Common, Control
U+0602 AN Control: Format, arabic, Prepend
U+202A *LRE Control: Format, common, Control
U+202B *RLE Control: Format, common, Control
U+202C *PDF Control: Format, common, Control
U+2068 *FSI Control: Format, common, Control
U+2069 *PDI Control: Format, common, Control
U+202D *LRO Control: Format, common, Control
U+202E *RLO Control: Format, common, Control
U+2067 *RLI Control: Format, common, Control

View File

@ -1,253 +1,220 @@
find script Han
U+2E80..U+2E99 ON Symbol: Other symbol, Han, Other
U+2E9B..U+2EF3 ON Symbol: Other symbol, Han, Other
U+2F00..U+2FD5 ON Symbol: Other symbol, Han, Other
U+3005 L Letter: Modifier letter, Han, Other
U+3007 L Number: Letter number, Han, Other
U+3021..U+3029 L Number: Letter number, Han, Other
U+3038..U+303A L Number: Letter number, Han, Other
U+303B L Letter: Modifier letter, Han, Other
U+3400..U+4DBF L Letter: Other letter, Han, Other
U+4E00..U+9FFF L Letter: Other letter, Han, Other
U+F900..U+FA6D L Letter: Other letter, Han, Other
U+FA70..U+FAD9 L Letter: Other letter, Han, Other
U+16FE2 ON Punctuation: Other punctuation, Han, Other
U+16FE3 L Letter: Modifier letter, Han, Other
U+16FF0..U+16FF1 L Mark: Spacing mark, Han, SpacingMark
U+20000..U+2A6DF L Letter: Other letter, Han, Other
U+2A700..U+2B738 L Letter: Other letter, Han, Other
U+2B740..U+2B81D L Letter: Other letter, Han, Other
U+2B820..U+2CEA1 L Letter: Other letter, Han, Other
U+2CEB0..U+2EBE0 L Letter: Other letter, Han, Other
U+2F800..U+2FA1D L Letter: Other letter, Han, Other
U+30000..U+3134A L Letter: Other letter, Han, Other
** Unrecognized script name "Han"
find type Pe script Common scriptx Hangul
U+3009 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
U+300B ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
U+300D ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
U+300F ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
U+3011 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
U+3015 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
U+3017 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
U+3019 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
U+301B ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
U+301E..U+301F ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana]
U+FF63 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
** Unrecognized script name "Common"
find type Sk
U+005E ON Symbol: Modifier symbol, Common, Other
U+0060 ON Symbol: Modifier symbol, Common, Other
U+00A8 ON Symbol: Modifier symbol, Common, Other
U+00AF ON Symbol: Modifier symbol, Common, Other
U+00B4 ON Symbol: Modifier symbol, Common, Other
U+00B8 ON Symbol: Modifier symbol, Common, Other
U+02C2..U+02C5 ON Symbol: Modifier symbol, Common, Other
U+02D2..U+02DF ON Symbol: Modifier symbol, Common, Other
U+02E5..U+02E9 ON Symbol: Modifier symbol, Common, Other
U+02EA..U+02EB ON Symbol: Modifier symbol, Bopomofo, Other
U+02ED ON Symbol: Modifier symbol, Common, Other
U+02EF..U+02FF ON Symbol: Modifier symbol, Common, Other
U+0375 ON Symbol: Modifier symbol, Greek, Other
U+0384 ON Symbol: Modifier symbol, Greek, Other
U+0385 ON Symbol: Modifier symbol, Common, Other
U+0888 AL Symbol: Modifier symbol, Arabic, Other
U+1FBD ON Symbol: Modifier symbol, Greek, Other
U+1FBF..U+1FC1 ON Symbol: Modifier symbol, Greek, Other
U+1FCD..U+1FCF ON Symbol: Modifier symbol, Greek, Other
U+1FDD..U+1FDF ON Symbol: Modifier symbol, Greek, Other
U+1FED..U+1FEF ON Symbol: Modifier symbol, Greek, Other
U+1FFD..U+1FFE ON Symbol: Modifier symbol, Greek, Other
U+309B..U+309C ON Symbol: Modifier symbol, Common, Other, [Hiragana, Katakana]
U+A700..U+A707 ON Symbol: Modifier symbol, Common, Other, [Han, Latin]
U+A708..U+A716 ON Symbol: Modifier symbol, Common, Other
U+A720..U+A721 ON Symbol: Modifier symbol, Common, Other
U+A789..U+A78A L Symbol: Modifier symbol, Common, Other
U+AB5B L Symbol: Modifier symbol, Common, Other
U+AB6A..U+AB6B ON Symbol: Modifier symbol, Common, Other
U+FBB2..U+FBC2 AL Symbol: Modifier symbol, Arabic, Other
U+FF3E ON Symbol: Modifier symbol, Common, Other
U+FF40 ON Symbol: Modifier symbol, Common, Other
U+FFE3 ON Symbol: Modifier symbol, Common, Other
U+1F3FB..U+1F3FF ON Symbol: Modifier symbol, Common, Extend
U+005E ON Symbol: Modifier symbol, common, Other
U+0060 ON Symbol: Modifier symbol, common, Other
U+00A8 ON Symbol: Modifier symbol, common, Other
U+00AF ON Symbol: Modifier symbol, common, Other
U+00B4 ON Symbol: Modifier symbol, common, Other
U+00B8 ON Symbol: Modifier symbol, common, Other
U+02C2..U+02C5 ON Symbol: Modifier symbol, common, Other
U+02D2..U+02DF ON Symbol: Modifier symbol, common, Other
U+02E5..U+02E9 ON Symbol: Modifier symbol, common, Other
U+02EA..U+02EB ON Symbol: Modifier symbol, bopomofo, Other
U+02ED ON Symbol: Modifier symbol, common, Other
U+02EF..U+02FF ON Symbol: Modifier symbol, common, Other
U+0375 ON Symbol: Modifier symbol, greek, Other
U+0384 ON Symbol: Modifier symbol, greek, Other
U+0385 ON Symbol: Modifier symbol, common, Other
U+0888 AL Symbol: Modifier symbol, arabic, Other
U+1FBD ON Symbol: Modifier symbol, greek, Other
U+1FBF..U+1FC1 ON Symbol: Modifier symbol, greek, Other
U+1FCD..U+1FCF ON Symbol: Modifier symbol, greek, Other
U+1FDD..U+1FDF ON Symbol: Modifier symbol, greek, Other
U+1FED..U+1FEF ON Symbol: Modifier symbol, greek, Other
U+1FFD..U+1FFE ON Symbol: Modifier symbol, greek, Other
U+309B..U+309C ON Symbol: Modifier symbol, common, Other, [hiragana, katakana]
U+A700..U+A707 ON Symbol: Modifier symbol, common, Other, [han, latin]
U+A708..U+A716 ON Symbol: Modifier symbol, common, Other
U+A720..U+A721 ON Symbol: Modifier symbol, common, Other
U+A789..U+A78A L Symbol: Modifier symbol, common, Other
U+AB5B L Symbol: Modifier symbol, common, Other
U+AB6A..U+AB6B ON Symbol: Modifier symbol, common, Other
U+FBB2..U+FBC2 AL Symbol: Modifier symbol, arabic, Other
U+FF3E ON Symbol: Modifier symbol, common, Other
U+FF40 ON Symbol: Modifier symbol, common, Other
U+FFE3 ON Symbol: Modifier symbol, common, Other
U+1F3FB..U+1F3FF ON Symbol: Modifier symbol, common, Extend
find type Pd
U+002D ES Punctuation: Dash punctuation, Common, Other
U+058A ON Punctuation: Dash punctuation, Armenian, Other
U+05BE R Punctuation: Dash punctuation, Hebrew, Other
U+1400 ON Punctuation: Dash punctuation, Canadian_Aboriginal, Other
U+1806 ON Punctuation: Dash punctuation, Mongolian, Other
U+2010..U+2015 ON Punctuation: Dash punctuation, Common, Other
U+2E17 ON Punctuation: Dash punctuation, Common, Other
U+2E1A ON Punctuation: Dash punctuation, Common, Other
U+2E3A..U+2E3B ON Punctuation: Dash punctuation, Common, Other
U+2E40 ON Punctuation: Dash punctuation, Common, Other
U+2E5D ON Punctuation: Dash punctuation, Common, Other
U+301C ON Punctuation: Dash punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana]
U+3030 ON Punctuation: Dash punctuation, Common, Extended Pictographic, [Bopomofo, Hangul, Han, Hiragana, Katakana]
U+30A0 ON Punctuation: Dash punctuation, Common, Other, [Hiragana, Katakana]
U+FE31..U+FE32 ON Punctuation: Dash punctuation, Common, Other
U+FE58 ON Punctuation: Dash punctuation, Common, Other
U+FE63 ES Punctuation: Dash punctuation, Common, Other
U+FF0D ES Punctuation: Dash punctuation, Common, Other
U+10EAD R Punctuation: Dash punctuation, Yezidi, Other
U+002D ES Punctuation: Dash punctuation, common, Other
U+058A ON Punctuation: Dash punctuation, armenian, Other
U+05BE R Punctuation: Dash punctuation, hebrew, Other
U+1400 ON Punctuation: Dash punctuation, canadianaboriginal, Other
U+1806 ON Punctuation: Dash punctuation, mongolian, Other
U+2010..U+2015 ON Punctuation: Dash punctuation, common, Other
U+2E17 ON Punctuation: Dash punctuation, common, Other
U+2E1A ON Punctuation: Dash punctuation, common, Other
U+2E3A..U+2E3B ON Punctuation: Dash punctuation, common, Other
U+2E40 ON Punctuation: Dash punctuation, common, Other
U+2E5D ON Punctuation: Dash punctuation, common, Other
U+301C ON Punctuation: Dash punctuation, common, Other, [bopomofo, hangul, han, hiragana, katakana]
U+3030 ON Punctuation: Dash punctuation, common, Extended Pictographic, [bopomofo, hangul, han, hiragana, katakana]
U+30A0 ON Punctuation: Dash punctuation, common, Other, [hiragana, katakana]
U+FE31..U+FE32 ON Punctuation: Dash punctuation, common, Other
U+FE58 ON Punctuation: Dash punctuation, common, Other
U+FE63 ES Punctuation: Dash punctuation, common, Other
U+FF0D ES Punctuation: Dash punctuation, common, Other
U+10EAD R Punctuation: Dash punctuation, yezidi, Other
find gbreak LVT
U+AC01..U+AC1B L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AC1D..U+AC37 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AC39..U+AC53 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AC55..U+AC6F L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AC71..U+AC8B L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AC8D..U+ACA7 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+ACA9..U+ACC3 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+ACC5..U+ACDF L Letter: Other letter, Hangul, Hangul syllable type LVT
U+ACE1..U+ACFB L Letter: Other letter, Hangul, Hangul syllable type LVT
U+ACFD..U+AD17 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AD19..U+AD33 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AD35..U+AD4F L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AD51..U+AD6B L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AD6D..U+AD87 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AD89..U+ADA3 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+ADA5..U+ADBF L Letter: Other letter, Hangul, Hangul syllable type LVT
U+ADC1..U+ADDB L Letter: Other letter, Hangul, Hangul syllable type LVT
U+ADDD..U+ADF7 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+ADF9..U+AE13 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AE15..U+AE2F L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AE31..U+AE4B L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AE4D..U+AE67 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AE69..U+AE83 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AE85..U+AE9F L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AEA1..U+AEBB L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AEBD..U+AED7 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AED9..U+AEF3 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AEF5..U+AF0F L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AF11..U+AF2B L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AF2D..U+AF47 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AF49..U+AF63 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AF65..U+AF7F L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AF81..U+AF9B L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AF9D..U+AFB7 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AFB9..U+AFD3 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AFD5..U+AFEF L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AFF1..U+B00B L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B00D..U+B027 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B029..U+B043 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B045..U+B05F L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B061..U+B07B L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B07D..U+B097 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B099..U+B0B3 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B0B5..U+B0CF L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B0D1..U+B0EB L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B0ED..U+B107 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B109..U+B123 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B125..U+B13F L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B141..U+B15B L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B15D..U+B177 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B179..U+B193 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B195..U+B1AF L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B1B1..U+B1CB L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B1CD..U+B1E7 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B1E9..U+B203 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B205..U+B21F L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B221..U+B23B L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B23D..U+B257 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B259..U+B273 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B275..U+B28F L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B291..U+B2AB L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B2AD..U+B2C7 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B2C9..U+B2E3 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B2E5..U+B2FF L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B301..U+B31B L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B31D..U+B337 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B339..U+B353 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B355..U+B36F L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B371..U+B38B L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B38D..U+B3A7 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B3A9..U+B3C3 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B3C5..U+B3DF L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B3E1..U+B3FB L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B3FD..U+B417 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B419..U+B433 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B435..U+B44F L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B451..U+B46B L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B46D..U+B487 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B489..U+B4A3 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B4A5..U+B4BF L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B4C1..U+B4DB L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B4DD..U+B4F7 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B4F9..U+B513 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B515..U+B52F L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B531..U+B54B L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B54D..U+B567 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B569..U+B583 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B585..U+B59F L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B5A1..U+B5BB L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B5BD..U+B5D7 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B5D9..U+B5F3 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B5F5..U+B60F L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B611..U+B62B L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B62D..U+B647 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B649..U+B663 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B665..U+B67F L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B681..U+B69B L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B69D..U+B6B7 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B6B9..U+B6D3 L Letter: Other letter, Hangul, Hangul syllable type LVT
U+B6D5..U+B6EF L Letter: Other letter, Hangul, Hangul syllable type LVT
U+AC01..U+AC1B L Letter: Other letter, hangul, Hangul syllable type LVT
U+AC1D..U+AC37 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AC39..U+AC53 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AC55..U+AC6F L Letter: Other letter, hangul, Hangul syllable type LVT
U+AC71..U+AC8B L Letter: Other letter, hangul, Hangul syllable type LVT
U+AC8D..U+ACA7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+ACA9..U+ACC3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+ACC5..U+ACDF L Letter: Other letter, hangul, Hangul syllable type LVT
U+ACE1..U+ACFB L Letter: Other letter, hangul, Hangul syllable type LVT
U+ACFD..U+AD17 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AD19..U+AD33 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AD35..U+AD4F L Letter: Other letter, hangul, Hangul syllable type LVT
U+AD51..U+AD6B L Letter: Other letter, hangul, Hangul syllable type LVT
U+AD6D..U+AD87 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AD89..U+ADA3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+ADA5..U+ADBF L Letter: Other letter, hangul, Hangul syllable type LVT
U+ADC1..U+ADDB L Letter: Other letter, hangul, Hangul syllable type LVT
U+ADDD..U+ADF7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+ADF9..U+AE13 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AE15..U+AE2F L Letter: Other letter, hangul, Hangul syllable type LVT
U+AE31..U+AE4B L Letter: Other letter, hangul, Hangul syllable type LVT
U+AE4D..U+AE67 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AE69..U+AE83 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AE85..U+AE9F L Letter: Other letter, hangul, Hangul syllable type LVT
U+AEA1..U+AEBB L Letter: Other letter, hangul, Hangul syllable type LVT
U+AEBD..U+AED7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AED9..U+AEF3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AEF5..U+AF0F L Letter: Other letter, hangul, Hangul syllable type LVT
U+AF11..U+AF2B L Letter: Other letter, hangul, Hangul syllable type LVT
U+AF2D..U+AF47 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AF49..U+AF63 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AF65..U+AF7F L Letter: Other letter, hangul, Hangul syllable type LVT
U+AF81..U+AF9B L Letter: Other letter, hangul, Hangul syllable type LVT
U+AF9D..U+AFB7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AFB9..U+AFD3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+AFD5..U+AFEF L Letter: Other letter, hangul, Hangul syllable type LVT
U+AFF1..U+B00B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B00D..U+B027 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B029..U+B043 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B045..U+B05F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B061..U+B07B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B07D..U+B097 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B099..U+B0B3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B0B5..U+B0CF L Letter: Other letter, hangul, Hangul syllable type LVT
U+B0D1..U+B0EB L Letter: Other letter, hangul, Hangul syllable type LVT
U+B0ED..U+B107 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B109..U+B123 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B125..U+B13F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B141..U+B15B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B15D..U+B177 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B179..U+B193 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B195..U+B1AF L Letter: Other letter, hangul, Hangul syllable type LVT
U+B1B1..U+B1CB L Letter: Other letter, hangul, Hangul syllable type LVT
U+B1CD..U+B1E7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B1E9..U+B203 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B205..U+B21F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B221..U+B23B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B23D..U+B257 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B259..U+B273 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B275..U+B28F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B291..U+B2AB L Letter: Other letter, hangul, Hangul syllable type LVT
U+B2AD..U+B2C7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B2C9..U+B2E3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B2E5..U+B2FF L Letter: Other letter, hangul, Hangul syllable type LVT
U+B301..U+B31B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B31D..U+B337 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B339..U+B353 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B355..U+B36F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B371..U+B38B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B38D..U+B3A7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B3A9..U+B3C3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B3C5..U+B3DF L Letter: Other letter, hangul, Hangul syllable type LVT
U+B3E1..U+B3FB L Letter: Other letter, hangul, Hangul syllable type LVT
U+B3FD..U+B417 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B419..U+B433 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B435..U+B44F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B451..U+B46B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B46D..U+B487 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B489..U+B4A3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B4A5..U+B4BF L Letter: Other letter, hangul, Hangul syllable type LVT
U+B4C1..U+B4DB L Letter: Other letter, hangul, Hangul syllable type LVT
U+B4DD..U+B4F7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B4F9..U+B513 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B515..U+B52F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B531..U+B54B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B54D..U+B567 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B569..U+B583 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B585..U+B59F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B5A1..U+B5BB L Letter: Other letter, hangul, Hangul syllable type LVT
U+B5BD..U+B5D7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B5D9..U+B5F3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B5F5..U+B60F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B611..U+B62B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B62D..U+B647 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B649..U+B663 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B665..U+B67F L Letter: Other letter, hangul, Hangul syllable type LVT
U+B681..U+B69B L Letter: Other letter, hangul, Hangul syllable type LVT
U+B69D..U+B6B7 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B6B9..U+B6D3 L Letter: Other letter, hangul, Hangul syllable type LVT
U+B6D5..U+B6EF L Letter: Other letter, hangul, Hangul syllable type LVT
...
find script Old_Uyghur
U+10F70..U+10F81 R Letter: Other letter, Old_Uyghur, Other
U+10F82..U+10F85 NSM Mark: Non-spacing mark, Old_Uyghur, Extend
U+10F86..U+10F89 R Punctuation: Other punctuation, Old_Uyghur, Other
** Unrecognized script name "Old_Uyghur"
find bidi PDF
U+202C *PDF Control: Format, Common, Control
U+202C *PDF Control: Format, common, Control
find bidi CS
U+002C CS Punctuation: Other punctuation, Common, Other
U+002E..U+002F CS Punctuation: Other punctuation, Common, Other
U+003A CS Punctuation: Other punctuation, Common, Other
U+00A0 CS Separator: Space separator, Common, Other
U+060C CS Punctuation: Other punctuation, Common, Other, [Arabic, Nko, Hanifi_Rohingya, Syriac, Thaana, Yezidi]
U+202F CS Separator: Space separator, Common, Other, [Latin, Mongolian]
U+2044 CS Symbol: Mathematical symbol, Common, Other
U+FE50 CS Punctuation: Other punctuation, Common, Other
U+FE52 CS Punctuation: Other punctuation, Common, Other
U+FE55 CS Punctuation: Other punctuation, Common, Other
U+FF0C CS Punctuation: Other punctuation, Common, Other
U+FF0E..U+FF0F CS Punctuation: Other punctuation, Common, Other
U+FF1A CS Punctuation: Other punctuation, Common, Other
U+002C CS Punctuation: Other punctuation, common, Other
U+002E..U+002F CS Punctuation: Other punctuation, common, Other
U+003A CS Punctuation: Other punctuation, common, Other
U+00A0 CS Separator: Space separator, common, Other
U+060C CS Punctuation: Other punctuation, common, Other, [arabic, nko, hanifirohingya, syriac, thaana, yezidi]
U+202F CS Separator: Space separator, common, Other, [latin, mongolian]
U+2044 CS Symbol: Mathematical symbol, common, Other
U+FE50 CS Punctuation: Other punctuation, common, Other
U+FE52 CS Punctuation: Other punctuation, common, Other
U+FE55 CS Punctuation: Other punctuation, common, Other
U+FF0C CS Punctuation: Other punctuation, common, Other
U+FF0E..U+FF0F CS Punctuation: Other punctuation, common, Other
U+FF1A CS Punctuation: Other punctuation, common, Other
find bidi CS type Sm
U+2044 CS Symbol: Mathematical symbol, Common, Other
U+2044 CS Symbol: Mathematical symbol, common, Other
find bidi B
U+000A B Control: Control, Common, LF
U+000D B Control: Control, Common, CR
U+001C..U+001E B Control: Control, Common, Control
U+0085 B Control: Control, Common, Control
U+2029 B Separator: Paragraph separator, Common, Control
U+000A B Control: Control, common, LF
U+000D B Control: Control, common, CR
U+001C..U+001E B Control: Control, common, Control
U+0085 B Control: Control, common, Control
U+2029 B Separator: Paragraph separator, common, Control
find bidi FSI
U+2068 *FSI Control: Format, Common, Control
U+2068 *FSI Control: Format, common, Control
find bidi PDI
U+2069 *PDI Control: Format, Common, Control
U+2069 *PDI Control: Format, common, Control
find bidi RLI
U+2067 *RLI Control: Format, Common, Control
U+2067 *RLI Control: Format, common, Control
find bidi RLO
U+202E *RLO Control: Format, Common, Control
U+202E *RLO Control: Format, common, Control
find bidi S
U+0009 S Control: Control, Common, Control
U+000B S Control: Control, Common, Control
U+001F S Control: Control, Common, Control
U+0009 S Control: Control, common, Control
U+000B S Control: Control, common, Control
U+001F S Control: Control, common, Control
find bidi WS
U+000C WS Control: Control, Common, Control
U+0020 WS Separator: Space separator, Common, Other
U+1680 WS Separator: Space separator, Ogham, Other
U+2000..U+200A WS Separator: Space separator, Common, Other
U+2028 WS Separator: Line separator, Common, Control
U+205F WS Separator: Space separator, Common, Other
U+3000 WS Separator: Space separator, Common, Other
U+000C WS Control: Control, common, Control
U+0020 WS Separator: Space separator, common, Other
U+1680 WS Separator: Space separator, ogham, Other
U+2000..U+200A WS Separator: Space separator, common, Other
U+2028 WS Separator: Line separator, common, Control
U+205F WS Separator: Space separator, common, Other
U+3000 WS Separator: Space separator, common, Other
find bidi_control
U+061C *AL Control: Format, Arabic, Control, [Arabic, Syriac, Thaana]
U+200E *L Control: Format, Common, Control
U+200F *R Control: Format, Common, Control
U+202A *LRE Control: Format, Common, Control
U+202B *RLE Control: Format, Common, Control
U+202C *PDF Control: Format, Common, Control
U+202D *LRO Control: Format, Common, Control
U+202E *RLO Control: Format, Common, Control
U+2066 *LRI Control: Format, Common, Control
U+2067 *RLI Control: Format, Common, Control
U+2068 *FSI Control: Format, Common, Control
U+2069 *PDI Control: Format, Common, Control
U+061C *AL Control: Format, arabic, Control, [arabic, syriac, thaana]
U+200E *L Control: Format, common, Control
U+200F *R Control: Format, common, Control
U+202A *LRE Control: Format, common, Control
U+202B *RLE Control: Format, common, Control
U+202C *PDF Control: Format, common, Control
U+202D *LRO Control: Format, common, Control
U+202E *RLO Control: Format, common, Control
U+2066 *LRI Control: Format, common, Control
U+2067 *RLI Control: Format, common, Control
U+2068 *FSI Control: Format, common, Control
U+2069 *PDI Control: Format, common, Control

View File

@ -123,18 +123,20 @@ opcode is used to select the column. The values are as follows:
*/
static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
/* ANY LAMP GC PC SC ALNUM SPACE PXSPACE WORD CLIST UCNC */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */
{ 0, 3, 0, 0, 0, 3, 1, 1, 0, 0, 0 }, /* PT_LAMP */
{ 0, 0, 2, 4, 0, 9, 10, 10, 11, 0, 0 }, /* PT_GC */
{ 0, 0, 5, 2, 0, 15, 16, 16, 17, 0, 0 }, /* PT_PC */
{ 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0 }, /* PT_SC */
{ 0, 3, 6, 12, 0, 3, 1, 1, 0, 0, 0 }, /* PT_ALNUM */
{ 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_SPACE */
{ 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_PXSPACE */
{ 0, 0, 8, 14, 0, 0, 1, 1, 3, 0, 0 }, /* PT_WORD */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 } /* PT_UCNC */
/* ANY LAMP GC PC SC ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BIDICO */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */
{ 0, 3, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 1 }, /* PT_LAMP */
{ 0, 0, 2, 4, 0, 9, 10, 10, 11, 0, 0, 0, 0 }, /* PT_GC */
{ 0, 0, 5, 2, 0, 15, 16, 16, 17, 0, 0, 0, 0 }, /* PT_PC */
{ 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SC */
{ 0, 3, 6, 12, 0, 3, 1, 1, 0, 0, 0, 0, 1 }, /* PT_ALNUM */
{ 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0, 0, 1 }, /* PT_SPACE */
{ 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0, 0, 1 }, /* PT_PXSPACE */
{ 0, 0, 8, 14, 0, 0, 1, 1, 3, 0, 0, 0, 1 }, /* PT_WORD */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 }, /* PT_UCNC */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_BIDICL */
{ 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 } /* PT_BIDICO */
};
/* This table is used to check whether auto-possessification is possible
@ -251,6 +253,14 @@ switch(ptype)
if (c == *p++) return negated;
}
break; /* Control never reaches here */
/* Haven't yet thought these through. */
case PT_BIDICL:
return FALSE;
case PT_BIDICO:
return FALSE;
}
return FALSE;

View File

@ -2088,7 +2088,8 @@ get_ucp(PCRE2_SPTR *ptrptr, BOOL *negptr, uint16_t *ptypeptr,
PCRE2_UCHAR c;
PCRE2_SIZE i, bot, top;
PCRE2_SPTR ptr = *ptrptr;
PCRE2_UCHAR name[32];
PCRE2_UCHAR name[50];
PCRE2_UCHAR *vptr = NULL;
if (ptr >= cb->end_pattern) goto ERROR_RETURN;
c = *ptr++;
@ -2109,9 +2110,11 @@ if (c == CHAR_LEFT_CURLY_BRACKET)
{
if (ptr >= cb->end_pattern) goto ERROR_RETURN;
c = *ptr++;
while (c == '_' || c == '-' || isspace(c)) c = *ptr++;
if (c == CHAR_NUL) goto ERROR_RETURN;
if (c == CHAR_RIGHT_CURLY_BRACKET) break;
name[i] = c;
name[i] = tolower(c);
if (c == ':' || c == '=') vptr = name + i;
}
if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
name[i] = 0;
@ -2122,13 +2125,28 @@ letter. */
else if (MAX_255(c) && (cb->ctypes[c] & ctype_letter) != 0)
{
name[0] = c;
name[0] = tolower(c);
name[1] = 0;
}
else goto ERROR_RETURN;
*ptrptr = ptr;
/* If the property contains ':' or '=' we have class name and value separately
specified. The only case currently supported is Bidi_Class, for which the
property names are "bidi<name>". */
if (vptr != NULL)
{
*vptr = 0; /* Terminate class name */
if (PRIV(strcmp_c8)(name, "bidiclass") != 0)
{
*errorcodeptr = ERR47;
return FALSE;
}
memmove(name + 4, vptr + 1, (name + i - vptr)*sizeof(PCRE2_UCHAR));
}
/* Search for a recognized property name using binary chop. */
bot = 0;
@ -2147,6 +2165,7 @@ while (bot < top)
}
if (r > 0) bot = i + 1; else top = i;
}
*errorcodeptr = ERR47; /* Unrecognized name */
return FALSE;

View File

@ -1240,6 +1240,14 @@ for (;;)
c >= 0xe000;
break;
case PT_BIDICO:
OK = UCD_BIDICONTROL(c) != 0;
break;
case PT_BIDICL:
OK = UCD_BIDICLASS(c) == code[2];
break;
/* Should never occur, but keep compilers from grumbling. */
default:
@ -1498,6 +1506,14 @@ for (;;)
c >= 0xe000;
break;
case PT_BIDICO:
OK = UCD_BIDICONTROL(c) != 0;
break;
case PT_BIDICL:
OK = UCD_BIDICLASS(c) == code[3];
break;
/* Should never occur, but keep compilers from grumbling. */
default:
@ -1739,6 +1755,14 @@ for (;;)
c >= 0xe000;
break;
case PT_BIDICO:
OK = UCD_BIDICONTROL(c) != 0;
break;
case PT_BIDICL:
OK = UCD_BIDICLASS(c) == code[3];
break;
/* Should never occur, but keep compilers from grumbling. */
default:
@ -2005,6 +2029,14 @@ for (;;)
c >= 0xe000;
break;
case PT_BIDICO:
OK = UCD_BIDICONTROL(c) != 0;
break;
case PT_BIDICL:
OK = UCD_BIDICLASS(c) == code[1 + IMM2_SIZE + 2];
break;
/* Should never occur, but keep compilers from grumbling. */
default:

View File

@ -1262,12 +1262,14 @@ only. */
#define PT_PC 3 /* Specified particular characteristic (e.g. Lu) */
#define PT_SC 4 /* Script (e.g. Han) */
#define PT_ALNUM 5 /* Alphanumeric - the union of L and N */
#define PT_SPACE 6 /* Perl space - Z plus 9,10,12,13 */
#define PT_SPACE 6 /* Perl space - general category Z plus 9,10,12,13 */
#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */
#define PT_WORD 8 /* Word - L plus N plus underscore */
#define PT_CLIST 9 /* Pseudo-property: match character list */
#define PT_UCNC 10 /* Universal Character nameable character */
#define PT_TABSIZE 11 /* Size of square table for autopossessify tests */
#define PT_BIDICL 11 /* Specified bidi class */
#define PT_BIDICO 12 /* Bidi control character */
#define PT_TABSIZE 13 /* Size of square table for autopossessify tests */
/* The following special properties are used only in XCLASS items, when POSIX
classes are specified and PCRE2_UCP is set - in other words, for Unicode
@ -1275,9 +1277,9 @@ handling of these classes. They are not available via the \p or \P escapes like
those in the above list, and so they do not take part in the autopossessifying
table. */
#define PT_PXGRAPH 11 /* [:graph:] - characters that mark the paper */
#define PT_PXPRINT 12 /* [:print:] - [:graph:] plus non-control spaces */
#define PT_PXPUNCT 13 /* [:punct:] - punctuation characters */
#define PT_PXGRAPH 13 /* [:graph:] - characters that mark the paper */
#define PT_PXPRINT 14 /* [:print:] - [:graph:] plus non-control spaces */
#define PT_PXPUNCT 15 /* [:punct:] - punctuation characters */
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
contain characters with values greater than 255. */
@ -1828,8 +1830,11 @@ typedef struct {
property. The remaining bits hold the bidi class, but as there are only 23
classes, we can mask off 5 bits - leaving two free for the future. */
#define UCD_BIDICLASS(ch) (GET_UCD(ch)->bidi & 0x1fu)
#define UCD_BIDICONTROL(ch) (GET_UCD(ch)->bidi & 0x80u)
#define UCD_BIDICLASS_MASK 0x1fu
#define UCD_BIDICONTROL_BIT 0x80u
#define UCD_BIDICLASS(ch) (GET_UCD(ch)->bidi & UCD_BIDICLASS_MASK)
#define UCD_BIDICONTROL(ch) (GET_UCD(ch)->bidi & UCD_BIDICONTROL_BIT)
/* Header for serialized pcre2 codes. */

View File

@ -159,7 +159,8 @@ enum { RM100=100, RM101 };
#ifdef SUPPORT_UNICODE
enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207,
RM208, RM209, RM210, RM211, RM212, RM213, RM214, RM215,
RM216, RM217, RM218, RM219, RM220, RM221, RM222 };
RM216, RM217, RM218, RM219, RM220, RM221, RM222, RM223,
RM224 };
#endif
/* Define short names for general fields in the current backtrack frame, which
@ -2503,6 +2504,16 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
RRETURN(MATCH_NOMATCH);
break;
case PT_BIDICO:
if (((prop->bidi & UCD_BIDICONTROL_BIT) != 0) == (Fop == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
break;
case PT_BIDICL:
if (((prop->bidi & UCD_BIDICLASS_MASK) == Fecode[2]) == (Fop == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
break;
/* This should never occur */
default:
@ -2804,6 +2815,34 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
break;
case PT_BIDICO:
for (i = 1; i <= Lmin; i++)
{
if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(fc, Feptr);
if ((UCD_BIDICONTROL(fc) != 0) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
break;
case PT_BIDICL:
for (i = 1; i <= Lmin; i++)
{
if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(fc, Feptr);
if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
break;
/* This should not occur */
default:
@ -3562,6 +3601,40 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
/* Control never gets here */
case PT_BIDICO:
for (;;)
{
RMATCH(Fecode, RM223);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(fc, Feptr);
if ((UCD_BIDICONTROL(fc) != 0) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
case PT_BIDICL:
for (;;)
{
RMATCH(Fecode, RM224);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(fc, Feptr);
if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
/* This should never occur */
default:
return PCRE2_ERROR_INTERNAL;
@ -4076,6 +4149,38 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
break;
case PT_BIDICO:
for (i = Lmin; i < Lmax; i++)
{
int len = 1;
if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
GETCHARLENTEST(fc, Feptr, len);
if ((UCD_BIDICONTROL(fc) != 0) == (Lctype == OP_NOTPROP))
break;
Feptr+= len;
}
break;
case PT_BIDICL:
for (i = Lmin; i < Lmax; i++)
{
int len = 1;
if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
break;
}
GETCHARLENTEST(fc, Feptr, len);
if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
break;
Feptr+= len;
}
break;
default:
return PCRE2_ERROR_INTERNAL;
}
@ -6066,7 +6171,7 @@ switch (Freturn_id)
LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
LBL(221) LBL(222)
LBL(221) LBL(222) LBL(223) LBL(224)
#endif
default:

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2019 University of Cambridge
New API code Copyright (c) 2016-2021 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -273,8 +273,8 @@ print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
{
if (code[1] != PT_CLIST)
{
fprintf(f, "%s%s %s%s", before, OP_names[*code], get_ucpname(code[1],
code[2]), after);
const char *s = get_ucpname(code[1], code[2]);
fprintf(f, "%s%s %c%s%s", before, OP_names[*code], toupper(s[0]), s+1, after);
}
else
{
@ -724,6 +724,7 @@ for(;;)
{
unsigned int ptype = *ccode++;
unsigned int pvalue = *ccode++;
const char *s;
switch(ptype)
{
@ -740,8 +741,8 @@ for(;;)
break;
default:
fprintf(f, "\\%c{%s}", (not? 'P':'p'),
get_ucpname(ptype, pvalue));
s = get_ucpname(ptype, pvalue);
fprintf(f, "\\%c{%c%s}", (not? 'P':'p'), toupper(s[0]), s+1);
break;
}
}

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2018 University of Cambridge
New API code Copyright (c) 2018-2021 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without

File diff suppressed because it is too large Load Diff

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2019 University of Cambridge
New API code Copyright (c) 2016-2021 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -207,6 +207,16 @@ while ((t = *data++) != XCL_END)
}
break;
case PT_BIDICO:
if (((prop->bidi & UCD_BIDICONTROL_BIT) != 0) == isprop)
return !negated;
break;
case PT_BIDICL:
if (((prop->bidi & UCD_BIDICLASS_MASK) == data[1]) == isprop)
return !negated;
break;
/* The following three properties can occur only in an XCLASS, as there
is no \p or \P coding for them. */

View File

@ -441,6 +441,7 @@ enum { MOD_CTC, /* Applies to a compile context */
MOD_PAT, /* Applies to a pattern */
MOD_PATP, /* Ditto, OK for Perl test */
MOD_DAT, /* Applies to a data line */
MOD_DATP, /* Ditto, OK for Perl test */
MOD_PD, /* Applies to a pattern or a data line */
MOD_PDP, /* As MOD_PD, OK for Perl test */
MOD_PND, /* As MOD_PD, but not for a default pattern */
@ -700,7 +701,7 @@ static modstruct modlist[] = {
{ "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) },
{ "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) },
{ "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) },
{ "no_jit", MOD_DAT, MOD_OPT, PCRE2_NO_JIT, DO(options) },
{ "no_jit", MOD_DATP, MOD_OPT, PCRE2_NO_JIT, DO(options) },
{ "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) },
{ "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) },
{ "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) },
@ -3583,6 +3584,7 @@ if (restrict_for_perl_test) switch(m->which)
{
case MOD_PNDP:
case MOD_PATP:
case MOD_DATP:
case MOD_PDP:
break;
@ -3605,6 +3607,7 @@ switch (m->which)
break;
case MOD_DAT: /* Data line modifier */
case MOD_DATP: /* Allowed for Perl test */
if (dctl != NULL) field = dctl;
break;

114
testdata/testinput4 vendored
View File

@ -2495,4 +2495,118 @@
\x{42f}
\x{44f}
# -----------------------------------------------------------------------------
# Tests for bidi control and bidi class properties, not yet supported by JIT.
#subject no_jit
/\p{ bidi_control }/utf
-->\x{202c}<--
/\p{bidicontrol}+/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
/\p{bidicontrol}+?/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
/\p{bidicontrol}++/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
/[\p{bidi_control}]/utf
-->\x{202c}<--
/[\p{bidicontrol}]+/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
/[\p{bidicontrol}]+?/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
/[\p{bidicontrol}]++/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
/[\p{bidicontrol}<>]+/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
/\P{bidicontrol}+/g,utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
/\p{^bidicontrol}+/g,utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
/\p{bidi class = al}/utf
-->\x{061D}<--
/\p{bidi class = al}+/utf
-->\x{061D}\x{061e}\x{061f}<--
/\p{bidi_class : AL}+?/utf
-->\x{061D}\x{061e}\x{061f}<--
/\p{Bidi_Class : AL}++/utf
-->\x{061D}\x{061e}\x{061f}<--
/\p{bidi class = aN}+/utf
-->\x{061D}\x{0602}\x{0604}\x{061f}<--
/\p{bidi class = B}+/utf
-->\x{0a}\x{0d}\x{01c}\x{01e}\x{085}\x{2029}<--
/\p{bidi class:BN}+/utf
-->\x{0}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}<--
/\p{bidiclass:cs}+/utf
-->,.\x{060c}\x{ff1a}<--
/\p{bidiclass:En}+/utf
-->09\x{b2}\x{2074}\x{1fbf9}<--
/\p{bidiclass:es}+/utf
==>+-\x{207a}\x{ff0d}<==
/\p{bidiclass:et}+/utf
-->#\{24}%\x{a2}\x{A838}\x{1e2ff}<--
/\p{bidiclass:FSI}+/utf
-->\x{2068}<--
/\p{bidi class:L}+/utf
-->ABC<--
/\P{bidi class:L}+/utf
-->ABC<--
/\p{bidi class:LRE}+\p{bidiclass=lri}*\p{bidiclass:lro}/utf
-->\x{202a}\x{2066}\x{202d}<--
/\p{bidi class:NSM}+/utf
-->\x{9bc}\x{a71}\x{e31}<--
/\p{bidi class:ON}+/utf
-->\x{21}'()*;@\x{384}\x{2039}<=-
/\p{bidiclass:pdf}\p{bidiclass:pdi}/utf
-->\x{202c}\x{2069}<--
/\p{bidi class:R}+/utf
-->\x{590}\x{5c6}\x{200f}\x{10805}<--
/\p{bidi class:RLE}+\p{bidi class:RLI}*\p{bidi class:RLO}+/utf
-->\x{202b}\x{2067}\x{202e}<--
/\p{bidi class:S}+\p{bidiclass:WS}+/utf
-->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<--
#subject -no_jit
# -----------------------------------------------------------------------------
# End of testinput4

4
testdata/testinput5 vendored
View File

@ -2188,4 +2188,8 @@
/(\xc1)\1/i,ucp
\xc1\xe1\=no_jit
/\p{L&}+\p{bidi_control}/B
/\p{bidi_control}+\p{L&}/B
# End of testinput5

110
testdata/testinput7 vendored
View File

@ -2093,4 +2093,114 @@
/(?<=\x{100})\x{200}(?=\x{300})/utf,allusedtext
\x{100}\x{200}\x{300}
# -----------------------------------------------------------------------------
# Tests for bidi control and bidi class properties
/\p{ bidi_control }/utf
-->\x{202c}<--
/\p{bidicontrol}+/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
/\p{bidicontrol}+?/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
/\p{bidicontrol}++/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
/[\p{bidi_control}]/utf
-->\x{202c}<--
/[\p{bidicontrol}]+/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
/[\p{bidicontrol}]+?/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
/[\p{bidicontrol}]++/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
/[\p{bidicontrol}<>]+/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
/\P{bidicontrol}+/g,utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
/\p{^bidicontrol}+/g,utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
/\p{bidi class = al}/utf
-->\x{061D}<--
/\p{bidi class = al}+/utf
-->\x{061D}\x{061e}\x{061f}<--
/\p{bidi_class : AL}+?/utf
-->\x{061D}\x{061e}\x{061f}<--
/\p{Bidi_Class : AL}++/utf
-->\x{061D}\x{061e}\x{061f}<--
/\p{bidi class = aN}+/utf
-->\x{061D}\x{0602}\x{0604}\x{061f}<--
/\p{bidi class = B}+/utf
-->\x{0a}\x{0d}\x{01c}\x{01e}\x{085}\x{2029}<--
/\p{bidi class:BN}+/utf
-->\x{0}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}<--
/\p{bidiclass:cs}+/utf
-->,.\x{060c}\x{ff1a}<--
/\p{bidiclass:En}+/utf
-->09\x{b2}\x{2074}\x{1fbf9}<--
/\p{bidiclass:es}+/utf
==>+-\x{207a}\x{ff0d}<==
/\p{bidiclass:et}+/utf
-->#\{24}%\x{a2}\x{A838}\x{1e2ff}<--
/\p{bidiclass:FSI}+/utf
-->\x{2068}<--
/\p{bidi class:L}+/utf
-->ABC<--
/\P{bidi class:L}+/utf
-->ABC<--
/\p{bidi class:LRE}+\p{bidiclass=lri}*\p{bidiclass:lro}/utf
-->\x{202a}\x{2066}\x{202d}<--
/\p{bidi class:NSM}+/utf
-->\x{9bc}\x{a71}\x{e31}<--
/\p{bidi class:ON}+/utf
-->\x{21}'()*;@\x{384}\x{2039}<=-
/\p{bidiclass:pdf}\p{bidiclass:pdi}/utf
-->\x{202c}\x{2069}<--
/\p{bidi class:R}+/utf
-->\x{590}\x{5c6}\x{200f}\x{10805}<--
/\p{bidi class:RLE}+\p{bidi class:RLI}*\p{bidi class:RLO}+/utf
-->\x{202b}\x{2067}\x{202e}<--
/\p{bidi class:S}+\p{bidiclass:WS}+/utf
-->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<--
# -----------------------------------------------------------------------------
# End of testinput7

159
testdata/testoutput4 vendored
View File

@ -4032,4 +4032,163 @@ No match
\x{44f}
0:
# -----------------------------------------------------------------------------
# Tests for bidi control and bidi class properties, not yet supported by JIT.
#subject no_jit
/\p{ bidi_control }/utf
-->\x{202c}<--
0: \x{202c}
/\p{bidicontrol}+/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
0: \x{2066}\x{2067}\x{2068}\x{2069}
/\p{bidicontrol}+?/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
0: \x{61c}
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
0: \x{2066}
/\p{bidicontrol}++/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
0: \x{2066}\x{2067}\x{2068}\x{2069}
/[\p{bidi_control}]/utf
-->\x{202c}<--
0: \x{202c}
/[\p{bidicontrol}]+/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
0: \x{2066}\x{2067}\x{2068}\x{2069}
/[\p{bidicontrol}]+?/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
0: \x{61c}
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
0: \x{2066}
/[\p{bidicontrol}]++/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
0: \x{2066}\x{2067}\x{2068}\x{2069}
/[\p{bidicontrol}<>]+/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
0: >\x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
0: >\x{2066}\x{2067}\x{2068}\x{2069}<
/\P{bidicontrol}+/g,utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
0: -->
0: <--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
0: -->
0: <--
/\p{^bidicontrol}+/g,utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
0: -->
0: <--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
0: -->
0: <--
/\p{bidi class = al}/utf
-->\x{061D}<--
0: \x{61d}
/\p{bidi class = al}+/utf
-->\x{061D}\x{061e}\x{061f}<--
0: \x{61d}\x{61e}\x{61f}
/\p{bidi_class : AL}+?/utf
-->\x{061D}\x{061e}\x{061f}<--
0: \x{61d}
/\p{Bidi_Class : AL}++/utf
-->\x{061D}\x{061e}\x{061f}<--
0: \x{61d}\x{61e}\x{61f}
/\p{bidi class = aN}+/utf
-->\x{061D}\x{0602}\x{0604}\x{061f}<--
0: \x{602}\x{604}
/\p{bidi class = B}+/utf
-->\x{0a}\x{0d}\x{01c}\x{01e}\x{085}\x{2029}<--
0: \x{0a}\x{0d}\x{1c}\x{1e}\x{85}\x{2029}
/\p{bidi class:BN}+/utf
-->\x{0}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}<--
0: \x{00}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}
/\p{bidiclass:cs}+/utf
-->,.\x{060c}\x{ff1a}<--
0: ,.\x{60c}\x{ff1a}
/\p{bidiclass:En}+/utf
-->09\x{b2}\x{2074}\x{1fbf9}<--
0: 09\x{b2}\x{2074}\x{1fbf9}
/\p{bidiclass:es}+/utf
==>+-\x{207a}\x{ff0d}<==
0: +-\x{207a}\x{ff0d}
/\p{bidiclass:et}+/utf
-->#\{24}%\x{a2}\x{A838}\x{1e2ff}<--
0: #
/\p{bidiclass:FSI}+/utf
-->\x{2068}<--
0: \x{2068}
/\p{bidi class:L}+/utf
-->ABC<--
0: ABC
/\P{bidi class:L}+/utf
-->ABC<--
0: -->
/\p{bidi class:LRE}+\p{bidiclass=lri}*\p{bidiclass:lro}/utf
-->\x{202a}\x{2066}\x{202d}<--
0: \x{202a}\x{2066}\x{202d}
/\p{bidi class:NSM}+/utf
-->\x{9bc}\x{a71}\x{e31}<--
0: \x{9bc}\x{a71}\x{e31}
/\p{bidi class:ON}+/utf
-->\x{21}'()*;@\x{384}\x{2039}<=-
0: >!'()*;@\x{384}\x{2039}<=
/\p{bidiclass:pdf}\p{bidiclass:pdi}/utf
-->\x{202c}\x{2069}<--
0: \x{202c}\x{2069}
/\p{bidi class:R}+/utf
-->\x{590}\x{5c6}\x{200f}\x{10805}<--
0: \x{590}\x{5c6}\x{200f}\x{10805}
/\p{bidi class:RLE}+\p{bidi class:RLI}*\p{bidi class:RLO}+/utf
-->\x{202b}\x{2067}\x{202e}<--
0: \x{202b}\x{2067}\x{202e}
/\p{bidi class:S}+\p{bidiclass:WS}+/utf
-->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<--
0: \x{09}\x{0b}\x{1f} \x{0c} \x{2000} \x{3000}
#subject -no_jit
# -----------------------------------------------------------------------------
# End of testinput4

64
testdata/testoutput5 vendored
View File

@ -3298,7 +3298,7 @@ No match
AllAny+
notprop Any
AllAny+
prop L&
prop Lc
AllAny+
prop L
AllAny+
@ -3322,29 +3322,29 @@ No match
/\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp
------------------------------------------------------------------
Bra
prop L& +
prop Lc +
AllAny
prop L& +
prop L&
notprop L& ++
prop L&
prop L& +
prop Lc +
prop Lc
notprop Lc ++
prop Lc
prop Lc +
prop L
prop L& +
prop Lc +
prop Lu
prop L& +
prop Lc +
prop Han
prop L& +
prop Lc +
prop Xan
prop L& ++
prop Lc ++
notprop Xan
prop L& ++
prop Lc ++
prop Xsp
prop L& ++
prop Lc ++
prop Xps
prop Xwd +
prop L&
prop L& +
prop Lc
prop Lc +
prop Xuc
Ket
End
@ -3356,7 +3356,7 @@ No match
prop N +
AllAny
prop N +
prop L&
prop Lc
prop N ++
prop L
prop N +
@ -3387,7 +3387,7 @@ No match
prop Lu +
AllAny
prop Lu +
prop L&
prop Lc
prop Lu +
prop L
prop Lu +
@ -3420,7 +3420,7 @@ No match
prop Han +
prop Lu
prop Han +
prop L&
prop Lc
prop Han +
prop L
prop Han +
@ -3449,9 +3449,9 @@ No match
prop Xan +
AllAny
prop Xan +
prop L&
prop Lc
notprop Xan ++
prop L&
prop Lc
prop Xan +
prop L
prop Xan +
@ -3480,7 +3480,7 @@ No match
prop Xsp +
AllAny
prop Xsp ++
prop L&
prop Lc
prop Xsp ++
prop L
prop Xsp ++
@ -3509,7 +3509,7 @@ No match
prop Xwd +
AllAny
prop Xwd +
prop L&
prop Lc
prop Xwd +
prop L
prop Xwd +
@ -3538,7 +3538,7 @@ No match
prop Xuc +
AllAny
prop Xuc +
prop L&
prop Lc
prop Xuc +
prop L
prop Xuc +
@ -4949,4 +4949,22 @@ Subject length lower bound = 3
0: \xc1\xe1
1: \xc1
/\p{L&}+\p{bidi_control}/B
------------------------------------------------------------------
Bra
prop Lc ++
prop Bidicontrol
Ket
End
------------------------------------------------------------------
/\p{bidi_control}+\p{L&}/B
------------------------------------------------------------------
Bra
prop Bidicontrol ++
prop Lc
Ket
End
------------------------------------------------------------------
# End of testinput5

175
testdata/testoutput7 vendored
View File

@ -3539,4 +3539,179 @@ No match
0: \x{100}\x{200}\x{300}
<<<<<<< >>>>>>>
# -----------------------------------------------------------------------------
# Tests for bidi control and bidi class properties
/\p{ bidi_control }/utf
-->\x{202c}<--
0: \x{202c}
/\p{bidicontrol}+/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
0: \x{2066}\x{2067}\x{2068}\x{2069}
/\p{bidicontrol}+?/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
1: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}
2: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}
3: \x{61c}\x{200e}\x{200f}\x{202a}
4: \x{61c}\x{200e}\x{200f}
5: \x{61c}\x{200e}
6: \x{61c}
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
0: \x{2066}\x{2067}\x{2068}\x{2069}
1: \x{2066}\x{2067}\x{2068}
2: \x{2066}\x{2067}
3: \x{2066}
/\p{bidicontrol}++/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
0: \x{2066}\x{2067}\x{2068}\x{2069}
/[\p{bidi_control}]/utf
-->\x{202c}<--
0: \x{202c}
/[\p{bidicontrol}]+/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
0: \x{2066}\x{2067}\x{2068}\x{2069}
/[\p{bidicontrol}]+?/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
1: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}
2: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}
3: \x{61c}\x{200e}\x{200f}\x{202a}
4: \x{61c}\x{200e}\x{200f}
5: \x{61c}\x{200e}
6: \x{61c}
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
0: \x{2066}\x{2067}\x{2068}\x{2069}
1: \x{2066}\x{2067}\x{2068}
2: \x{2066}\x{2067}
3: \x{2066}
/[\p{bidicontrol}]++/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
0: \x{2066}\x{2067}\x{2068}\x{2069}
/[\p{bidicontrol}<>]+/utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
0: >\x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
0: >\x{2066}\x{2067}\x{2068}\x{2069}<
/\P{bidicontrol}+/g,utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
0: -->
0: <--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
0: -->
0: <--
/\p{^bidicontrol}+/g,utf
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
0: -->
0: <--
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
0: -->
0: <--
/\p{bidi class = al}/utf
-->\x{061D}<--
0: \x{61d}
/\p{bidi class = al}+/utf
-->\x{061D}\x{061e}\x{061f}<--
0: \x{61d}\x{61e}\x{61f}
/\p{bidi_class : AL}+?/utf
-->\x{061D}\x{061e}\x{061f}<--
0: \x{61d}\x{61e}\x{61f}
1: \x{61d}\x{61e}
2: \x{61d}
/\p{Bidi_Class : AL}++/utf
-->\x{061D}\x{061e}\x{061f}<--
0: \x{61d}\x{61e}\x{61f}
/\p{bidi class = aN}+/utf
-->\x{061D}\x{0602}\x{0604}\x{061f}<--
0: \x{602}\x{604}
/\p{bidi class = B}+/utf
-->\x{0a}\x{0d}\x{01c}\x{01e}\x{085}\x{2029}<--
0: \x{0a}\x{0d}\x{1c}\x{1e}\x{85}\x{2029}
/\p{bidi class:BN}+/utf
-->\x{0}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}<--
0: \x{00}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}
/\p{bidiclass:cs}+/utf
-->,.\x{060c}\x{ff1a}<--
0: ,.\x{60c}\x{ff1a}
/\p{bidiclass:En}+/utf
-->09\x{b2}\x{2074}\x{1fbf9}<--
0: 09\x{b2}\x{2074}\x{1fbf9}
/\p{bidiclass:es}+/utf
==>+-\x{207a}\x{ff0d}<==
0: +-\x{207a}\x{ff0d}
/\p{bidiclass:et}+/utf
-->#\{24}%\x{a2}\x{A838}\x{1e2ff}<--
0: #
/\p{bidiclass:FSI}+/utf
-->\x{2068}<--
0: \x{2068}
/\p{bidi class:L}+/utf
-->ABC<--
0: ABC
/\P{bidi class:L}+/utf
-->ABC<--
0: -->
/\p{bidi class:LRE}+\p{bidiclass=lri}*\p{bidiclass:lro}/utf
-->\x{202a}\x{2066}\x{202d}<--
0: \x{202a}\x{2066}\x{202d}
/\p{bidi class:NSM}+/utf
-->\x{9bc}\x{a71}\x{e31}<--
0: \x{9bc}\x{a71}\x{e31}
/\p{bidi class:ON}+/utf
-->\x{21}'()*;@\x{384}\x{2039}<=-
0: >!'()*;@\x{384}\x{2039}<=
/\p{bidiclass:pdf}\p{bidiclass:pdi}/utf
-->\x{202c}\x{2069}<--
0: \x{202c}\x{2069}
/\p{bidi class:R}+/utf
-->\x{590}\x{5c6}\x{200f}\x{10805}<--
0: \x{590}\x{5c6}\x{200f}\x{10805}
/\p{bidi class:RLE}+\p{bidi class:RLI}*\p{bidi class:RLO}+/utf
-->\x{202b}\x{2067}\x{202e}<--
0: \x{202b}\x{2067}\x{202e}
/\p{bidi class:S}+\p{bidiclass:WS}+/utf
-->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<--
0: \x{09}\x{0b}\x{1f} \x{0c} \x{2000} \x{3000}
# -----------------------------------------------------------------------------
# End of testinput7