Add support for Bidi_Control and Bidi_Class properties
This commit is contained in:
parent
823d4ac956
commit
0246c6bf64
7
HACKING
7
HACKING
|
@ -546,8 +546,9 @@ Each is followed by two code units that encode the desired property as a type
|
|||
and a value. The types are a set of #defines of the form PT_xxx, and the values
|
||||
are enumerations of the form ucp_xx, defined in the pcre2_ucp.h source file.
|
||||
The value is relevant only for PT_GC (General Category), PT_PC (Particular
|
||||
Category), PT_SC (Script), and the pseudo-property PT_CLIST, which is used to
|
||||
identify a list of case-equivalent characters when there are three or more.
|
||||
Category), PT_SC (Script), PT_BIDICL (Bidi Class), and the pseudo-property
|
||||
PT_CLIST, which is used to identify a list of case-equivalent characters when
|
||||
there are three or more.
|
||||
|
||||
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
|
||||
three code units: OP_PROP or OP_NOTPROP, and then the desired property type and
|
||||
|
@ -827,4 +828,4 @@ not a real opcode, but is used to check at compile time that tables indexed by
|
|||
opcode are the correct length, in order to catch updating errors.
|
||||
|
||||
Philip Hazel
|
||||
12 July 2019
|
||||
December 2021
|
||||
|
|
|
@ -29,6 +29,9 @@
|
|||
# Added script names for Unicode 12.1.0, 27-July-2019.
|
||||
# Added script names for Unicode 13.0.0, 10-March-2020.
|
||||
# Added Script names for Unicode 14.0.0, PCRE2-10.39
|
||||
# Added support for bidi class and bidi control, 06-December-2021
|
||||
# This also involved lower casing strings and removing underscores, in
|
||||
# accordance with Unicode's "loose matching" rules, which Perl observes.
|
||||
|
||||
script_names = ['Unknown', 'Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \
|
||||
'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \
|
||||
|
@ -78,21 +81,46 @@ category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
|
|||
|
||||
general_category_names = ['C', 'L', 'M', 'N', 'P', 'S', 'Z']
|
||||
|
||||
# First add the Unicode script and category names.
|
||||
bidiclass_names = ['bidiAL', 'bidiAN', 'bidiB', 'bidiBN', 'bidiCS', 'bidiEN',
|
||||
'bidiES', 'bidiET', 'bidiFSI', 'bidiL', 'bidiLRE', 'bidiLRI', 'bidiLRO',
|
||||
'bidiNSM', 'bidiON', 'bidiPDF', 'bidiPDI', 'bidiR', 'bidiRLE', 'bidiRLI',
|
||||
'bidiRLO', 'bidiS', 'bidiWS' ]
|
||||
|
||||
utt_table = list(zip(script_names, ['PT_SC'] * len(script_names)))
|
||||
utt_table += list(zip(category_names, ['PT_PC'] * len(category_names)))
|
||||
utt_table += list(zip(general_category_names, ['PT_GC'] * len(general_category_names)))
|
||||
# Create standardized versions of the names by lowercasing and removing
|
||||
# underscores.
|
||||
|
||||
# Now add our own specials.
|
||||
def stdnames(x):
    """Return the standardized ("loose matching") forms of the names in x.

    Each name is converted to lower case and has its underscores removed.
    Other characters, such as the ampersand in 'L&', are left in place.
    The result is a new list in the same order as x; x is not modified.
    """
    return [name.lower().replace('_', '') for name in x]
|
||||
|
||||
utt_table.append(('Any', 'PT_ANY'))
|
||||
utt_table.append(('L&', 'PT_LAMP'))
|
||||
utt_table.append(('Xan', 'PT_ALNUM'))
|
||||
utt_table.append(('Xps', 'PT_PXSPACE'))
|
||||
utt_table.append(('Xsp', 'PT_SPACE'))
|
||||
utt_table.append(('Xuc', 'PT_UCNC'))
|
||||
utt_table.append(('Xwd', 'PT_WORD'))
|
||||
std_script_names = stdnames(script_names)
|
||||
std_category_names = stdnames(category_names)
|
||||
std_general_category_names = stdnames(general_category_names)
|
||||
std_bidiclass_names = stdnames(bidiclass_names)
|
||||
|
||||
# Create the table, starting with the Unicode script, category and bidi class
|
||||
# names. We keep both the standardized name and the original, because the
|
||||
# latter is used for the ucp_xx names.
|
||||
|
||||
utt_table = list(zip(std_script_names, script_names, ['PT_SC'] * len(script_names)))
|
||||
utt_table += list(zip(std_category_names, category_names, ['PT_PC'] * len(category_names)))
|
||||
utt_table += list(zip(std_general_category_names, general_category_names, ['PT_GC'] * len(general_category_names)))
|
||||
utt_table += list(zip(std_bidiclass_names, bidiclass_names, ['PT_BIDICL'] * len(bidiclass_names)))
|
||||
|
||||
# Now add our own specials. Note that both the standardized and capitalized forms
|
||||
# are needed.
|
||||
|
||||
utt_table.append(('any', 'Any', 'PT_ANY'))
|
||||
utt_table.append(('bidicontrol', 'Bidi_Control', 'PT_BIDICO'))
|
||||
utt_table.append(('l&', 'L&', 'PT_LAMP'))
|
||||
utt_table.append(('lc', 'LC', 'PT_LAMP'))
|
||||
utt_table.append(('xan', 'Xan', 'PT_ALNUM'))
|
||||
utt_table.append(('xps', 'Xps', 'PT_PXSPACE'))
|
||||
utt_table.append(('xsp', 'Xsp', 'PT_SPACE'))
|
||||
utt_table.append(('xuc', 'Xuc', 'PT_UCNC'))
|
||||
utt_table.append(('xwd', 'Xwd', 'PT_WORD'))
|
||||
|
||||
# Sort the table.
|
||||
|
||||
|
@ -104,9 +132,7 @@ utt_table.sort()
|
|||
for utt in utt_table:
|
||||
print('#define STRING_%s0' % (utt[0].replace('&', '_AMPERSAND')), end=' ')
|
||||
for c in utt[0]:
|
||||
if c == '_':
|
||||
print('STR_UNDERSCORE', end=' ')
|
||||
elif c == '&':
|
||||
if c == '&':
|
||||
print('STR_AMPERSAND', end=' ')
|
||||
else:
|
||||
print('STR_%s' % c, end=' ');
|
||||
|
@ -121,20 +147,18 @@ for utt in utt_table:
|
|||
if utt == utt_table[-1]:
|
||||
last = ';'
|
||||
print(' STRING_%s0%s' % (utt[0].replace('&', '_AMPERSAND'), last))
|
||||
# This was how it was done before the EBCDIC-compatible modification.
|
||||
# print ' "%s\\0"%s' % (utt[0], last)
|
||||
|
||||
print('\nconst ucp_type_table PRIV(utt)[] = {')
|
||||
offset = 0
|
||||
last = ','
|
||||
for utt in utt_table:
|
||||
if utt[1] in ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE',
|
||||
'PT_SPACE', 'PT_UCNC', 'PT_WORD'):
|
||||
if utt[2] in ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE',
|
||||
'PT_SPACE', 'PT_UCNC', 'PT_WORD', 'PT_BIDICO'):
|
||||
value = '0'
|
||||
else:
|
||||
value = 'ucp_' + utt[0]
|
||||
value = 'ucp_' + utt[1]
|
||||
if utt == utt_table[-1]:
|
||||
last = ''
|
||||
print(' { %3d, %s, %s }%s' % (offset, utt[1], value, last))
|
||||
print(' { %3d, %s, %s }%s' % (offset, utt[2], value, last))
|
||||
offset += len(utt[0]) + 1
|
||||
print('};')
|
||||
|
|
|
@ -34,11 +34,15 @@ return code is always zero.
|
|||
There are three commands:
|
||||
|
||||
"findprop" must be followed by a space-separated list of Unicode code points as
|
||||
hex numbers, either without any prefix or starting with "U+". The output is one
|
||||
line per character, giving its Unicode properties followed by its other case or
|
||||
cases if one or more exist, followed by its Script Extension list if it is not
|
||||
just the same as the base script. This list is in square brackets. The
|
||||
properties are:
|
||||
hex numbers, either without any prefix or starting with "U+", or as individual
|
||||
UTF-8 characters preceded by '+'. For example:
|
||||
|
||||
findprop U+1234 5Abc +?
|
||||
|
||||
The output is one line per character, giving its Unicode properties followed by
|
||||
its other case or cases if one or more exist, followed by its Script Extension
|
||||
list if it is not just the same as the base script. This list is in square
|
||||
brackets. The properties are:
|
||||
|
||||
Bidi control shown as '*' if true
|
||||
Bidi class e.g. NSM (most common is L)
|
||||
|
@ -47,9 +51,13 @@ Specific type e.g. Upper case letter
|
|||
Script e.g. Medefaidrin
|
||||
Grapheme break type e.g. Extend (most common is Other)
|
||||
|
||||
The script names are all in lower case, with underscores removed, because
|
||||
that's how they are stored for "loose" matching.
|
||||
|
||||
"find" must be followed by a list of property names and their values. The
|
||||
values are case-sensitive. This finds characters that have those properties. If
|
||||
multiple properties are listed, they must all be matched. Currently supported:
|
||||
values are case-sensitive, except for bidi class. This finds characters that
|
||||
have those properties. If multiple properties are listed, they must all be
|
||||
matched. Currently supported:
|
||||
|
||||
script <name> The character must have this script property. Only one
|
||||
such script may be given.
|
||||
|
@ -59,7 +67,7 @@ multiple properties are listed, they must all be matched. Currently supported:
|
|||
type <abbrev> The character's specific type (e.g. Lu or Nd) must match.
|
||||
gbreak <name> The grapheme break property must match.
|
||||
bidi <class> The character's bidi class must match.
|
||||
bidi_control The character must be a bidi control character
|
||||
bidi_control The character must be a bidi control character
|
||||
|
||||
If a <name> or <abbrev> is preceded by !, the value must NOT be present. For
|
||||
Script Extensions, there may be a mixture of positive and negative
|
||||
|
@ -202,6 +210,41 @@ static const unsigned int utf8_table1[] = {
|
|||
static const int utf8_table2[] = {
|
||||
0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
|
||||
|
||||
/* Macro to pick up the remaining bytes of a UTF-8 character, advancing
|
||||
the pointer. */
|
||||
|
||||
/* GETUTF8INC(c, eptr): on entry c holds the first byte of a multi-byte UTF-8
character (already fetched) and eptr points at the byte that follows it. The
high bits of c are tested in turn (0x20, 0x10, 0x08, 0x04) to choose between
one, two, three, four or five following bytes. Each following byte is masked
with 0x3f and merged with the payload bits of c, leaving the code point in c
and eptr advanced past the bytes that were read. The following bytes are not
checked for being well-formed continuation bytes. Both arguments are expanded
more than once, so they must be free of side effects. */
#define GETUTF8INC(c, eptr) \
|
||||
{ \
|
||||
if ((c & 0x20u) == 0) \
|
||||
c = ((c & 0x1fu) << 6) | (*eptr++ & 0x3fu); \
|
||||
else if ((c & 0x10u) == 0) \
|
||||
{ \
|
||||
c = ((c & 0x0fu) << 12) | ((*eptr & 0x3fu) << 6) | (eptr[1] & 0x3fu); \
|
||||
eptr += 2; \
|
||||
} \
|
||||
else if ((c & 0x08u) == 0) \
|
||||
{ \
|
||||
c = ((c & 0x07u) << 18) | ((*eptr & 0x3fu) << 12) | \
|
||||
((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \
|
||||
eptr += 3; \
|
||||
} \
|
||||
else if ((c & 0x04u) == 0) \
|
||||
{ \
|
||||
c = ((c & 0x03u) << 24) | ((*eptr & 0x3fu) << 18) | \
|
||||
((eptr[1] & 0x3fu) << 12) | ((eptr[2] & 0x3fu) << 6) | \
|
||||
(eptr[3] & 0x3fu); \
|
||||
eptr += 4; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
c = ((c & 0x01u) << 30) | ((*eptr & 0x3fu) << 24) | \
|
||||
((eptr[1] & 0x3fu) << 18) | ((eptr[2] & 0x3fu) << 12) | \
|
||||
((eptr[3] & 0x3fu) << 6) | (eptr[4] & 0x3fu); \
|
||||
eptr += 5; \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert character value to UTF-8 *
|
||||
|
@ -267,6 +310,7 @@ for (i = 0; i < PRIV(utt_size); i++)
|
|||
u = PRIV(utt) + i;
|
||||
if (u->type == PT_SC && u->value == script) break;
|
||||
}
|
||||
|
||||
if (i < PRIV(utt_size))
|
||||
return PRIV(utt_names) + u->name_offset;
|
||||
|
||||
|
@ -601,7 +645,7 @@ while (*s != 0)
|
|||
}
|
||||
for (i = 0; i < sizeof(bd_names)/sizeof(char *); i += 2)
|
||||
{
|
||||
if (strcmp(CS (value + offset), CS bd_names[i]) == 0)
|
||||
if (strcasecmp(CS (value + offset), CS bd_names[i]) == 0)
|
||||
{
|
||||
bidiclass = i/2;
|
||||
break;
|
||||
|
@ -629,7 +673,7 @@ while (*s != 0)
|
|||
}
|
||||
}
|
||||
|
||||
if (script < 0 && scriptx_count == 0 && type < 0 && gbreak < 0 &&
|
||||
if (script < 0 && scriptx_count == 0 && type < 0 && gbreak < 0 &&
|
||||
bidiclass < 0 && !bidicontrol)
|
||||
{
|
||||
printf("** No properties specified\n");
|
||||
|
@ -787,12 +831,26 @@ if (strcmp(CS name, "findprop") == 0)
|
|||
unsigned int c;
|
||||
unsigned char *endptr;
|
||||
t = s;
|
||||
if (strncmp(CS t, "U+", 2) == 0) t += 2;
|
||||
c = strtoul(CS t, CSS(&endptr), 16);
|
||||
|
||||
if (*t == '+')
|
||||
{
|
||||
c = *(++t);
|
||||
if (c > 0x7fu)
|
||||
{
|
||||
GETCHARINC(c, t);
|
||||
}
|
||||
endptr = t+1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (strncmp(CS t, "U+", 2) == 0) t += 2;
|
||||
c = strtoul(CS t, CSS(&endptr), 16);
|
||||
}
|
||||
|
||||
if (*endptr != 0 && !isspace(*endptr))
|
||||
{
|
||||
while (*endptr != 0 && !isspace(*endptr)) endptr++;
|
||||
printf("** Invalid hex number: ignored \"%.*s\"\n", (int)(endptr-s), s);
|
||||
printf("** Invalid character specifier: ignored \"%.*s\"\n", (int)(endptr-s), s);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -884,19 +942,19 @@ if (argc > 1 && strcmp(argv[1], "-s") == 0)
|
|||
if (argc > first_arg)
|
||||
{
|
||||
int i;
|
||||
BOOL hexfirst = TRUE;
|
||||
BOOL datafirst = TRUE;
|
||||
char *arg = argv[first_arg];
|
||||
unsigned char *s = buffer;
|
||||
|
||||
if (strncmp(arg, "U+", 2) != 0 && !isdigit(*arg))
|
||||
if (*arg != '+' && strncmp(arg, "U+", 2) != 0 && !isdigit(*arg))
|
||||
{
|
||||
while (*arg != 0)
|
||||
{
|
||||
if (!isxdigit(*arg++)) { hexfirst = FALSE; break; }
|
||||
if (!isxdigit(*arg++)) { datafirst = FALSE; break; }
|
||||
}
|
||||
}
|
||||
|
||||
if (hexfirst)
|
||||
if (datafirst)
|
||||
{
|
||||
strcpy(CS s, "findprop ");
|
||||
s += 9;
|
||||
|
|
|
@ -1,409 +1,409 @@
|
|||
findprop 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
|
||||
U+0000 BN Control: Control, Common, Control
|
||||
U+0001 BN Control: Control, Common, Control
|
||||
U+0002 BN Control: Control, Common, Control
|
||||
U+0003 BN Control: Control, Common, Control
|
||||
U+0004 BN Control: Control, Common, Control
|
||||
U+0005 BN Control: Control, Common, Control
|
||||
U+0006 BN Control: Control, Common, Control
|
||||
U+0007 BN Control: Control, Common, Control
|
||||
U+0008 BN Control: Control, Common, Control
|
||||
U+0009 S Control: Control, Common, Control
|
||||
U+000A B Control: Control, Common, LF
|
||||
U+000B S Control: Control, Common, Control
|
||||
U+000C WS Control: Control, Common, Control
|
||||
U+000D B Control: Control, Common, CR
|
||||
U+000E BN Control: Control, Common, Control
|
||||
U+000F BN Control: Control, Common, Control
|
||||
U+0000 BN Control: Control, common, Control
|
||||
U+0001 BN Control: Control, common, Control
|
||||
U+0002 BN Control: Control, common, Control
|
||||
U+0003 BN Control: Control, common, Control
|
||||
U+0004 BN Control: Control, common, Control
|
||||
U+0005 BN Control: Control, common, Control
|
||||
U+0006 BN Control: Control, common, Control
|
||||
U+0007 BN Control: Control, common, Control
|
||||
U+0008 BN Control: Control, common, Control
|
||||
U+0009 S Control: Control, common, Control
|
||||
U+000A B Control: Control, common, LF
|
||||
U+000B S Control: Control, common, Control
|
||||
U+000C WS Control: Control, common, Control
|
||||
U+000D B Control: Control, common, CR
|
||||
U+000E BN Control: Control, common, Control
|
||||
U+000F BN Control: Control, common, Control
|
||||
findprop 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
|
||||
U+0010 BN Control: Control, Common, Control
|
||||
U+0011 BN Control: Control, Common, Control
|
||||
U+0012 BN Control: Control, Common, Control
|
||||
U+0013 BN Control: Control, Common, Control
|
||||
U+0014 BN Control: Control, Common, Control
|
||||
U+0015 BN Control: Control, Common, Control
|
||||
U+0016 BN Control: Control, Common, Control
|
||||
U+0017 BN Control: Control, Common, Control
|
||||
U+0018 BN Control: Control, Common, Control
|
||||
U+0019 BN Control: Control, Common, Control
|
||||
U+001A BN Control: Control, Common, Control
|
||||
U+001B BN Control: Control, Common, Control
|
||||
U+001C B Control: Control, Common, Control
|
||||
U+001D B Control: Control, Common, Control
|
||||
U+001E B Control: Control, Common, Control
|
||||
U+001F S Control: Control, Common, Control
|
||||
U+0010 BN Control: Control, common, Control
|
||||
U+0011 BN Control: Control, common, Control
|
||||
U+0012 BN Control: Control, common, Control
|
||||
U+0013 BN Control: Control, common, Control
|
||||
U+0014 BN Control: Control, common, Control
|
||||
U+0015 BN Control: Control, common, Control
|
||||
U+0016 BN Control: Control, common, Control
|
||||
U+0017 BN Control: Control, common, Control
|
||||
U+0018 BN Control: Control, common, Control
|
||||
U+0019 BN Control: Control, common, Control
|
||||
U+001A BN Control: Control, common, Control
|
||||
U+001B BN Control: Control, common, Control
|
||||
U+001C B Control: Control, common, Control
|
||||
U+001D B Control: Control, common, Control
|
||||
U+001E B Control: Control, common, Control
|
||||
U+001F S Control: Control, common, Control
|
||||
findprop 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
|
||||
U+0020 WS Separator: Space separator, Common, Other
|
||||
U+0021 ON Punctuation: Other punctuation, Common, Other
|
||||
U+0022 ON Punctuation: Other punctuation, Common, Other
|
||||
U+0023 ET Punctuation: Other punctuation, Common, Other
|
||||
U+0024 ET Symbol: Currency symbol, Common, Other
|
||||
U+0025 ET Punctuation: Other punctuation, Common, Other
|
||||
U+0026 ON Punctuation: Other punctuation, Common, Other
|
||||
U+0027 ON Punctuation: Other punctuation, Common, Other
|
||||
U+0028 ON Punctuation: Open punctuation, Common, Other
|
||||
U+0029 ON Punctuation: Close punctuation, Common, Other
|
||||
U+002A ON Punctuation: Other punctuation, Common, Other
|
||||
U+002B ES Symbol: Mathematical symbol, Common, Other
|
||||
U+002C CS Punctuation: Other punctuation, Common, Other
|
||||
U+002D ES Punctuation: Dash punctuation, Common, Other
|
||||
U+002E CS Punctuation: Other punctuation, Common, Other
|
||||
U+002F CS Punctuation: Other punctuation, Common, Other
|
||||
U+0020 WS Separator: Space separator, common, Other
|
||||
U+0021 ON Punctuation: Other punctuation, common, Other
|
||||
U+0022 ON Punctuation: Other punctuation, common, Other
|
||||
U+0023 ET Punctuation: Other punctuation, common, Other
|
||||
U+0024 ET Symbol: Currency symbol, common, Other
|
||||
U+0025 ET Punctuation: Other punctuation, common, Other
|
||||
U+0026 ON Punctuation: Other punctuation, common, Other
|
||||
U+0027 ON Punctuation: Other punctuation, common, Other
|
||||
U+0028 ON Punctuation: Open punctuation, common, Other
|
||||
U+0029 ON Punctuation: Close punctuation, common, Other
|
||||
U+002A ON Punctuation: Other punctuation, common, Other
|
||||
U+002B ES Symbol: Mathematical symbol, common, Other
|
||||
U+002C CS Punctuation: Other punctuation, common, Other
|
||||
U+002D ES Punctuation: Dash punctuation, common, Other
|
||||
U+002E CS Punctuation: Other punctuation, common, Other
|
||||
U+002F CS Punctuation: Other punctuation, common, Other
|
||||
findprop 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f
|
||||
U+0030 EN Number: Decimal number, Common, Other
|
||||
U+0031 EN Number: Decimal number, Common, Other
|
||||
U+0032 EN Number: Decimal number, Common, Other
|
||||
U+0033 EN Number: Decimal number, Common, Other
|
||||
U+0034 EN Number: Decimal number, Common, Other
|
||||
U+0035 EN Number: Decimal number, Common, Other
|
||||
U+0036 EN Number: Decimal number, Common, Other
|
||||
U+0037 EN Number: Decimal number, Common, Other
|
||||
U+0038 EN Number: Decimal number, Common, Other
|
||||
U+0039 EN Number: Decimal number, Common, Other
|
||||
U+003A CS Punctuation: Other punctuation, Common, Other
|
||||
U+003B ON Punctuation: Other punctuation, Common, Other
|
||||
U+003C ON Symbol: Mathematical symbol, Common, Other
|
||||
U+003D ON Symbol: Mathematical symbol, Common, Other
|
||||
U+003E ON Symbol: Mathematical symbol, Common, Other
|
||||
U+003F ON Punctuation: Other punctuation, Common, Other
|
||||
U+0030 EN Number: Decimal number, common, Other
|
||||
U+0031 EN Number: Decimal number, common, Other
|
||||
U+0032 EN Number: Decimal number, common, Other
|
||||
U+0033 EN Number: Decimal number, common, Other
|
||||
U+0034 EN Number: Decimal number, common, Other
|
||||
U+0035 EN Number: Decimal number, common, Other
|
||||
U+0036 EN Number: Decimal number, common, Other
|
||||
U+0037 EN Number: Decimal number, common, Other
|
||||
U+0038 EN Number: Decimal number, common, Other
|
||||
U+0039 EN Number: Decimal number, common, Other
|
||||
U+003A CS Punctuation: Other punctuation, common, Other
|
||||
U+003B ON Punctuation: Other punctuation, common, Other
|
||||
U+003C ON Symbol: Mathematical symbol, common, Other
|
||||
U+003D ON Symbol: Mathematical symbol, common, Other
|
||||
U+003E ON Symbol: Mathematical symbol, common, Other
|
||||
U+003F ON Punctuation: Other punctuation, common, Other
|
||||
findprop 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f
|
||||
U+0040 ON Punctuation: Other punctuation, Common, Other
|
||||
U+0041 L Letter: Upper case letter, Latin, Other, U+0061
|
||||
U+0042 L Letter: Upper case letter, Latin, Other, U+0062
|
||||
U+0043 L Letter: Upper case letter, Latin, Other, U+0063
|
||||
U+0044 L Letter: Upper case letter, Latin, Other, U+0064
|
||||
U+0045 L Letter: Upper case letter, Latin, Other, U+0065
|
||||
U+0046 L Letter: Upper case letter, Latin, Other, U+0066
|
||||
U+0047 L Letter: Upper case letter, Latin, Other, U+0067
|
||||
U+0048 L Letter: Upper case letter, Latin, Other, U+0068
|
||||
U+0049 L Letter: Upper case letter, Latin, Other, U+0069
|
||||
U+004A L Letter: Upper case letter, Latin, Other, U+006A
|
||||
U+004B L Letter: Upper case letter, Latin, Other, U+006B, U+212A
|
||||
U+004C L Letter: Upper case letter, Latin, Other, U+006C
|
||||
U+004D L Letter: Upper case letter, Latin, Other, U+006D
|
||||
U+004E L Letter: Upper case letter, Latin, Other, U+006E
|
||||
U+004F L Letter: Upper case letter, Latin, Other, U+006F
|
||||
U+0040 ON Punctuation: Other punctuation, common, Other
|
||||
U+0041 L Letter: Upper case letter, latin, Other, U+0061
|
||||
U+0042 L Letter: Upper case letter, latin, Other, U+0062
|
||||
U+0043 L Letter: Upper case letter, latin, Other, U+0063
|
||||
U+0044 L Letter: Upper case letter, latin, Other, U+0064
|
||||
U+0045 L Letter: Upper case letter, latin, Other, U+0065
|
||||
U+0046 L Letter: Upper case letter, latin, Other, U+0066
|
||||
U+0047 L Letter: Upper case letter, latin, Other, U+0067
|
||||
U+0048 L Letter: Upper case letter, latin, Other, U+0068
|
||||
U+0049 L Letter: Upper case letter, latin, Other, U+0069
|
||||
U+004A L Letter: Upper case letter, latin, Other, U+006A
|
||||
U+004B L Letter: Upper case letter, latin, Other, U+006B, U+212A
|
||||
U+004C L Letter: Upper case letter, latin, Other, U+006C
|
||||
U+004D L Letter: Upper case letter, latin, Other, U+006D
|
||||
U+004E L Letter: Upper case letter, latin, Other, U+006E
|
||||
U+004F L Letter: Upper case letter, latin, Other, U+006F
|
||||
findprop 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f
|
||||
U+0050 L Letter: Upper case letter, Latin, Other, U+0070
|
||||
U+0051 L Letter: Upper case letter, Latin, Other, U+0071
|
||||
U+0052 L Letter: Upper case letter, Latin, Other, U+0072
|
||||
U+0053 L Letter: Upper case letter, Latin, Other, U+0073, U+017F
|
||||
U+0054 L Letter: Upper case letter, Latin, Other, U+0074
|
||||
U+0055 L Letter: Upper case letter, Latin, Other, U+0075
|
||||
U+0056 L Letter: Upper case letter, Latin, Other, U+0076
|
||||
U+0057 L Letter: Upper case letter, Latin, Other, U+0077
|
||||
U+0058 L Letter: Upper case letter, Latin, Other, U+0078
|
||||
U+0059 L Letter: Upper case letter, Latin, Other, U+0079
|
||||
U+005A L Letter: Upper case letter, Latin, Other, U+007A
|
||||
U+005B ON Punctuation: Open punctuation, Common, Other
|
||||
U+005C ON Punctuation: Other punctuation, Common, Other
|
||||
U+005D ON Punctuation: Close punctuation, Common, Other
|
||||
U+005E ON Symbol: Modifier symbol, Common, Other
|
||||
U+005F ON Punctuation: Connector punctuation, Common, Other
|
||||
U+0050 L Letter: Upper case letter, latin, Other, U+0070
|
||||
U+0051 L Letter: Upper case letter, latin, Other, U+0071
|
||||
U+0052 L Letter: Upper case letter, latin, Other, U+0072
|
||||
U+0053 L Letter: Upper case letter, latin, Other, U+0073, U+017F
|
||||
U+0054 L Letter: Upper case letter, latin, Other, U+0074
|
||||
U+0055 L Letter: Upper case letter, latin, Other, U+0075
|
||||
U+0056 L Letter: Upper case letter, latin, Other, U+0076
|
||||
U+0057 L Letter: Upper case letter, latin, Other, U+0077
|
||||
U+0058 L Letter: Upper case letter, latin, Other, U+0078
|
||||
U+0059 L Letter: Upper case letter, latin, Other, U+0079
|
||||
U+005A L Letter: Upper case letter, latin, Other, U+007A
|
||||
U+005B ON Punctuation: Open punctuation, common, Other
|
||||
U+005C ON Punctuation: Other punctuation, common, Other
|
||||
U+005D ON Punctuation: Close punctuation, common, Other
|
||||
U+005E ON Symbol: Modifier symbol, common, Other
|
||||
U+005F ON Punctuation: Connector punctuation, common, Other
|
||||
findprop 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f
|
||||
U+0060 ON Symbol: Modifier symbol, Common, Other
|
||||
U+0061 L Letter: Lower case letter, Latin, Other, U+0041
|
||||
U+0062 L Letter: Lower case letter, Latin, Other, U+0042
|
||||
U+0063 L Letter: Lower case letter, Latin, Other, U+0043
|
||||
U+0064 L Letter: Lower case letter, Latin, Other, U+0044
|
||||
U+0065 L Letter: Lower case letter, Latin, Other, U+0045
|
||||
U+0066 L Letter: Lower case letter, Latin, Other, U+0046
|
||||
U+0067 L Letter: Lower case letter, Latin, Other, U+0047
|
||||
U+0068 L Letter: Lower case letter, Latin, Other, U+0048
|
||||
U+0069 L Letter: Lower case letter, Latin, Other, U+0049
|
||||
U+006A L Letter: Lower case letter, Latin, Other, U+004A
|
||||
U+006B L Letter: Lower case letter, Latin, Other, U+004B, U+212A
|
||||
U+006C L Letter: Lower case letter, Latin, Other, U+004C
|
||||
U+006D L Letter: Lower case letter, Latin, Other, U+004D
|
||||
U+006E L Letter: Lower case letter, Latin, Other, U+004E
|
||||
U+006F L Letter: Lower case letter, Latin, Other, U+004F
|
||||
U+0060 ON Symbol: Modifier symbol, common, Other
|
||||
U+0061 L Letter: Lower case letter, latin, Other, U+0041
|
||||
U+0062 L Letter: Lower case letter, latin, Other, U+0042
|
||||
U+0063 L Letter: Lower case letter, latin, Other, U+0043
|
||||
U+0064 L Letter: Lower case letter, latin, Other, U+0044
|
||||
U+0065 L Letter: Lower case letter, latin, Other, U+0045
|
||||
U+0066 L Letter: Lower case letter, latin, Other, U+0046
|
||||
U+0067 L Letter: Lower case letter, latin, Other, U+0047
|
||||
U+0068 L Letter: Lower case letter, latin, Other, U+0048
|
||||
U+0069 L Letter: Lower case letter, latin, Other, U+0049
|
||||
U+006A L Letter: Lower case letter, latin, Other, U+004A
|
||||
U+006B L Letter: Lower case letter, latin, Other, U+004B, U+212A
|
||||
U+006C L Letter: Lower case letter, latin, Other, U+004C
|
||||
U+006D L Letter: Lower case letter, latin, Other, U+004D
|
||||
U+006E L Letter: Lower case letter, latin, Other, U+004E
|
||||
U+006F L Letter: Lower case letter, latin, Other, U+004F
|
||||
findprop 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f
|
||||
U+0070 L Letter: Lower case letter, Latin, Other, U+0050
|
||||
U+0071 L Letter: Lower case letter, Latin, Other, U+0051
|
||||
U+0072 L Letter: Lower case letter, Latin, Other, U+0052
|
||||
U+0073 L Letter: Lower case letter, Latin, Other, U+0053, U+017F
|
||||
U+0074 L Letter: Lower case letter, Latin, Other, U+0054
|
||||
U+0075 L Letter: Lower case letter, Latin, Other, U+0055
|
||||
U+0076 L Letter: Lower case letter, Latin, Other, U+0056
|
||||
U+0077 L Letter: Lower case letter, Latin, Other, U+0057
|
||||
U+0078 L Letter: Lower case letter, Latin, Other, U+0058
|
||||
U+0079 L Letter: Lower case letter, Latin, Other, U+0059
|
||||
U+007A L Letter: Lower case letter, Latin, Other, U+005A
|
||||
U+007B ON Punctuation: Open punctuation, Common, Other
|
||||
U+007C ON Symbol: Mathematical symbol, Common, Other
|
||||
U+007D ON Punctuation: Close punctuation, Common, Other
|
||||
U+007E ON Symbol: Mathematical symbol, Common, Other
|
||||
U+007F BN Control: Control, Common, Control
|
||||
U+0070 L Letter: Lower case letter, latin, Other, U+0050
|
||||
U+0071 L Letter: Lower case letter, latin, Other, U+0051
|
||||
U+0072 L Letter: Lower case letter, latin, Other, U+0052
|
||||
U+0073 L Letter: Lower case letter, latin, Other, U+0053, U+017F
|
||||
U+0074 L Letter: Lower case letter, latin, Other, U+0054
|
||||
U+0075 L Letter: Lower case letter, latin, Other, U+0055
|
||||
U+0076 L Letter: Lower case letter, latin, Other, U+0056
|
||||
U+0077 L Letter: Lower case letter, latin, Other, U+0057
|
||||
U+0078 L Letter: Lower case letter, latin, Other, U+0058
|
||||
U+0079 L Letter: Lower case letter, latin, Other, U+0059
|
||||
U+007A L Letter: Lower case letter, latin, Other, U+005A
|
||||
U+007B ON Punctuation: Open punctuation, common, Other
|
||||
U+007C ON Symbol: Mathematical symbol, common, Other
|
||||
U+007D ON Punctuation: Close punctuation, common, Other
|
||||
U+007E ON Symbol: Mathematical symbol, common, Other
|
||||
U+007F BN Control: Control, common, Control
|
||||
|
||||
findprop 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f
|
||||
U+0080 BN Control: Control, Common, Control
|
||||
U+0081 BN Control: Control, Common, Control
|
||||
U+0082 BN Control: Control, Common, Control
|
||||
U+0083 BN Control: Control, Common, Control
|
||||
U+0084 BN Control: Control, Common, Control
|
||||
U+0085 B Control: Control, Common, Control
|
||||
U+0086 BN Control: Control, Common, Control
|
||||
U+0087 BN Control: Control, Common, Control
|
||||
U+0088 BN Control: Control, Common, Control
|
||||
U+0089 BN Control: Control, Common, Control
|
||||
U+008A BN Control: Control, Common, Control
|
||||
U+008B BN Control: Control, Common, Control
|
||||
U+008C BN Control: Control, Common, Control
|
||||
U+008D BN Control: Control, Common, Control
|
||||
U+008E BN Control: Control, Common, Control
|
||||
U+008F BN Control: Control, Common, Control
|
||||
U+0080 BN Control: Control, common, Control
|
||||
U+0081 BN Control: Control, common, Control
|
||||
U+0082 BN Control: Control, common, Control
|
||||
U+0083 BN Control: Control, common, Control
|
||||
U+0084 BN Control: Control, common, Control
|
||||
U+0085 B Control: Control, common, Control
|
||||
U+0086 BN Control: Control, common, Control
|
||||
U+0087 BN Control: Control, common, Control
|
||||
U+0088 BN Control: Control, common, Control
|
||||
U+0089 BN Control: Control, common, Control
|
||||
U+008A BN Control: Control, common, Control
|
||||
U+008B BN Control: Control, common, Control
|
||||
U+008C BN Control: Control, common, Control
|
||||
U+008D BN Control: Control, common, Control
|
||||
U+008E BN Control: Control, common, Control
|
||||
U+008F BN Control: Control, common, Control
|
||||
findprop 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f
|
||||
U+0090 BN Control: Control, Common, Control
|
||||
U+0091 BN Control: Control, Common, Control
|
||||
U+0092 BN Control: Control, Common, Control
|
||||
U+0093 BN Control: Control, Common, Control
|
||||
U+0094 BN Control: Control, Common, Control
|
||||
U+0095 BN Control: Control, Common, Control
|
||||
U+0096 BN Control: Control, Common, Control
|
||||
U+0097 BN Control: Control, Common, Control
|
||||
U+0098 BN Control: Control, Common, Control
|
||||
U+0099 BN Control: Control, Common, Control
|
||||
U+009A BN Control: Control, Common, Control
|
||||
U+009B BN Control: Control, Common, Control
|
||||
U+009C BN Control: Control, Common, Control
|
||||
U+009D BN Control: Control, Common, Control
|
||||
U+009E BN Control: Control, Common, Control
|
||||
U+009F BN Control: Control, Common, Control
|
||||
U+0090 BN Control: Control, common, Control
|
||||
U+0091 BN Control: Control, common, Control
|
||||
U+0092 BN Control: Control, common, Control
|
||||
U+0093 BN Control: Control, common, Control
|
||||
U+0094 BN Control: Control, common, Control
|
||||
U+0095 BN Control: Control, common, Control
|
||||
U+0096 BN Control: Control, common, Control
|
||||
U+0097 BN Control: Control, common, Control
|
||||
U+0098 BN Control: Control, common, Control
|
||||
U+0099 BN Control: Control, common, Control
|
||||
U+009A BN Control: Control, common, Control
|
||||
U+009B BN Control: Control, common, Control
|
||||
U+009C BN Control: Control, common, Control
|
||||
U+009D BN Control: Control, common, Control
|
||||
U+009E BN Control: Control, common, Control
|
||||
U+009F BN Control: Control, common, Control
|
||||
findprop a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aa ab ac ad ae af
|
||||
U+00A0 CS Separator: Space separator, Common, Other
|
||||
U+00A1 ON Punctuation: Other punctuation, Common, Other
|
||||
U+00A2 ET Symbol: Currency symbol, Common, Other
|
||||
U+00A3 ET Symbol: Currency symbol, Common, Other
|
||||
U+00A4 ET Symbol: Currency symbol, Common, Other
|
||||
U+00A5 ET Symbol: Currency symbol, Common, Other
|
||||
U+00A6 ON Symbol: Other symbol, Common, Other
|
||||
U+00A7 ON Punctuation: Other punctuation, Common, Other
|
||||
U+00A8 ON Symbol: Modifier symbol, Common, Other
|
||||
U+00A9 ON Symbol: Other symbol, Common, Extended Pictographic
|
||||
U+00AA L Letter: Other letter, Latin, Other
|
||||
U+00AB ON Punctuation: Initial punctuation, Common, Other
|
||||
U+00AC ON Symbol: Mathematical symbol, Common, Other
|
||||
U+00AD BN Control: Format, Common, Control
|
||||
U+00AE ON Symbol: Other symbol, Common, Extended Pictographic
|
||||
U+00AF ON Symbol: Modifier symbol, Common, Other
|
||||
U+00A0 CS Separator: Space separator, common, Other
|
||||
U+00A1 ON Punctuation: Other punctuation, common, Other
|
||||
U+00A2 ET Symbol: Currency symbol, common, Other
|
||||
U+00A3 ET Symbol: Currency symbol, common, Other
|
||||
U+00A4 ET Symbol: Currency symbol, common, Other
|
||||
U+00A5 ET Symbol: Currency symbol, common, Other
|
||||
U+00A6 ON Symbol: Other symbol, common, Other
|
||||
U+00A7 ON Punctuation: Other punctuation, common, Other
|
||||
U+00A8 ON Symbol: Modifier symbol, common, Other
|
||||
U+00A9 ON Symbol: Other symbol, common, Extended Pictographic
|
||||
U+00AA L Letter: Other letter, latin, Other
|
||||
U+00AB ON Punctuation: Initial punctuation, common, Other
|
||||
U+00AC ON Symbol: Mathematical symbol, common, Other
|
||||
U+00AD BN Control: Format, common, Control
|
||||
U+00AE ON Symbol: Other symbol, common, Extended Pictographic
|
||||
U+00AF ON Symbol: Modifier symbol, common, Other
|
||||
findprop b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf
|
||||
U+00B0 ET Symbol: Other symbol, Common, Other
|
||||
U+00B1 ET Symbol: Mathematical symbol, Common, Other
|
||||
U+00B2 EN Number: Other number, Common, Other
|
||||
U+00B3 EN Number: Other number, Common, Other
|
||||
U+00B4 ON Symbol: Modifier symbol, Common, Other
|
||||
U+00B5 L Letter: Lower case letter, Common, Other, U+03BC, U+039C
|
||||
U+00B6 ON Punctuation: Other punctuation, Common, Other
|
||||
U+00B7 ON Punctuation: Other punctuation, Common, Other
|
||||
U+00B8 ON Symbol: Modifier symbol, Common, Other
|
||||
U+00B9 EN Number: Other number, Common, Other
|
||||
U+00BA L Letter: Other letter, Latin, Other
|
||||
U+00BB ON Punctuation: Final punctuation, Common, Other
|
||||
U+00BC ON Number: Other number, Common, Other
|
||||
U+00BD ON Number: Other number, Common, Other
|
||||
U+00BE ON Number: Other number, Common, Other
|
||||
U+00BF ON Punctuation: Other punctuation, Common, Other
|
||||
U+00B0 ET Symbol: Other symbol, common, Other
|
||||
U+00B1 ET Symbol: Mathematical symbol, common, Other
|
||||
U+00B2 EN Number: Other number, common, Other
|
||||
U+00B3 EN Number: Other number, common, Other
|
||||
U+00B4 ON Symbol: Modifier symbol, common, Other
|
||||
U+00B5 L Letter: Lower case letter, common, Other, U+03BC, U+039C
|
||||
U+00B6 ON Punctuation: Other punctuation, common, Other
|
||||
U+00B7 ON Punctuation: Other punctuation, common, Other
|
||||
U+00B8 ON Symbol: Modifier symbol, common, Other
|
||||
U+00B9 EN Number: Other number, common, Other
|
||||
U+00BA L Letter: Other letter, latin, Other
|
||||
U+00BB ON Punctuation: Final punctuation, common, Other
|
||||
U+00BC ON Number: Other number, common, Other
|
||||
U+00BD ON Number: Other number, common, Other
|
||||
U+00BE ON Number: Other number, common, Other
|
||||
U+00BF ON Punctuation: Other punctuation, common, Other
|
||||
findprop c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 ca cb cc cd ce cf
|
||||
U+00C0 L Letter: Upper case letter, Latin, Other, U+00E0
|
||||
U+00C1 L Letter: Upper case letter, Latin, Other, U+00E1
|
||||
U+00C2 L Letter: Upper case letter, Latin, Other, U+00E2
|
||||
U+00C3 L Letter: Upper case letter, Latin, Other, U+00E3
|
||||
U+00C4 L Letter: Upper case letter, Latin, Other, U+00E4
|
||||
U+00C5 L Letter: Upper case letter, Latin, Other, U+00E5, U+212B
|
||||
U+00C6 L Letter: Upper case letter, Latin, Other, U+00E6
|
||||
U+00C7 L Letter: Upper case letter, Latin, Other, U+00E7
|
||||
U+00C8 L Letter: Upper case letter, Latin, Other, U+00E8
|
||||
U+00C9 L Letter: Upper case letter, Latin, Other, U+00E9
|
||||
U+00CA L Letter: Upper case letter, Latin, Other, U+00EA
|
||||
U+00CB L Letter: Upper case letter, Latin, Other, U+00EB
|
||||
U+00CC L Letter: Upper case letter, Latin, Other, U+00EC
|
||||
U+00CD L Letter: Upper case letter, Latin, Other, U+00ED
|
||||
U+00CE L Letter: Upper case letter, Latin, Other, U+00EE
|
||||
U+00CF L Letter: Upper case letter, Latin, Other, U+00EF
|
||||
U+00C0 L Letter: Upper case letter, latin, Other, U+00E0
|
||||
U+00C1 L Letter: Upper case letter, latin, Other, U+00E1
|
||||
U+00C2 L Letter: Upper case letter, latin, Other, U+00E2
|
||||
U+00C3 L Letter: Upper case letter, latin, Other, U+00E3
|
||||
U+00C4 L Letter: Upper case letter, latin, Other, U+00E4
|
||||
U+00C5 L Letter: Upper case letter, latin, Other, U+00E5, U+212B
|
||||
U+00C6 L Letter: Upper case letter, latin, Other, U+00E6
|
||||
U+00C7 L Letter: Upper case letter, latin, Other, U+00E7
|
||||
U+00C8 L Letter: Upper case letter, latin, Other, U+00E8
|
||||
U+00C9 L Letter: Upper case letter, latin, Other, U+00E9
|
||||
U+00CA L Letter: Upper case letter, latin, Other, U+00EA
|
||||
U+00CB L Letter: Upper case letter, latin, Other, U+00EB
|
||||
U+00CC L Letter: Upper case letter, latin, Other, U+00EC
|
||||
U+00CD L Letter: Upper case letter, latin, Other, U+00ED
|
||||
U+00CE L Letter: Upper case letter, latin, Other, U+00EE
|
||||
U+00CF L Letter: Upper case letter, latin, Other, U+00EF
|
||||
findprop d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 da db dc dd de df
|
||||
U+00D0 L Letter: Upper case letter, Latin, Other, U+00F0
|
||||
U+00D1 L Letter: Upper case letter, Latin, Other, U+00F1
|
||||
U+00D2 L Letter: Upper case letter, Latin, Other, U+00F2
|
||||
U+00D3 L Letter: Upper case letter, Latin, Other, U+00F3
|
||||
U+00D4 L Letter: Upper case letter, Latin, Other, U+00F4
|
||||
U+00D5 L Letter: Upper case letter, Latin, Other, U+00F5
|
||||
U+00D6 L Letter: Upper case letter, Latin, Other, U+00F6
|
||||
U+00D7 ON Symbol: Mathematical symbol, Common, Other
|
||||
U+00D8 L Letter: Upper case letter, Latin, Other, U+00F8
|
||||
U+00D9 L Letter: Upper case letter, Latin, Other, U+00F9
|
||||
U+00DA L Letter: Upper case letter, Latin, Other, U+00FA
|
||||
U+00DB L Letter: Upper case letter, Latin, Other, U+00FB
|
||||
U+00DC L Letter: Upper case letter, Latin, Other, U+00FC
|
||||
U+00DD L Letter: Upper case letter, Latin, Other, U+00FD
|
||||
U+00DE L Letter: Upper case letter, Latin, Other, U+00FE
|
||||
U+00DF L Letter: Lower case letter, Latin, Other, U+1E9E
|
||||
U+00D0 L Letter: Upper case letter, latin, Other, U+00F0
|
||||
U+00D1 L Letter: Upper case letter, latin, Other, U+00F1
|
||||
U+00D2 L Letter: Upper case letter, latin, Other, U+00F2
|
||||
U+00D3 L Letter: Upper case letter, latin, Other, U+00F3
|
||||
U+00D4 L Letter: Upper case letter, latin, Other, U+00F4
|
||||
U+00D5 L Letter: Upper case letter, latin, Other, U+00F5
|
||||
U+00D6 L Letter: Upper case letter, latin, Other, U+00F6
|
||||
U+00D7 ON Symbol: Mathematical symbol, common, Other
|
||||
U+00D8 L Letter: Upper case letter, latin, Other, U+00F8
|
||||
U+00D9 L Letter: Upper case letter, latin, Other, U+00F9
|
||||
U+00DA L Letter: Upper case letter, latin, Other, U+00FA
|
||||
U+00DB L Letter: Upper case letter, latin, Other, U+00FB
|
||||
U+00DC L Letter: Upper case letter, latin, Other, U+00FC
|
||||
U+00DD L Letter: Upper case letter, latin, Other, U+00FD
|
||||
U+00DE L Letter: Upper case letter, latin, Other, U+00FE
|
||||
U+00DF L Letter: Lower case letter, latin, Other, U+1E9E
|
||||
findprop e0 e1 e2 e3 e4 e5 e6 e7 e8 e9 ea eb ec ed ee ef
|
||||
U+00E0 L Letter: Lower case letter, Latin, Other, U+00C0
|
||||
U+00E1 L Letter: Lower case letter, Latin, Other, U+00C1
|
||||
U+00E2 L Letter: Lower case letter, Latin, Other, U+00C2
|
||||
U+00E3 L Letter: Lower case letter, Latin, Other, U+00C3
|
||||
U+00E4 L Letter: Lower case letter, Latin, Other, U+00C4
|
||||
U+00E5 L Letter: Lower case letter, Latin, Other, U+00C5, U+212B
|
||||
U+00E6 L Letter: Lower case letter, Latin, Other, U+00C6
|
||||
U+00E7 L Letter: Lower case letter, Latin, Other, U+00C7
|
||||
U+00E8 L Letter: Lower case letter, Latin, Other, U+00C8
|
||||
U+00E9 L Letter: Lower case letter, Latin, Other, U+00C9
|
||||
U+00EA L Letter: Lower case letter, Latin, Other, U+00CA
|
||||
U+00EB L Letter: Lower case letter, Latin, Other, U+00CB
|
||||
U+00EC L Letter: Lower case letter, Latin, Other, U+00CC
|
||||
U+00ED L Letter: Lower case letter, Latin, Other, U+00CD
|
||||
U+00EE L Letter: Lower case letter, Latin, Other, U+00CE
|
||||
U+00EF L Letter: Lower case letter, Latin, Other, U+00CF
|
||||
U+00E0 L Letter: Lower case letter, latin, Other, U+00C0
|
||||
U+00E1 L Letter: Lower case letter, latin, Other, U+00C1
|
||||
U+00E2 L Letter: Lower case letter, latin, Other, U+00C2
|
||||
U+00E3 L Letter: Lower case letter, latin, Other, U+00C3
|
||||
U+00E4 L Letter: Lower case letter, latin, Other, U+00C4
|
||||
U+00E5 L Letter: Lower case letter, latin, Other, U+00C5, U+212B
|
||||
U+00E6 L Letter: Lower case letter, latin, Other, U+00C6
|
||||
U+00E7 L Letter: Lower case letter, latin, Other, U+00C7
|
||||
U+00E8 L Letter: Lower case letter, latin, Other, U+00C8
|
||||
U+00E9 L Letter: Lower case letter, latin, Other, U+00C9
|
||||
U+00EA L Letter: Lower case letter, latin, Other, U+00CA
|
||||
U+00EB L Letter: Lower case letter, latin, Other, U+00CB
|
||||
U+00EC L Letter: Lower case letter, latin, Other, U+00CC
|
||||
U+00ED L Letter: Lower case letter, latin, Other, U+00CD
|
||||
U+00EE L Letter: Lower case letter, latin, Other, U+00CE
|
||||
U+00EF L Letter: Lower case letter, latin, Other, U+00CF
|
||||
findprop f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff
|
||||
U+00F0 L Letter: Lower case letter, Latin, Other, U+00D0
|
||||
U+00F1 L Letter: Lower case letter, Latin, Other, U+00D1
|
||||
U+00F2 L Letter: Lower case letter, Latin, Other, U+00D2
|
||||
U+00F3 L Letter: Lower case letter, Latin, Other, U+00D3
|
||||
U+00F4 L Letter: Lower case letter, Latin, Other, U+00D4
|
||||
U+00F5 L Letter: Lower case letter, Latin, Other, U+00D5
|
||||
U+00F6 L Letter: Lower case letter, Latin, Other, U+00D6
|
||||
U+00F7 ON Symbol: Mathematical symbol, Common, Other
|
||||
U+00F8 L Letter: Lower case letter, Latin, Other, U+00D8
|
||||
U+00F9 L Letter: Lower case letter, Latin, Other, U+00D9
|
||||
U+00FA L Letter: Lower case letter, Latin, Other, U+00DA
|
||||
U+00FB L Letter: Lower case letter, Latin, Other, U+00DB
|
||||
U+00FC L Letter: Lower case letter, Latin, Other, U+00DC
|
||||
U+00FD L Letter: Lower case letter, Latin, Other, U+00DD
|
||||
U+00FE L Letter: Lower case letter, Latin, Other, U+00DE
|
||||
U+00FF L Letter: Lower case letter, Latin, Other, U+0178
|
||||
U+00F0 L Letter: Lower case letter, latin, Other, U+00D0
|
||||
U+00F1 L Letter: Lower case letter, latin, Other, U+00D1
|
||||
U+00F2 L Letter: Lower case letter, latin, Other, U+00D2
|
||||
U+00F3 L Letter: Lower case letter, latin, Other, U+00D3
|
||||
U+00F4 L Letter: Lower case letter, latin, Other, U+00D4
|
||||
U+00F5 L Letter: Lower case letter, latin, Other, U+00D5
|
||||
U+00F6 L Letter: Lower case letter, latin, Other, U+00D6
|
||||
U+00F7 ON Symbol: Mathematical symbol, common, Other
|
||||
U+00F8 L Letter: Lower case letter, latin, Other, U+00D8
|
||||
U+00F9 L Letter: Lower case letter, latin, Other, U+00D9
|
||||
U+00FA L Letter: Lower case letter, latin, Other, U+00DA
|
||||
U+00FB L Letter: Lower case letter, latin, Other, U+00DB
|
||||
U+00FC L Letter: Lower case letter, latin, Other, U+00DC
|
||||
U+00FD L Letter: Lower case letter, latin, Other, U+00DD
|
||||
U+00FE L Letter: Lower case letter, latin, Other, U+00DE
|
||||
U+00FF L Letter: Lower case letter, latin, Other, U+0178
|
||||
|
||||
findprop 0100 0101 0102 0103 0104 0105 0106
|
||||
U+0100 L Letter: Upper case letter, Latin, Other, U+0101
|
||||
U+0101 L Letter: Lower case letter, Latin, Other, U+0100
|
||||
U+0102 L Letter: Upper case letter, Latin, Other, U+0103
|
||||
U+0103 L Letter: Lower case letter, Latin, Other, U+0102
|
||||
U+0104 L Letter: Upper case letter, Latin, Other, U+0105
|
||||
U+0105 L Letter: Lower case letter, Latin, Other, U+0104
|
||||
U+0106 L Letter: Upper case letter, Latin, Other, U+0107
|
||||
U+0100 L Letter: Upper case letter, latin, Other, U+0101
|
||||
U+0101 L Letter: Lower case letter, latin, Other, U+0100
|
||||
U+0102 L Letter: Upper case letter, latin, Other, U+0103
|
||||
U+0103 L Letter: Lower case letter, latin, Other, U+0102
|
||||
U+0104 L Letter: Upper case letter, latin, Other, U+0105
|
||||
U+0105 L Letter: Lower case letter, latin, Other, U+0104
|
||||
U+0106 L Letter: Upper case letter, latin, Other, U+0107
|
||||
|
||||
findprop ffe0 ffe1 ffe2 ffe3 ffe4 ffe5 ffe6 ffe7
|
||||
U+FFE0 ET Symbol: Currency symbol, Common, Other
|
||||
U+FFE1 ET Symbol: Currency symbol, Common, Other
|
||||
U+FFE2 ON Symbol: Mathematical symbol, Common, Other
|
||||
U+FFE3 ON Symbol: Modifier symbol, Common, Other
|
||||
U+FFE4 ON Symbol: Other symbol, Common, Other
|
||||
U+FFE5 ET Symbol: Currency symbol, Common, Other
|
||||
U+FFE6 ET Symbol: Currency symbol, Common, Other
|
||||
U+FFE7 L Control: Unassigned, Unknown, Other
|
||||
U+FFE0 ET Symbol: Currency symbol, common, Other
|
||||
U+FFE1 ET Symbol: Currency symbol, common, Other
|
||||
U+FFE2 ON Symbol: Mathematical symbol, common, Other
|
||||
U+FFE3 ON Symbol: Modifier symbol, common, Other
|
||||
U+FFE4 ON Symbol: Other symbol, common, Other
|
||||
U+FFE5 ET Symbol: Currency symbol, common, Other
|
||||
U+FFE6 ET Symbol: Currency symbol, common, Other
|
||||
U+FFE7 L Control: Unassigned, unknown, Other
|
||||
findprop ffe8 ffe9 ffea ffeb ffec ffed ffee ffef
|
||||
U+FFE8 ON Symbol: Other symbol, Common, Other
|
||||
U+FFE9 ON Symbol: Mathematical symbol, Common, Other
|
||||
U+FFEA ON Symbol: Mathematical symbol, Common, Other
|
||||
U+FFEB ON Symbol: Mathematical symbol, Common, Other
|
||||
U+FFEC ON Symbol: Mathematical symbol, Common, Other
|
||||
U+FFED ON Symbol: Other symbol, Common, Other
|
||||
U+FFEE ON Symbol: Other symbol, Common, Other
|
||||
U+FFEF L Control: Unassigned, Unknown, Other
|
||||
U+FFE8 ON Symbol: Other symbol, common, Other
|
||||
U+FFE9 ON Symbol: Mathematical symbol, common, Other
|
||||
U+FFEA ON Symbol: Mathematical symbol, common, Other
|
||||
U+FFEB ON Symbol: Mathematical symbol, common, Other
|
||||
U+FFEC ON Symbol: Mathematical symbol, common, Other
|
||||
U+FFED ON Symbol: Other symbol, common, Other
|
||||
U+FFEE ON Symbol: Other symbol, common, Other
|
||||
U+FFEF L Control: Unassigned, unknown, Other
|
||||
findprop fff8 fff9 fffa fffb fffc fffd fffe ffff
|
||||
U+FFF8 BN Control: Unassigned, Unknown, Control
|
||||
U+FFF9 ON Control: Format, Common, Control
|
||||
U+FFFA ON Control: Format, Common, Control
|
||||
U+FFFB ON Control: Format, Common, Control
|
||||
U+FFFC ON Symbol: Other symbol, Common, Other
|
||||
U+FFFD ON Symbol: Other symbol, Common, Other
|
||||
U+FFFE BN Control: Unassigned, Unknown, Other
|
||||
U+FFFF BN Control: Unassigned, Unknown, Other
|
||||
U+FFF8 BN Control: Unassigned, unknown, Control
|
||||
U+FFF9 ON Control: Format, common, Control
|
||||
U+FFFA ON Control: Format, common, Control
|
||||
U+FFFB ON Control: Format, common, Control
|
||||
U+FFFC ON Symbol: Other symbol, common, Other
|
||||
U+FFFD ON Symbol: Other symbol, common, Other
|
||||
U+FFFE BN Control: Unassigned, unknown, Other
|
||||
U+FFFF BN Control: Unassigned, unknown, Other
|
||||
findprop 10000 10001 e01ef f0000 100000
|
||||
U+10000 L Letter: Other letter, Linear_B, Other
|
||||
U+10001 L Letter: Other letter, Linear_B, Other
|
||||
U+E01EF NSM Mark: Non-spacing mark, Inherited, Extend
|
||||
U+F0000 L Control: Private use, Unknown, Other
|
||||
U+100000 L Control: Private use, Unknown, Other
|
||||
U+10000 L Letter: Other letter, linearb, Other
|
||||
U+10001 L Letter: Other letter, linearb, Other
|
||||
U+E01EF NSM Mark: Non-spacing mark, inherited, Extend
|
||||
U+F0000 L Control: Private use, unknown, Other
|
||||
U+100000 L Control: Private use, unknown, Other
|
||||
|
||||
findprop 1b00 12000 7c0 a840 10900
|
||||
U+1B00 NSM Mark: Non-spacing mark, Balinese, Extend
|
||||
U+12000 L Letter: Other letter, Cuneiform, Other
|
||||
U+07C0 R Number: Decimal number, Nko, Other
|
||||
U+A840 L Letter: Other letter, Phags_Pa, Other
|
||||
U+10900 R Letter: Other letter, Phoenician, Other
|
||||
U+1B00 NSM Mark: Non-spacing mark, balinese, Extend
|
||||
U+12000 L Letter: Other letter, cuneiform, Other
|
||||
U+07C0 R Number: Decimal number, nko, Other
|
||||
U+A840 L Letter: Other letter, phagspa, Other
|
||||
U+10900 R Letter: Other letter, phoenician, Other
|
||||
findprop 1d79 a77d
|
||||
U+1D79 L Letter: Lower case letter, Latin, Other, U+A77D
|
||||
U+A77D L Letter: Upper case letter, Latin, Other, U+1D79
|
||||
U+1D79 L Letter: Lower case letter, latin, Other, U+A77D
|
||||
U+A77D L Letter: Upper case letter, latin, Other, U+1D79
|
||||
|
||||
findprop 0800 083e a4d0 a4f7 aa80 aadf
|
||||
U+0800 R Letter: Other letter, Samaritan, Other
|
||||
U+083E R Punctuation: Other punctuation, Samaritan, Other
|
||||
U+A4D0 L Letter: Other letter, Lisu, Other
|
||||
U+A4F7 L Letter: Other letter, Lisu, Other
|
||||
U+AA80 L Letter: Other letter, Tai_Viet, Other
|
||||
U+AADF L Punctuation: Other punctuation, Tai_Viet, Other
|
||||
U+0800 R Letter: Other letter, samaritan, Other
|
||||
U+083E R Punctuation: Other punctuation, samaritan, Other
|
||||
U+A4D0 L Letter: Other letter, lisu, Other
|
||||
U+A4F7 L Letter: Other letter, lisu, Other
|
||||
U+AA80 L Letter: Other letter, taiviet, Other
|
||||
U+AADF L Punctuation: Other punctuation, taiviet, Other
|
||||
findprop 10b00 10b35 13000 1342e 10840 10855
|
||||
U+10B00 R Letter: Other letter, Avestan, Other
|
||||
U+10B35 R Letter: Other letter, Avestan, Other
|
||||
U+13000 L Letter: Other letter, Egyptian_Hieroglyphs, Other
|
||||
U+1342E L Letter: Other letter, Egyptian_Hieroglyphs, Other
|
||||
U+10840 R Letter: Other letter, Imperial_Aramaic, Other
|
||||
U+10855 R Letter: Other letter, Imperial_Aramaic, Other
|
||||
U+10B00 R Letter: Other letter, avestan, Other
|
||||
U+10B35 R Letter: Other letter, avestan, Other
|
||||
U+13000 L Letter: Other letter, egyptianhieroglyphs, Other
|
||||
U+1342E L Letter: Other letter, egyptianhieroglyphs, Other
|
||||
U+10840 R Letter: Other letter, imperialaramaic, Other
|
||||
U+10855 R Letter: Other letter, imperialaramaic, Other
|
||||
|
||||
findprop 11100 1113c 11680 116c0
|
||||
U+11100 NSM Mark: Non-spacing mark, Chakma, Extend
|
||||
U+1113C L Number: Decimal number, Chakma, Other
|
||||
U+11680 L Letter: Other letter, Takri, Other
|
||||
U+116C0 L Number: Decimal number, Takri, Other
|
||||
U+11100 NSM Mark: Non-spacing mark, chakma, Extend
|
||||
U+1113C L Number: Decimal number, chakma, Other
|
||||
U+11680 L Letter: Other letter, takri, Other
|
||||
U+116C0 L Number: Decimal number, takri, Other
|
||||
|
||||
findprop 0d 0a 0e 0711 1b04 1111 1169 11fe ae4c ad89
|
||||
U+000D B Control: Control, Common, CR
|
||||
U+000A B Control: Control, Common, LF
|
||||
U+000E BN Control: Control, Common, Control
|
||||
U+0711 NSM Mark: Non-spacing mark, Syriac, Extend
|
||||
U+1B04 L Mark: Spacing mark, Balinese, SpacingMark
|
||||
U+1111 L Letter: Other letter, Hangul, Hangul syllable type L
|
||||
U+1169 L Letter: Other letter, Hangul, Hangul syllable type V
|
||||
U+11FE L Letter: Other letter, Hangul, Hangul syllable type T
|
||||
U+AE4C L Letter: Other letter, Hangul, Hangul syllable type LV
|
||||
U+AD89 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+000D B Control: Control, common, CR
|
||||
U+000A B Control: Control, common, LF
|
||||
U+000E BN Control: Control, common, Control
|
||||
U+0711 NSM Mark: Non-spacing mark, syriac, Extend
|
||||
U+1B04 L Mark: Spacing mark, balinese, SpacingMark
|
||||
U+1111 L Letter: Other letter, hangul, Hangul syllable type L
|
||||
U+1169 L Letter: Other letter, hangul, Hangul syllable type V
|
||||
U+11FE L Letter: Other letter, hangul, Hangul syllable type T
|
||||
U+AE4C L Letter: Other letter, hangul, Hangul syllable type LV
|
||||
U+AD89 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
|
||||
findprop 118a0 11ac7 16ad0
|
||||
U+118A0 L Letter: Upper case letter, Warang_Citi, Other, U+118C0
|
||||
U+11AC7 L Letter: Other letter, Pau_Cin_Hau, Other
|
||||
U+16AD0 L Letter: Other letter, Bassa_Vah, Other
|
||||
U+118A0 L Letter: Upper case letter, warangciti, Other, U+118C0
|
||||
U+11AC7 L Letter: Other letter, paucinhau, Other
|
||||
U+16AD0 L Letter: Other letter, bassavah, Other
|
||||
|
||||
findprop 11700 14400 108e0 11280 1d800
|
||||
U+11700 L Letter: Other letter, Ahom, Other
|
||||
U+14400 L Letter: Other letter, Anatolian_Hieroglyphs, Other
|
||||
U+108E0 R Letter: Other letter, Hatran, Other
|
||||
U+11280 L Letter: Other letter, Multani, Other
|
||||
U+1D800 L Symbol: Other symbol, SignWriting, Other
|
||||
U+11700 L Letter: Other letter, ahom, Other
|
||||
U+14400 L Letter: Other letter, anatolianhieroglyphs, Other
|
||||
U+108E0 R Letter: Other letter, hatran, Other
|
||||
U+11280 L Letter: Other letter, multani, Other
|
||||
U+1D800 L Symbol: Other symbol, signwriting, Other
|
||||
|
||||
findprop 11800 1e903 11da9 10d27 11ee0 16e48 10f27 10f30
|
||||
U+11800 L Letter: Other letter, Dogra, Other
|
||||
U+1E903 R Letter: Upper case letter, Adlam, Other, U+1E925
|
||||
U+11DA9 L Number: Decimal number, Gunjala_Gondi, Other
|
||||
U+10D27 NSM Mark: Non-spacing mark, Hanifi_Rohingya, Extend
|
||||
U+11EE0 L Letter: Other letter, Makasar, Other
|
||||
U+16E48 L Letter: Upper case letter, Medefaidrin, Other, U+16E68
|
||||
U+10F27 R Letter: Other letter, Old_Sogdian, Other
|
||||
U+10F30 AL Letter: Other letter, Sogdian, Other
|
||||
U+11800 L Letter: Other letter, dogra, Other
|
||||
U+1E903 R Letter: Upper case letter, adlam, Other, U+1E925
|
||||
U+11DA9 L Number: Decimal number, gunjalagondi, Other
|
||||
U+10D27 NSM Mark: Non-spacing mark, hanifirohingya, Extend
|
||||
U+11EE0 L Letter: Other letter, makasar, Other
|
||||
U+16E48 L Letter: Upper case letter, medefaidrin, Other, U+16E68
|
||||
U+10F27 R Letter: Other letter, oldsogdian, Other
|
||||
U+10F30 AL Letter: Other letter, sogdian, Other
|
||||
|
||||
findprop a836 a833 1cf4 20f0 1cd0
|
||||
U+A836 L Symbol: Other symbol, Common, Other, [Devanagari, Dogra, Gujarati, Gurmukhi, Khojki, Kaithi, Mahajani, Modi, Khudawadi, Takri, Tirhuta]
|
||||
U+A833 L Number: Other number, Common, Other, [Devanagari, Dogra, Gujarati, Gurmukhi, Khojki, Kannada, Kaithi, Mahajani, Modi, Nandinagari, Khudawadi, Takri, Tirhuta]
|
||||
U+1CF4 NSM Mark: Non-spacing mark, Inherited, Extend, [Devanagari, Grantha, Kannada]
|
||||
U+20F0 NSM Mark: Non-spacing mark, Inherited, Extend, [Devanagari, Grantha, Latin]
|
||||
U+1CD0 NSM Mark: Non-spacing mark, Inherited, Extend, [Bengali, Devanagari, Grantha, Kannada]
|
||||
U+A836 L Symbol: Other symbol, common, Other, [devanagari, dogra, gujarati, gurmukhi, khojki, kaithi, mahajani, modi, khudawadi, takri, tirhuta]
|
||||
U+A833 L Number: Other number, common, Other, [devanagari, dogra, gujarati, gurmukhi, khojki, kannada, kaithi, mahajani, modi, nandinagari, khudawadi, takri, tirhuta]
|
||||
U+1CF4 NSM Mark: Non-spacing mark, inherited, Extend, [devanagari, grantha, kannada]
|
||||
U+20F0 NSM Mark: Non-spacing mark, inherited, Extend, [devanagari, grantha, latin]
|
||||
U+1CD0 NSM Mark: Non-spacing mark, inherited, Extend, [bengali, devanagari, grantha, kannada]
|
||||
|
||||
findprop 32ff
|
||||
U+32FF L Symbol: Other symbol, Common, Other, [Han]
|
||||
U+32FF L Symbol: Other symbol, common, Other, [han]
|
||||
|
||||
findprop 1f16d
|
||||
U+1F16D ON Symbol: Other symbol, Common, Extended Pictographic
|
||||
U+1F16D ON Symbol: Other symbol, common, Extended Pictographic
|
||||
|
||||
findprop U+10e93 U+10eaa
|
||||
U+10E93 R Letter: Other letter, Yezidi, Other
|
||||
U+10EAA R Control: Unassigned, Unknown, Other
|
||||
U+10E93 R Letter: Other letter, yezidi, Other
|
||||
U+10EAA R Control: Unassigned, unknown, Other
|
||||
|
||||
findprop 0602 202a 202b 202c 2068 2069 202d 202e 2067
|
||||
U+0602 AN Control: Format, Arabic, Prepend
|
||||
U+202A *LRE Control: Format, Common, Control
|
||||
U+202B *RLE Control: Format, Common, Control
|
||||
U+202C *PDF Control: Format, Common, Control
|
||||
U+2068 *FSI Control: Format, Common, Control
|
||||
U+2069 *PDI Control: Format, Common, Control
|
||||
U+202D *LRO Control: Format, Common, Control
|
||||
U+202E *RLO Control: Format, Common, Control
|
||||
U+2067 *RLI Control: Format, Common, Control
|
||||
U+0602 AN Control: Format, arabic, Prepend
|
||||
U+202A *LRE Control: Format, common, Control
|
||||
U+202B *RLE Control: Format, common, Control
|
||||
U+202C *PDF Control: Format, common, Control
|
||||
U+2068 *FSI Control: Format, common, Control
|
||||
U+2069 *PDI Control: Format, common, Control
|
||||
U+202D *LRO Control: Format, common, Control
|
||||
U+202E *RLO Control: Format, common, Control
|
||||
U+2067 *RLI Control: Format, common, Control
|
||||
|
|
|
@ -1,253 +1,220 @@
|
|||
find script Han
|
||||
U+2E80..U+2E99 ON Symbol: Other symbol, Han, Other
|
||||
U+2E9B..U+2EF3 ON Symbol: Other symbol, Han, Other
|
||||
U+2F00..U+2FD5 ON Symbol: Other symbol, Han, Other
|
||||
U+3005 L Letter: Modifier letter, Han, Other
|
||||
U+3007 L Number: Letter number, Han, Other
|
||||
U+3021..U+3029 L Number: Letter number, Han, Other
|
||||
U+3038..U+303A L Number: Letter number, Han, Other
|
||||
U+303B L Letter: Modifier letter, Han, Other
|
||||
U+3400..U+4DBF L Letter: Other letter, Han, Other
|
||||
U+4E00..U+9FFF L Letter: Other letter, Han, Other
|
||||
U+F900..U+FA6D L Letter: Other letter, Han, Other
|
||||
U+FA70..U+FAD9 L Letter: Other letter, Han, Other
|
||||
U+16FE2 ON Punctuation: Other punctuation, Han, Other
|
||||
U+16FE3 L Letter: Modifier letter, Han, Other
|
||||
U+16FF0..U+16FF1 L Mark: Spacing mark, Han, SpacingMark
|
||||
U+20000..U+2A6DF L Letter: Other letter, Han, Other
|
||||
U+2A700..U+2B738 L Letter: Other letter, Han, Other
|
||||
U+2B740..U+2B81D L Letter: Other letter, Han, Other
|
||||
U+2B820..U+2CEA1 L Letter: Other letter, Han, Other
|
||||
U+2CEB0..U+2EBE0 L Letter: Other letter, Han, Other
|
||||
U+2F800..U+2FA1D L Letter: Other letter, Han, Other
|
||||
U+30000..U+3134A L Letter: Other letter, Han, Other
|
||||
** Unrecognized script name "Han"
|
||||
find type Pe script Common scriptx Hangul
|
||||
U+3009 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+300B ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+300D ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+300F ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+3011 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+3015 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+3017 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+3019 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+301B ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
U+301E..U+301F ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana]
|
||||
U+FF63 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi]
|
||||
** Unrecognized script name "Common"
|
||||
find type Sk
|
||||
U+005E ON Symbol: Modifier symbol, Common, Other
|
||||
U+0060 ON Symbol: Modifier symbol, Common, Other
|
||||
U+00A8 ON Symbol: Modifier symbol, Common, Other
|
||||
U+00AF ON Symbol: Modifier symbol, Common, Other
|
||||
U+00B4 ON Symbol: Modifier symbol, Common, Other
|
||||
U+00B8 ON Symbol: Modifier symbol, Common, Other
|
||||
U+02C2..U+02C5 ON Symbol: Modifier symbol, Common, Other
|
||||
U+02D2..U+02DF ON Symbol: Modifier symbol, Common, Other
|
||||
U+02E5..U+02E9 ON Symbol: Modifier symbol, Common, Other
|
||||
U+02EA..U+02EB ON Symbol: Modifier symbol, Bopomofo, Other
|
||||
U+02ED ON Symbol: Modifier symbol, Common, Other
|
||||
U+02EF..U+02FF ON Symbol: Modifier symbol, Common, Other
|
||||
U+0375 ON Symbol: Modifier symbol, Greek, Other
|
||||
U+0384 ON Symbol: Modifier symbol, Greek, Other
|
||||
U+0385 ON Symbol: Modifier symbol, Common, Other
|
||||
U+0888 AL Symbol: Modifier symbol, Arabic, Other
|
||||
U+1FBD ON Symbol: Modifier symbol, Greek, Other
|
||||
U+1FBF..U+1FC1 ON Symbol: Modifier symbol, Greek, Other
|
||||
U+1FCD..U+1FCF ON Symbol: Modifier symbol, Greek, Other
|
||||
U+1FDD..U+1FDF ON Symbol: Modifier symbol, Greek, Other
|
||||
U+1FED..U+1FEF ON Symbol: Modifier symbol, Greek, Other
|
||||
U+1FFD..U+1FFE ON Symbol: Modifier symbol, Greek, Other
|
||||
U+309B..U+309C ON Symbol: Modifier symbol, Common, Other, [Hiragana, Katakana]
|
||||
U+A700..U+A707 ON Symbol: Modifier symbol, Common, Other, [Han, Latin]
|
||||
U+A708..U+A716 ON Symbol: Modifier symbol, Common, Other
|
||||
U+A720..U+A721 ON Symbol: Modifier symbol, Common, Other
|
||||
U+A789..U+A78A L Symbol: Modifier symbol, Common, Other
|
||||
U+AB5B L Symbol: Modifier symbol, Common, Other
|
||||
U+AB6A..U+AB6B ON Symbol: Modifier symbol, Common, Other
|
||||
U+FBB2..U+FBC2 AL Symbol: Modifier symbol, Arabic, Other
|
||||
U+FF3E ON Symbol: Modifier symbol, Common, Other
|
||||
U+FF40 ON Symbol: Modifier symbol, Common, Other
|
||||
U+FFE3 ON Symbol: Modifier symbol, Common, Other
|
||||
U+1F3FB..U+1F3FF ON Symbol: Modifier symbol, Common, Extend
|
||||
U+005E ON Symbol: Modifier symbol, common, Other
|
||||
U+0060 ON Symbol: Modifier symbol, common, Other
|
||||
U+00A8 ON Symbol: Modifier symbol, common, Other
|
||||
U+00AF ON Symbol: Modifier symbol, common, Other
|
||||
U+00B4 ON Symbol: Modifier symbol, common, Other
|
||||
U+00B8 ON Symbol: Modifier symbol, common, Other
|
||||
U+02C2..U+02C5 ON Symbol: Modifier symbol, common, Other
|
||||
U+02D2..U+02DF ON Symbol: Modifier symbol, common, Other
|
||||
U+02E5..U+02E9 ON Symbol: Modifier symbol, common, Other
|
||||
U+02EA..U+02EB ON Symbol: Modifier symbol, bopomofo, Other
|
||||
U+02ED ON Symbol: Modifier symbol, common, Other
|
||||
U+02EF..U+02FF ON Symbol: Modifier symbol, common, Other
|
||||
U+0375 ON Symbol: Modifier symbol, greek, Other
|
||||
U+0384 ON Symbol: Modifier symbol, greek, Other
|
||||
U+0385 ON Symbol: Modifier symbol, common, Other
|
||||
U+0888 AL Symbol: Modifier symbol, arabic, Other
|
||||
U+1FBD ON Symbol: Modifier symbol, greek, Other
|
||||
U+1FBF..U+1FC1 ON Symbol: Modifier symbol, greek, Other
|
||||
U+1FCD..U+1FCF ON Symbol: Modifier symbol, greek, Other
|
||||
U+1FDD..U+1FDF ON Symbol: Modifier symbol, greek, Other
|
||||
U+1FED..U+1FEF ON Symbol: Modifier symbol, greek, Other
|
||||
U+1FFD..U+1FFE ON Symbol: Modifier symbol, greek, Other
|
||||
U+309B..U+309C ON Symbol: Modifier symbol, common, Other, [hiragana, katakana]
|
||||
U+A700..U+A707 ON Symbol: Modifier symbol, common, Other, [han, latin]
|
||||
U+A708..U+A716 ON Symbol: Modifier symbol, common, Other
|
||||
U+A720..U+A721 ON Symbol: Modifier symbol, common, Other
|
||||
U+A789..U+A78A L Symbol: Modifier symbol, common, Other
|
||||
U+AB5B L Symbol: Modifier symbol, common, Other
|
||||
U+AB6A..U+AB6B ON Symbol: Modifier symbol, common, Other
|
||||
U+FBB2..U+FBC2 AL Symbol: Modifier symbol, arabic, Other
|
||||
U+FF3E ON Symbol: Modifier symbol, common, Other
|
||||
U+FF40 ON Symbol: Modifier symbol, common, Other
|
||||
U+FFE3 ON Symbol: Modifier symbol, common, Other
|
||||
U+1F3FB..U+1F3FF ON Symbol: Modifier symbol, common, Extend
|
||||
find type Pd
|
||||
U+002D ES Punctuation: Dash punctuation, Common, Other
|
||||
U+058A ON Punctuation: Dash punctuation, Armenian, Other
|
||||
U+05BE R Punctuation: Dash punctuation, Hebrew, Other
|
||||
U+1400 ON Punctuation: Dash punctuation, Canadian_Aboriginal, Other
|
||||
U+1806 ON Punctuation: Dash punctuation, Mongolian, Other
|
||||
U+2010..U+2015 ON Punctuation: Dash punctuation, Common, Other
|
||||
U+2E17 ON Punctuation: Dash punctuation, Common, Other
|
||||
U+2E1A ON Punctuation: Dash punctuation, Common, Other
|
||||
U+2E3A..U+2E3B ON Punctuation: Dash punctuation, Common, Other
|
||||
U+2E40 ON Punctuation: Dash punctuation, Common, Other
|
||||
U+2E5D ON Punctuation: Dash punctuation, Common, Other
|
||||
U+301C ON Punctuation: Dash punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana]
|
||||
U+3030 ON Punctuation: Dash punctuation, Common, Extended Pictographic, [Bopomofo, Hangul, Han, Hiragana, Katakana]
|
||||
U+30A0 ON Punctuation: Dash punctuation, Common, Other, [Hiragana, Katakana]
|
||||
U+FE31..U+FE32 ON Punctuation: Dash punctuation, Common, Other
|
||||
U+FE58 ON Punctuation: Dash punctuation, Common, Other
|
||||
U+FE63 ES Punctuation: Dash punctuation, Common, Other
|
||||
U+FF0D ES Punctuation: Dash punctuation, Common, Other
|
||||
U+10EAD R Punctuation: Dash punctuation, Yezidi, Other
|
||||
U+002D ES Punctuation: Dash punctuation, common, Other
|
||||
U+058A ON Punctuation: Dash punctuation, armenian, Other
|
||||
U+05BE R Punctuation: Dash punctuation, hebrew, Other
|
||||
U+1400 ON Punctuation: Dash punctuation, canadianaboriginal, Other
|
||||
U+1806 ON Punctuation: Dash punctuation, mongolian, Other
|
||||
U+2010..U+2015 ON Punctuation: Dash punctuation, common, Other
|
||||
U+2E17 ON Punctuation: Dash punctuation, common, Other
|
||||
U+2E1A ON Punctuation: Dash punctuation, common, Other
|
||||
U+2E3A..U+2E3B ON Punctuation: Dash punctuation, common, Other
|
||||
U+2E40 ON Punctuation: Dash punctuation, common, Other
|
||||
U+2E5D ON Punctuation: Dash punctuation, common, Other
|
||||
U+301C ON Punctuation: Dash punctuation, common, Other, [bopomofo, hangul, han, hiragana, katakana]
|
||||
U+3030 ON Punctuation: Dash punctuation, common, Extended Pictographic, [bopomofo, hangul, han, hiragana, katakana]
|
||||
U+30A0 ON Punctuation: Dash punctuation, common, Other, [hiragana, katakana]
|
||||
U+FE31..U+FE32 ON Punctuation: Dash punctuation, common, Other
|
||||
U+FE58 ON Punctuation: Dash punctuation, common, Other
|
||||
U+FE63 ES Punctuation: Dash punctuation, common, Other
|
||||
U+FF0D ES Punctuation: Dash punctuation, common, Other
|
||||
U+10EAD R Punctuation: Dash punctuation, yezidi, Other
|
||||
find gbreak LVT
|
||||
U+AC01..U+AC1B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AC1D..U+AC37 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AC39..U+AC53 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AC55..U+AC6F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AC71..U+AC8B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AC8D..U+ACA7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ACA9..U+ACC3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ACC5..U+ACDF L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ACE1..U+ACFB L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ACFD..U+AD17 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AD19..U+AD33 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AD35..U+AD4F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AD51..U+AD6B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AD6D..U+AD87 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AD89..U+ADA3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ADA5..U+ADBF L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ADC1..U+ADDB L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ADDD..U+ADF7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+ADF9..U+AE13 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AE15..U+AE2F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AE31..U+AE4B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AE4D..U+AE67 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AE69..U+AE83 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AE85..U+AE9F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AEA1..U+AEBB L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AEBD..U+AED7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AED9..U+AEF3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AEF5..U+AF0F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AF11..U+AF2B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AF2D..U+AF47 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AF49..U+AF63 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AF65..U+AF7F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AF81..U+AF9B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AF9D..U+AFB7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AFB9..U+AFD3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AFD5..U+AFEF L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AFF1..U+B00B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B00D..U+B027 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B029..U+B043 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B045..U+B05F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B061..U+B07B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B07D..U+B097 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B099..U+B0B3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B0B5..U+B0CF L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B0D1..U+B0EB L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B0ED..U+B107 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B109..U+B123 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B125..U+B13F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B141..U+B15B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B15D..U+B177 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B179..U+B193 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B195..U+B1AF L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B1B1..U+B1CB L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B1CD..U+B1E7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B1E9..U+B203 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B205..U+B21F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B221..U+B23B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B23D..U+B257 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B259..U+B273 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B275..U+B28F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B291..U+B2AB L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B2AD..U+B2C7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B2C9..U+B2E3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B2E5..U+B2FF L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B301..U+B31B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B31D..U+B337 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B339..U+B353 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B355..U+B36F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B371..U+B38B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B38D..U+B3A7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B3A9..U+B3C3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B3C5..U+B3DF L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B3E1..U+B3FB L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B3FD..U+B417 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B419..U+B433 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B435..U+B44F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B451..U+B46B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B46D..U+B487 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B489..U+B4A3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B4A5..U+B4BF L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B4C1..U+B4DB L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B4DD..U+B4F7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B4F9..U+B513 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B515..U+B52F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B531..U+B54B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B54D..U+B567 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B569..U+B583 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B585..U+B59F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B5A1..U+B5BB L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B5BD..U+B5D7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B5D9..U+B5F3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B5F5..U+B60F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B611..U+B62B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B62D..U+B647 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B649..U+B663 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B665..U+B67F L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B681..U+B69B L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B69D..U+B6B7 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B6B9..U+B6D3 L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+B6D5..U+B6EF L Letter: Other letter, Hangul, Hangul syllable type LVT
|
||||
U+AC01..U+AC1B L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AC1D..U+AC37 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AC39..U+AC53 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AC55..U+AC6F L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AC71..U+AC8B L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AC8D..U+ACA7 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+ACA9..U+ACC3 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+ACC5..U+ACDF L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+ACE1..U+ACFB L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+ACFD..U+AD17 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AD19..U+AD33 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AD35..U+AD4F L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AD51..U+AD6B L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AD6D..U+AD87 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AD89..U+ADA3 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+ADA5..U+ADBF L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+ADC1..U+ADDB L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+ADDD..U+ADF7 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+ADF9..U+AE13 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AE15..U+AE2F L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AE31..U+AE4B L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AE4D..U+AE67 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AE69..U+AE83 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AE85..U+AE9F L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AEA1..U+AEBB L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AEBD..U+AED7 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AED9..U+AEF3 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AEF5..U+AF0F L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AF11..U+AF2B L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AF2D..U+AF47 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AF49..U+AF63 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AF65..U+AF7F L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AF81..U+AF9B L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AF9D..U+AFB7 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AFB9..U+AFD3 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AFD5..U+AFEF L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+AFF1..U+B00B L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B00D..U+B027 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B029..U+B043 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B045..U+B05F L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B061..U+B07B L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B07D..U+B097 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B099..U+B0B3 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B0B5..U+B0CF L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B0D1..U+B0EB L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B0ED..U+B107 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B109..U+B123 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B125..U+B13F L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B141..U+B15B L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B15D..U+B177 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B179..U+B193 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B195..U+B1AF L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B1B1..U+B1CB L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B1CD..U+B1E7 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B1E9..U+B203 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B205..U+B21F L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B221..U+B23B L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B23D..U+B257 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B259..U+B273 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B275..U+B28F L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B291..U+B2AB L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B2AD..U+B2C7 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B2C9..U+B2E3 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B2E5..U+B2FF L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B301..U+B31B L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B31D..U+B337 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B339..U+B353 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B355..U+B36F L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B371..U+B38B L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B38D..U+B3A7 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B3A9..U+B3C3 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B3C5..U+B3DF L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B3E1..U+B3FB L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B3FD..U+B417 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B419..U+B433 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B435..U+B44F L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B451..U+B46B L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B46D..U+B487 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B489..U+B4A3 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B4A5..U+B4BF L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B4C1..U+B4DB L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B4DD..U+B4F7 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B4F9..U+B513 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B515..U+B52F L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B531..U+B54B L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B54D..U+B567 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B569..U+B583 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B585..U+B59F L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B5A1..U+B5BB L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B5BD..U+B5D7 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B5D9..U+B5F3 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B5F5..U+B60F L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B611..U+B62B L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B62D..U+B647 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B649..U+B663 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B665..U+B67F L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B681..U+B69B L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B69D..U+B6B7 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B6B9..U+B6D3 L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
U+B6D5..U+B6EF L Letter: Other letter, hangul, Hangul syllable type LVT
|
||||
...
|
||||
find script Old_Uyghur
|
||||
U+10F70..U+10F81 R Letter: Other letter, Old_Uyghur, Other
|
||||
U+10F82..U+10F85 NSM Mark: Non-spacing mark, Old_Uyghur, Extend
|
||||
U+10F86..U+10F89 R Punctuation: Other punctuation, Old_Uyghur, Other
|
||||
** Unrecognized script name "Old_Uyghur"
|
||||
find bidi PDF
|
||||
U+202C *PDF Control: Format, Common, Control
|
||||
U+202C *PDF Control: Format, common, Control
|
||||
find bidi CS
|
||||
U+002C CS Punctuation: Other punctuation, Common, Other
|
||||
U+002E..U+002F CS Punctuation: Other punctuation, Common, Other
|
||||
U+003A CS Punctuation: Other punctuation, Common, Other
|
||||
U+00A0 CS Separator: Space separator, Common, Other
|
||||
U+060C CS Punctuation: Other punctuation, Common, Other, [Arabic, Nko, Hanifi_Rohingya, Syriac, Thaana, Yezidi]
|
||||
U+202F CS Separator: Space separator, Common, Other, [Latin, Mongolian]
|
||||
U+2044 CS Symbol: Mathematical symbol, Common, Other
|
||||
U+FE50 CS Punctuation: Other punctuation, Common, Other
|
||||
U+FE52 CS Punctuation: Other punctuation, Common, Other
|
||||
U+FE55 CS Punctuation: Other punctuation, Common, Other
|
||||
U+FF0C CS Punctuation: Other punctuation, Common, Other
|
||||
U+FF0E..U+FF0F CS Punctuation: Other punctuation, Common, Other
|
||||
U+FF1A CS Punctuation: Other punctuation, Common, Other
|
||||
U+002C CS Punctuation: Other punctuation, common, Other
|
||||
U+002E..U+002F CS Punctuation: Other punctuation, common, Other
|
||||
U+003A CS Punctuation: Other punctuation, common, Other
|
||||
U+00A0 CS Separator: Space separator, common, Other
|
||||
U+060C CS Punctuation: Other punctuation, common, Other, [arabic, nko, hanifirohingya, syriac, thaana, yezidi]
|
||||
U+202F CS Separator: Space separator, common, Other, [latin, mongolian]
|
||||
U+2044 CS Symbol: Mathematical symbol, common, Other
|
||||
U+FE50 CS Punctuation: Other punctuation, common, Other
|
||||
U+FE52 CS Punctuation: Other punctuation, common, Other
|
||||
U+FE55 CS Punctuation: Other punctuation, common, Other
|
||||
U+FF0C CS Punctuation: Other punctuation, common, Other
|
||||
U+FF0E..U+FF0F CS Punctuation: Other punctuation, common, Other
|
||||
U+FF1A CS Punctuation: Other punctuation, common, Other
|
||||
find bidi CS type Sm
|
||||
U+2044 CS Symbol: Mathematical symbol, Common, Other
|
||||
U+2044 CS Symbol: Mathematical symbol, common, Other
|
||||
find bidi B
|
||||
U+000A B Control: Control, Common, LF
|
||||
U+000D B Control: Control, Common, CR
|
||||
U+001C..U+001E B Control: Control, Common, Control
|
||||
U+0085 B Control: Control, Common, Control
|
||||
U+2029 B Separator: Paragraph separator, Common, Control
|
||||
U+000A B Control: Control, common, LF
|
||||
U+000D B Control: Control, common, CR
|
||||
U+001C..U+001E B Control: Control, common, Control
|
||||
U+0085 B Control: Control, common, Control
|
||||
U+2029 B Separator: Paragraph separator, common, Control
|
||||
find bidi FSI
|
||||
U+2068 *FSI Control: Format, Common, Control
|
||||
U+2068 *FSI Control: Format, common, Control
|
||||
find bidi PDI
|
||||
U+2069 *PDI Control: Format, Common, Control
|
||||
U+2069 *PDI Control: Format, common, Control
|
||||
find bidi RLI
|
||||
U+2067 *RLI Control: Format, Common, Control
|
||||
U+2067 *RLI Control: Format, common, Control
|
||||
find bidi RLO
|
||||
U+202E *RLO Control: Format, Common, Control
|
||||
U+202E *RLO Control: Format, common, Control
|
||||
find bidi S
|
||||
U+0009 S Control: Control, Common, Control
|
||||
U+000B S Control: Control, Common, Control
|
||||
U+001F S Control: Control, Common, Control
|
||||
U+0009 S Control: Control, common, Control
|
||||
U+000B S Control: Control, common, Control
|
||||
U+001F S Control: Control, common, Control
|
||||
find bidi WS
|
||||
U+000C WS Control: Control, Common, Control
|
||||
U+0020 WS Separator: Space separator, Common, Other
|
||||
U+1680 WS Separator: Space separator, Ogham, Other
|
||||
U+2000..U+200A WS Separator: Space separator, Common, Other
|
||||
U+2028 WS Separator: Line separator, Common, Control
|
||||
U+205F WS Separator: Space separator, Common, Other
|
||||
U+3000 WS Separator: Space separator, Common, Other
|
||||
U+000C WS Control: Control, common, Control
|
||||
U+0020 WS Separator: Space separator, common, Other
|
||||
U+1680 WS Separator: Space separator, ogham, Other
|
||||
U+2000..U+200A WS Separator: Space separator, common, Other
|
||||
U+2028 WS Separator: Line separator, common, Control
|
||||
U+205F WS Separator: Space separator, common, Other
|
||||
U+3000 WS Separator: Space separator, common, Other
|
||||
find bidi_control
|
||||
U+061C *AL Control: Format, Arabic, Control, [Arabic, Syriac, Thaana]
|
||||
U+200E *L Control: Format, Common, Control
|
||||
U+200F *R Control: Format, Common, Control
|
||||
U+202A *LRE Control: Format, Common, Control
|
||||
U+202B *RLE Control: Format, Common, Control
|
||||
U+202C *PDF Control: Format, Common, Control
|
||||
U+202D *LRO Control: Format, Common, Control
|
||||
U+202E *RLO Control: Format, Common, Control
|
||||
U+2066 *LRI Control: Format, Common, Control
|
||||
U+2067 *RLI Control: Format, Common, Control
|
||||
U+2068 *FSI Control: Format, Common, Control
|
||||
U+2069 *PDI Control: Format, Common, Control
|
||||
U+061C *AL Control: Format, arabic, Control, [arabic, syriac, thaana]
|
||||
U+200E *L Control: Format, common, Control
|
||||
U+200F *R Control: Format, common, Control
|
||||
U+202A *LRE Control: Format, common, Control
|
||||
U+202B *RLE Control: Format, common, Control
|
||||
U+202C *PDF Control: Format, common, Control
|
||||
U+202D *LRO Control: Format, common, Control
|
||||
U+202E *RLO Control: Format, common, Control
|
||||
U+2066 *LRI Control: Format, common, Control
|
||||
U+2067 *RLI Control: Format, common, Control
|
||||
U+2068 *FSI Control: Format, common, Control
|
||||
U+2069 *PDI Control: Format, common, Control
|
||||
|
|
|
@ -123,18 +123,20 @@ opcode is used to select the column. The values are as follows:
|
|||
*/
|
||||
|
||||
static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
|
||||
/* ANY LAMP GC PC SC ALNUM SPACE PXSPACE WORD CLIST UCNC */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */
|
||||
{ 0, 3, 0, 0, 0, 3, 1, 1, 0, 0, 0 }, /* PT_LAMP */
|
||||
{ 0, 0, 2, 4, 0, 9, 10, 10, 11, 0, 0 }, /* PT_GC */
|
||||
{ 0, 0, 5, 2, 0, 15, 16, 16, 17, 0, 0 }, /* PT_PC */
|
||||
{ 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0 }, /* PT_SC */
|
||||
{ 0, 3, 6, 12, 0, 3, 1, 1, 0, 0, 0 }, /* PT_ALNUM */
|
||||
{ 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_SPACE */
|
||||
{ 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_PXSPACE */
|
||||
{ 0, 0, 8, 14, 0, 0, 1, 1, 3, 0, 0 }, /* PT_WORD */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 } /* PT_UCNC */
|
||||
/* ANY LAMP GC PC SC ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BIDICO */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */
|
||||
{ 0, 3, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 1 }, /* PT_LAMP */
|
||||
{ 0, 0, 2, 4, 0, 9, 10, 10, 11, 0, 0, 0, 0 }, /* PT_GC */
|
||||
{ 0, 0, 5, 2, 0, 15, 16, 16, 17, 0, 0, 0, 0 }, /* PT_PC */
|
||||
{ 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SC */
|
||||
{ 0, 3, 6, 12, 0, 3, 1, 1, 0, 0, 0, 0, 1 }, /* PT_ALNUM */
|
||||
{ 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0, 0, 1 }, /* PT_SPACE */
|
||||
{ 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0, 0, 1 }, /* PT_PXSPACE */
|
||||
{ 0, 0, 8, 14, 0, 0, 1, 1, 3, 0, 0, 0, 1 }, /* PT_WORD */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 }, /* PT_UCNC */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_BIDICL */
|
||||
{ 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 } /* PT_BIDICO */
|
||||
};
|
||||
|
||||
/* This table is used to check whether auto-possessification is possible
|
||||
|
@ -251,6 +253,14 @@ switch(ptype)
|
|||
if (c == *p++) return negated;
|
||||
}
|
||||
break; /* Control never reaches here */
|
||||
|
||||
/* Haven't yet thought these through. */
|
||||
|
||||
case PT_BIDICL:
|
||||
return FALSE;
|
||||
|
||||
case PT_BIDICO:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
|
|
|
@ -2088,7 +2088,8 @@ get_ucp(PCRE2_SPTR *ptrptr, BOOL *negptr, uint16_t *ptypeptr,
|
|||
PCRE2_UCHAR c;
|
||||
PCRE2_SIZE i, bot, top;
|
||||
PCRE2_SPTR ptr = *ptrptr;
|
||||
PCRE2_UCHAR name[32];
|
||||
PCRE2_UCHAR name[50];
|
||||
PCRE2_UCHAR *vptr = NULL;
|
||||
|
||||
if (ptr >= cb->end_pattern) goto ERROR_RETURN;
|
||||
c = *ptr++;
|
||||
|
@ -2109,9 +2110,11 @@ if (c == CHAR_LEFT_CURLY_BRACKET)
|
|||
{
|
||||
if (ptr >= cb->end_pattern) goto ERROR_RETURN;
|
||||
c = *ptr++;
|
||||
while (c == '_' || c == '-' || isspace(c)) c = *ptr++;
|
||||
if (c == CHAR_NUL) goto ERROR_RETURN;
|
||||
if (c == CHAR_RIGHT_CURLY_BRACKET) break;
|
||||
name[i] = c;
|
||||
name[i] = tolower(c);
|
||||
if (c == ':' || c == '=') vptr = name + i;
|
||||
}
|
||||
if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
|
||||
name[i] = 0;
|
||||
|
@ -2122,13 +2125,28 @@ letter. */
|
|||
|
||||
else if (MAX_255(c) && (cb->ctypes[c] & ctype_letter) != 0)
|
||||
{
|
||||
name[0] = c;
|
||||
name[0] = tolower(c);
|
||||
name[1] = 0;
|
||||
}
|
||||
else goto ERROR_RETURN;
|
||||
|
||||
*ptrptr = ptr;
|
||||
|
||||
/* If the property contains ':' or '=' we have class name and value separately
|
||||
specified. The only case currently supported is Bidi_Class, for which the
|
||||
property names are "bidi<name>". */
|
||||
|
||||
if (vptr != NULL)
|
||||
{
|
||||
*vptr = 0; /* Terminate class name */
|
||||
if (PRIV(strcmp_c8)(name, "bidiclass") != 0)
|
||||
{
|
||||
*errorcodeptr = ERR47;
|
||||
return FALSE;
|
||||
}
|
||||
memmove(name + 4, vptr + 1, (name + i - vptr)*sizeof(PCRE2_UCHAR));
|
||||
}
|
||||
|
||||
/* Search for a recognized property name using binary chop. */
|
||||
|
||||
bot = 0;
|
||||
|
@ -2147,6 +2165,7 @@ while (bot < top)
|
|||
}
|
||||
if (r > 0) bot = i + 1; else top = i;
|
||||
}
|
||||
|
||||
*errorcodeptr = ERR47; /* Unrecognized name */
|
||||
return FALSE;
|
||||
|
||||
|
|
|
@ -1240,6 +1240,14 @@ for (;;)
|
|||
c >= 0xe000;
|
||||
break;
|
||||
|
||||
case PT_BIDICO:
|
||||
OK = UCD_BIDICONTROL(c) != 0;
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
OK = UCD_BIDICLASS(c) == code[2];
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
|
||||
default:
|
||||
|
@ -1498,6 +1506,14 @@ for (;;)
|
|||
c >= 0xe000;
|
||||
break;
|
||||
|
||||
case PT_BIDICO:
|
||||
OK = UCD_BIDICONTROL(c) != 0;
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
OK = UCD_BIDICLASS(c) == code[3];
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
|
||||
default:
|
||||
|
@ -1739,6 +1755,14 @@ for (;;)
|
|||
c >= 0xe000;
|
||||
break;
|
||||
|
||||
case PT_BIDICO:
|
||||
OK = UCD_BIDICONTROL(c) != 0;
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
OK = UCD_BIDICLASS(c) == code[3];
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
|
||||
default:
|
||||
|
@ -2005,6 +2029,14 @@ for (;;)
|
|||
c >= 0xe000;
|
||||
break;
|
||||
|
||||
case PT_BIDICO:
|
||||
OK = UCD_BIDICONTROL(c) != 0;
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
OK = UCD_BIDICLASS(c) == code[1 + IMM2_SIZE + 2];
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
|
||||
default:
|
||||
|
|
|
@ -1262,12 +1262,14 @@ only. */
|
|||
#define PT_PC 3 /* Specified particular characteristic (e.g. Lu) */
|
||||
#define PT_SC 4 /* Script (e.g. Han) */
|
||||
#define PT_ALNUM 5 /* Alphanumeric - the union of L and N */
|
||||
#define PT_SPACE 6 /* Perl space - Z plus 9,10,12,13 */
|
||||
#define PT_SPACE 6 /* Perl space - general category Z plus 9,10,12,13 */
|
||||
#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */
|
||||
#define PT_WORD 8 /* Word - L plus N plus underscore */
|
||||
#define PT_CLIST 9 /* Pseudo-property: match character list */
|
||||
#define PT_UCNC 10 /* Universal Character nameable character */
|
||||
#define PT_TABSIZE 11 /* Size of square table for autopossessify tests */
|
||||
#define PT_BIDICL 11 /* Specified bidi class */
|
||||
#define PT_BIDICO 12 /* Bidi control character */
|
||||
#define PT_TABSIZE 13 /* Size of square table for autopossessify tests */
|
||||
|
||||
/* The following special properties are used only in XCLASS items, when POSIX
|
||||
classes are specified and PCRE2_UCP is set - in other words, for Unicode
|
||||
|
@ -1275,22 +1277,22 @@ handling of these classes. They are not available via the \p or \P escapes like
|
|||
those in the above list, and so they do not take part in the autopossessifying
|
||||
table. */
|
||||
|
||||
#define PT_PXGRAPH 11 /* [:graph:] - characters that mark the paper */
|
||||
#define PT_PXPRINT 12 /* [:print:] - [:graph:] plus non-control spaces */
|
||||
#define PT_PXPUNCT 13 /* [:punct:] - punctuation characters */
|
||||
#define PT_PXGRAPH 13 /* [:graph:] - characters that mark the paper */
|
||||
#define PT_PXPRINT 14 /* [:print:] - [:graph:] plus non-control spaces */
|
||||
#define PT_PXPUNCT 15 /* [:punct:] - punctuation characters */
|
||||
|
||||
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
|
||||
contain characters with values greater than 255. */
|
||||
|
||||
#define XCL_NOT 0x01 /* Flag: this is a negative class */
|
||||
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
|
||||
#define XCL_HASPROP 0x04 /* Flag: property checks are present. */
|
||||
#define XCL_NOT 0x01 /* Flag: this is a negative class */
|
||||
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
|
||||
#define XCL_HASPROP 0x04 /* Flag: property checks are present. */
|
||||
|
||||
#define XCL_END 0 /* Marks end of individual items */
|
||||
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
|
||||
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
|
||||
#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
|
||||
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
|
||||
#define XCL_END 0 /* Marks end of individual items */
|
||||
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
|
||||
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
|
||||
#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */
|
||||
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
|
||||
|
||||
/* These are escaped items that aren't just an encoding of a particular data
|
||||
value such as \n. They must have non-zero values, as check_escape() returns 0
|
||||
|
@ -1828,8 +1830,11 @@ typedef struct {
|
|||
property. The remaining bits hold the bidi class, but as there are only 23
|
||||
classes, we can mask off 5 bits - leaving two free for the future. */
|
||||
|
||||
#define UCD_BIDICLASS(ch) (GET_UCD(ch)->bidi & 0x1fu)
|
||||
#define UCD_BIDICONTROL(ch) (GET_UCD(ch)->bidi & 0x80u)
|
||||
#define UCD_BIDICLASS_MASK 0x1fu
|
||||
#define UCD_BIDICONTROL_BIT 0x80u
|
||||
|
||||
#define UCD_BIDICLASS(ch) (GET_UCD(ch)->bidi & UCD_BIDICLASS_MASK)
|
||||
#define UCD_BIDICONTROL(ch) (GET_UCD(ch)->bidi & UCD_BIDICONTROL_BIT)
|
||||
|
||||
/* Header for serialized pcre2 codes. */
|
||||
|
||||
|
|
|
@ -159,7 +159,8 @@ enum { RM100=100, RM101 };
|
|||
#ifdef SUPPORT_UNICODE
|
||||
enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207,
|
||||
RM208, RM209, RM210, RM211, RM212, RM213, RM214, RM215,
|
||||
RM216, RM217, RM218, RM219, RM220, RM221, RM222 };
|
||||
RM216, RM217, RM218, RM219, RM220, RM221, RM222, RM223,
|
||||
RM224 };
|
||||
#endif
|
||||
|
||||
/* Define short names for general fields in the current backtrack frame, which
|
||||
|
@ -2503,6 +2504,16 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_BIDICO:
|
||||
if (((prop->bidi & UCD_BIDICONTROL_BIT) != 0) == (Fop == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
if (((prop->bidi & UCD_BIDICLASS_MASK) == Fecode[2]) == (Fop == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
/* This should never occur */
|
||||
|
||||
default:
|
||||
|
@ -2804,6 +2815,34 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
}
|
||||
break;
|
||||
|
||||
case PT_BIDICO:
|
||||
for (i = 1; i <= Lmin; i++)
|
||||
{
|
||||
if (Feptr >= mb->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
if ((UCD_BIDICONTROL(fc) != 0) == (Lctype == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
for (i = 1; i <= Lmin; i++)
|
||||
{
|
||||
if (Feptr >= mb->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
break;
|
||||
|
||||
/* This should not occur */
|
||||
|
||||
default:
|
||||
|
@ -3562,6 +3601,40 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
}
|
||||
/* Control never gets here */
|
||||
|
||||
case PT_BIDICO:
|
||||
for (;;)
|
||||
{
|
||||
RMATCH(Fecode, RM223);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
|
||||
if (Feptr >= mb->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
if ((UCD_BIDICONTROL(fc) != 0) == (Lctype == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
/* Control never gets here */
|
||||
|
||||
case PT_BIDICL:
|
||||
for (;;)
|
||||
{
|
||||
RMATCH(Fecode, RM224);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
|
||||
if (Feptr >= mb->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
/* Control never gets here */
|
||||
|
||||
/* This should never occur */
|
||||
default:
|
||||
return PCRE2_ERROR_INTERNAL;
|
||||
|
@ -4076,6 +4149,38 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
}
|
||||
break;
|
||||
|
||||
case PT_BIDICO:
|
||||
for (i = Lmin; i < Lmax; i++)
|
||||
{
|
||||
int len = 1;
|
||||
if (Feptr >= mb->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
break;
|
||||
}
|
||||
GETCHARLENTEST(fc, Feptr, len);
|
||||
if ((UCD_BIDICONTROL(fc) != 0) == (Lctype == OP_NOTPROP))
|
||||
break;
|
||||
Feptr+= len;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
for (i = Lmin; i < Lmax; i++)
|
||||
{
|
||||
int len = 1;
|
||||
if (Feptr >= mb->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
break;
|
||||
}
|
||||
GETCHARLENTEST(fc, Feptr, len);
|
||||
if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
|
||||
break;
|
||||
Feptr+= len;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
return PCRE2_ERROR_INTERNAL;
|
||||
}
|
||||
|
@ -6066,7 +6171,7 @@ switch (Freturn_id)
|
|||
LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
|
||||
LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
|
||||
LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
|
||||
LBL(221) LBL(222)
|
||||
LBL(221) LBL(222) LBL(223) LBL(224)
|
||||
#endif
|
||||
|
||||
default:
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
New API code Copyright (c) 2016-2021 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -273,8 +273,8 @@ print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
|
|||
{
|
||||
if (code[1] != PT_CLIST)
|
||||
{
|
||||
fprintf(f, "%s%s %s%s", before, OP_names[*code], get_ucpname(code[1],
|
||||
code[2]), after);
|
||||
const char *s = get_ucpname(code[1], code[2]);
|
||||
fprintf(f, "%s%s %c%s%s", before, OP_names[*code], toupper(s[0]), s+1, after);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -724,6 +724,7 @@ for(;;)
|
|||
{
|
||||
unsigned int ptype = *ccode++;
|
||||
unsigned int pvalue = *ccode++;
|
||||
const char *s;
|
||||
|
||||
switch(ptype)
|
||||
{
|
||||
|
@ -740,8 +741,8 @@ for(;;)
|
|||
break;
|
||||
|
||||
default:
|
||||
fprintf(f, "\\%c{%s}", (not? 'P':'p'),
|
||||
get_ucpname(ptype, pvalue));
|
||||
s = get_ucpname(ptype, pvalue);
|
||||
fprintf(f, "\\%c{%c%s}", (not? 'P':'p'), toupper(s[0]), s+1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2018 University of Cambridge
|
||||
New API code Copyright (c) 2018-2021 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
|
1313
src/pcre2_tables.c
1313
src/pcre2_tables.c
File diff suppressed because it is too large
Load Diff
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
New API code Copyright (c) 2016-2021 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -206,6 +206,16 @@ while ((t = *data++) != XCL_END)
|
|||
return !negated;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_BIDICO:
|
||||
if (((prop->bidi & UCD_BIDICONTROL_BIT) != 0) == isprop)
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
if (((prop->bidi & UCD_BIDICLASS_MASK) == data[1]) == isprop)
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
/* The following three properties can occur only in an XCLASS, as there
|
||||
is no \p or \P coding for them. */
|
||||
|
|
|
@ -441,6 +441,7 @@ enum { MOD_CTC, /* Applies to a compile context */
|
|||
MOD_PAT, /* Applies to a pattern */
|
||||
MOD_PATP, /* Ditto, OK for Perl test */
|
||||
MOD_DAT, /* Applies to a data line */
|
||||
MOD_DATP, /* Ditto, OK for Perl test */
|
||||
MOD_PD, /* Applies to a pattern or a data line */
|
||||
MOD_PDP, /* As MOD_PD, OK for Perl test */
|
||||
MOD_PND, /* As MOD_PD, but not for a default pattern */
|
||||
|
@ -700,7 +701,7 @@ static modstruct modlist[] = {
|
|||
{ "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) },
|
||||
{ "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) },
|
||||
{ "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) },
|
||||
{ "no_jit", MOD_DAT, MOD_OPT, PCRE2_NO_JIT, DO(options) },
|
||||
{ "no_jit", MOD_DATP, MOD_OPT, PCRE2_NO_JIT, DO(options) },
|
||||
{ "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) },
|
||||
{ "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) },
|
||||
{ "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) },
|
||||
|
@ -3583,6 +3584,7 @@ if (restrict_for_perl_test) switch(m->which)
|
|||
{
|
||||
case MOD_PNDP:
|
||||
case MOD_PATP:
|
||||
case MOD_DATP:
|
||||
case MOD_PDP:
|
||||
break;
|
||||
|
||||
|
@ -3604,7 +3606,8 @@ switch (m->which)
|
|||
else if (ctx == CTX_DAT) field = PTR(dat_context);
|
||||
break;
|
||||
|
||||
case MOD_DAT: /* Data line modifier */
|
||||
case MOD_DAT: /* Data line modifier */
|
||||
case MOD_DATP: /* Allowed for Perl test */
|
||||
if (dctl != NULL) field = dctl;
|
||||
break;
|
||||
|
||||
|
|
|
@ -2495,4 +2495,118 @@
|
|||
\x{42f}
|
||||
\x{44f}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Tests for bidi control and bidi class properties, not yet supported by JIT.
|
||||
|
||||
#subject no_jit
|
||||
|
||||
/\p{ bidi_control }/utf
|
||||
-->\x{202c}<--
|
||||
|
||||
/\p{bidicontrol}+/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
|
||||
/\p{bidicontrol}+?/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
|
||||
/\p{bidicontrol}++/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
|
||||
/[\p{bidi_control}]/utf
|
||||
-->\x{202c}<--
|
||||
|
||||
/[\p{bidicontrol}]+/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
|
||||
/[\p{bidicontrol}]+?/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
|
||||
/[\p{bidicontrol}]++/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
|
||||
/[\p{bidicontrol}<>]+/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
|
||||
/\P{bidicontrol}+/g,utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
|
||||
/\p{^bidicontrol}+/g,utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
|
||||
/\p{bidi class = al}/utf
|
||||
-->\x{061D}<--
|
||||
|
||||
/\p{bidi class = al}+/utf
|
||||
-->\x{061D}\x{061e}\x{061f}<--
|
||||
|
||||
/\p{bidi_class : AL}+?/utf
|
||||
-->\x{061D}\x{061e}\x{061f}<--
|
||||
|
||||
/\p{Bidi_Class : AL}++/utf
|
||||
-->\x{061D}\x{061e}\x{061f}<--
|
||||
|
||||
/\p{bidi class = aN}+/utf
|
||||
-->\x{061D}\x{0602}\x{0604}\x{061f}<--
|
||||
|
||||
/\p{bidi class = B}+/utf
|
||||
-->\x{0a}\x{0d}\x{01c}\x{01e}\x{085}\x{2029}<--
|
||||
|
||||
/\p{bidi class:BN}+/utf
|
||||
-->\x{0}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}<--
|
||||
|
||||
/\p{bidiclass:cs}+/utf
|
||||
-->,.\x{060c}\x{ff1a}<--
|
||||
|
||||
/\p{bidiclass:En}+/utf
|
||||
-->09\x{b2}\x{2074}\x{1fbf9}<--
|
||||
|
||||
/\p{bidiclass:es}+/utf
|
||||
==>+-\x{207a}\x{ff0d}<==
|
||||
|
||||
/\p{bidiclass:et}+/utf
|
||||
-->#\{24}%\x{a2}\x{A838}\x{1e2ff}<--
|
||||
|
||||
/\p{bidiclass:FSI}+/utf
|
||||
-->\x{2068}<--
|
||||
|
||||
/\p{bidi class:L}+/utf
|
||||
-->ABC<--
|
||||
|
||||
/\P{bidi class:L}+/utf
|
||||
-->ABC<--
|
||||
|
||||
/\p{bidi class:LRE}+\p{bidiclass=lri}*\p{bidiclass:lro}/utf
|
||||
-->\x{202a}\x{2066}\x{202d}<--
|
||||
|
||||
/\p{bidi class:NSM}+/utf
|
||||
-->\x{9bc}\x{a71}\x{e31}<--
|
||||
|
||||
/\p{bidi class:ON}+/utf
|
||||
-->\x{21}'()*;@\x{384}\x{2039}<=-
|
||||
|
||||
/\p{bidiclass:pdf}\p{bidiclass:pdi}/utf
|
||||
-->\x{202c}\x{2069}<--
|
||||
|
||||
/\p{bidi class:R}+/utf
|
||||
-->\x{590}\x{5c6}\x{200f}\x{10805}<--
|
||||
|
||||
/\p{bidi class:RLE}+\p{bidi class:RLI}*\p{bidi class:RLO}+/utf
|
||||
-->\x{202b}\x{2067}\x{202e}<--
|
||||
|
||||
/\p{bidi class:S}+\p{bidiclass:WS}+/utf
|
||||
-->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<--
|
||||
|
||||
#subject -no_jit
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# End of testinput4
|
||||
|
|
|
@ -2188,4 +2188,8 @@
|
|||
/(\xc1)\1/i,ucp
|
||||
\xc1\xe1\=no_jit
|
||||
|
||||
/\p{L&}+\p{bidi_control}/B
|
||||
|
||||
/\p{bidi_control}+\p{L&}/B
|
||||
|
||||
# End of testinput5
|
||||
|
|
|
@ -2093,4 +2093,114 @@
|
|||
/(?<=\x{100})\x{200}(?=\x{300})/utf,allusedtext
|
||||
\x{100}\x{200}\x{300}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Tests for bidi control and bidi class properties
|
||||
|
||||
/\p{ bidi_control }/utf
|
||||
-->\x{202c}<--
|
||||
|
||||
/\p{bidicontrol}+/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
|
||||
/\p{bidicontrol}+?/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
|
||||
/\p{bidicontrol}++/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
|
||||
/[\p{bidi_control}]/utf
|
||||
-->\x{202c}<--
|
||||
|
||||
/[\p{bidicontrol}]+/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
|
||||
/[\p{bidicontrol}]+?/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
|
||||
/[\p{bidicontrol}]++/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
|
||||
/[\p{bidicontrol}<>]+/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
|
||||
/\P{bidicontrol}+/g,utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
|
||||
/\p{^bidicontrol}+/g,utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
|
||||
/\p{bidi class = al}/utf
|
||||
-->\x{061D}<--
|
||||
|
||||
/\p{bidi class = al}+/utf
|
||||
-->\x{061D}\x{061e}\x{061f}<--
|
||||
|
||||
/\p{bidi_class : AL}+?/utf
|
||||
-->\x{061D}\x{061e}\x{061f}<--
|
||||
|
||||
/\p{Bidi_Class : AL}++/utf
|
||||
-->\x{061D}\x{061e}\x{061f}<--
|
||||
|
||||
/\p{bidi class = aN}+/utf
|
||||
-->\x{061D}\x{0602}\x{0604}\x{061f}<--
|
||||
|
||||
/\p{bidi class = B}+/utf
|
||||
-->\x{0a}\x{0d}\x{01c}\x{01e}\x{085}\x{2029}<--
|
||||
|
||||
/\p{bidi class:BN}+/utf
|
||||
-->\x{0}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}<--
|
||||
|
||||
/\p{bidiclass:cs}+/utf
|
||||
-->,.\x{060c}\x{ff1a}<--
|
||||
|
||||
/\p{bidiclass:En}+/utf
|
||||
-->09\x{b2}\x{2074}\x{1fbf9}<--
|
||||
|
||||
/\p{bidiclass:es}+/utf
|
||||
==>+-\x{207a}\x{ff0d}<==
|
||||
|
||||
/\p{bidiclass:et}+/utf
|
||||
-->#\{24}%\x{a2}\x{A838}\x{1e2ff}<--
|
||||
|
||||
/\p{bidiclass:FSI}+/utf
|
||||
-->\x{2068}<--
|
||||
|
||||
/\p{bidi class:L}+/utf
|
||||
-->ABC<--
|
||||
|
||||
/\P{bidi class:L}+/utf
|
||||
-->ABC<--
|
||||
|
||||
/\p{bidi class:LRE}+\p{bidiclass=lri}*\p{bidiclass:lro}/utf
|
||||
-->\x{202a}\x{2066}\x{202d}<--
|
||||
|
||||
/\p{bidi class:NSM}+/utf
|
||||
-->\x{9bc}\x{a71}\x{e31}<--
|
||||
|
||||
/\p{bidi class:ON}+/utf
|
||||
-->\x{21}'()*;@\x{384}\x{2039}<=-
|
||||
|
||||
/\p{bidiclass:pdf}\p{bidiclass:pdi}/utf
|
||||
-->\x{202c}\x{2069}<--
|
||||
|
||||
/\p{bidi class:R}+/utf
|
||||
-->\x{590}\x{5c6}\x{200f}\x{10805}<--
|
||||
|
||||
/\p{bidi class:RLE}+\p{bidi class:RLI}*\p{bidi class:RLO}+/utf
|
||||
-->\x{202b}\x{2067}\x{202e}<--
|
||||
|
||||
/\p{bidi class:S}+\p{bidiclass:WS}+/utf
|
||||
-->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<--
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# End of testinput7
|
||||
|
|
|
@ -4032,4 +4032,163 @@ No match
|
|||
\x{44f}
|
||||
0:
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Tests for bidi control and bidi class properties, not yet supported by JIT.
|
||||
|
||||
#subject no_jit
|
||||
|
||||
/\p{ bidi_control }/utf
|
||||
-->\x{202c}<--
|
||||
0: \x{202c}
|
||||
|
||||
/\p{bidicontrol}+/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
0: \x{2066}\x{2067}\x{2068}\x{2069}
|
||||
|
||||
/\p{bidicontrol}+?/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
0: \x{61c}
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
0: \x{2066}
|
||||
|
||||
/\p{bidicontrol}++/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
0: \x{2066}\x{2067}\x{2068}\x{2069}
|
||||
|
||||
/[\p{bidi_control}]/utf
|
||||
-->\x{202c}<--
|
||||
0: \x{202c}
|
||||
|
||||
/[\p{bidicontrol}]+/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
0: \x{2066}\x{2067}\x{2068}\x{2069}
|
||||
|
||||
/[\p{bidicontrol}]+?/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
0: \x{61c}
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
0: \x{2066}
|
||||
|
||||
/[\p{bidicontrol}]++/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
0: \x{2066}\x{2067}\x{2068}\x{2069}
|
||||
|
||||
/[\p{bidicontrol}<>]+/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
0: >\x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
0: >\x{2066}\x{2067}\x{2068}\x{2069}<
|
||||
|
||||
/\P{bidicontrol}+/g,utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
0: -->
|
||||
0: <--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
0: -->
|
||||
0: <--
|
||||
|
||||
/\p{^bidicontrol}+/g,utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
0: -->
|
||||
0: <--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
0: -->
|
||||
0: <--
|
||||
|
||||
/\p{bidi class = al}/utf
|
||||
-->\x{061D}<--
|
||||
0: \x{61d}
|
||||
|
||||
/\p{bidi class = al}+/utf
|
||||
-->\x{061D}\x{061e}\x{061f}<--
|
||||
0: \x{61d}\x{61e}\x{61f}
|
||||
|
||||
/\p{bidi_class : AL}+?/utf
|
||||
-->\x{061D}\x{061e}\x{061f}<--
|
||||
0: \x{61d}
|
||||
|
||||
/\p{Bidi_Class : AL}++/utf
|
||||
-->\x{061D}\x{061e}\x{061f}<--
|
||||
0: \x{61d}\x{61e}\x{61f}
|
||||
|
||||
/\p{bidi class = aN}+/utf
|
||||
-->\x{061D}\x{0602}\x{0604}\x{061f}<--
|
||||
0: \x{602}\x{604}
|
||||
|
||||
/\p{bidi class = B}+/utf
|
||||
-->\x{0a}\x{0d}\x{01c}\x{01e}\x{085}\x{2029}<--
|
||||
0: \x{0a}\x{0d}\x{1c}\x{1e}\x{85}\x{2029}
|
||||
|
||||
/\p{bidi class:BN}+/utf
|
||||
-->\x{0}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}<--
|
||||
0: \x{00}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}
|
||||
|
||||
/\p{bidiclass:cs}+/utf
|
||||
-->,.\x{060c}\x{ff1a}<--
|
||||
0: ,.\x{60c}\x{ff1a}
|
||||
|
||||
/\p{bidiclass:En}+/utf
|
||||
-->09\x{b2}\x{2074}\x{1fbf9}<--
|
||||
0: 09\x{b2}\x{2074}\x{1fbf9}
|
||||
|
||||
/\p{bidiclass:es}+/utf
|
||||
==>+-\x{207a}\x{ff0d}<==
|
||||
0: +-\x{207a}\x{ff0d}
|
||||
|
||||
/\p{bidiclass:et}+/utf
|
||||
-->#\{24}%\x{a2}\x{A838}\x{1e2ff}<--
|
||||
0: #
|
||||
|
||||
/\p{bidiclass:FSI}+/utf
|
||||
-->\x{2068}<--
|
||||
0: \x{2068}
|
||||
|
||||
/\p{bidi class:L}+/utf
|
||||
-->ABC<--
|
||||
0: ABC
|
||||
|
||||
/\P{bidi class:L}+/utf
|
||||
-->ABC<--
|
||||
0: -->
|
||||
|
||||
/\p{bidi class:LRE}+\p{bidiclass=lri}*\p{bidiclass:lro}/utf
|
||||
-->\x{202a}\x{2066}\x{202d}<--
|
||||
0: \x{202a}\x{2066}\x{202d}
|
||||
|
||||
/\p{bidi class:NSM}+/utf
|
||||
-->\x{9bc}\x{a71}\x{e31}<--
|
||||
0: \x{9bc}\x{a71}\x{e31}
|
||||
|
||||
/\p{bidi class:ON}+/utf
|
||||
-->\x{21}'()*;@\x{384}\x{2039}<=-
|
||||
0: >!'()*;@\x{384}\x{2039}<=
|
||||
|
||||
/\p{bidiclass:pdf}\p{bidiclass:pdi}/utf
|
||||
-->\x{202c}\x{2069}<--
|
||||
0: \x{202c}\x{2069}
|
||||
|
||||
/\p{bidi class:R}+/utf
|
||||
-->\x{590}\x{5c6}\x{200f}\x{10805}<--
|
||||
0: \x{590}\x{5c6}\x{200f}\x{10805}
|
||||
|
||||
/\p{bidi class:RLE}+\p{bidi class:RLI}*\p{bidi class:RLO}+/utf
|
||||
-->\x{202b}\x{2067}\x{202e}<--
|
||||
0: \x{202b}\x{2067}\x{202e}
|
||||
|
||||
/\p{bidi class:S}+\p{bidiclass:WS}+/utf
|
||||
-->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<--
|
||||
0: \x{09}\x{0b}\x{1f} \x{0c} \x{2000} \x{3000}
|
||||
|
||||
#subject -no_jit
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# End of testinput4
|
||||
|
|
|
@ -3298,7 +3298,7 @@ No match
|
|||
AllAny+
|
||||
notprop Any
|
||||
AllAny+
|
||||
prop L&
|
||||
prop Lc
|
||||
AllAny+
|
||||
prop L
|
||||
AllAny+
|
||||
|
@ -3322,29 +3322,29 @@ No match
|
|||
/\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
prop L& +
|
||||
prop Lc +
|
||||
AllAny
|
||||
prop L& +
|
||||
prop L&
|
||||
notprop L& ++
|
||||
prop L&
|
||||
prop L& +
|
||||
prop Lc +
|
||||
prop Lc
|
||||
notprop Lc ++
|
||||
prop Lc
|
||||
prop Lc +
|
||||
prop L
|
||||
prop L& +
|
||||
prop Lc +
|
||||
prop Lu
|
||||
prop L& +
|
||||
prop Lc +
|
||||
prop Han
|
||||
prop L& +
|
||||
prop Lc +
|
||||
prop Xan
|
||||
prop L& ++
|
||||
prop Lc ++
|
||||
notprop Xan
|
||||
prop L& ++
|
||||
prop Lc ++
|
||||
prop Xsp
|
||||
prop L& ++
|
||||
prop Lc ++
|
||||
prop Xps
|
||||
prop Xwd +
|
||||
prop L&
|
||||
prop L& +
|
||||
prop Lc
|
||||
prop Lc +
|
||||
prop Xuc
|
||||
Ket
|
||||
End
|
||||
|
@ -3356,7 +3356,7 @@ No match
|
|||
prop N +
|
||||
AllAny
|
||||
prop N +
|
||||
prop L&
|
||||
prop Lc
|
||||
prop N ++
|
||||
prop L
|
||||
prop N +
|
||||
|
@ -3387,7 +3387,7 @@ No match
|
|||
prop Lu +
|
||||
AllAny
|
||||
prop Lu +
|
||||
prop L&
|
||||
prop Lc
|
||||
prop Lu +
|
||||
prop L
|
||||
prop Lu +
|
||||
|
@ -3420,7 +3420,7 @@ No match
|
|||
prop Han +
|
||||
prop Lu
|
||||
prop Han +
|
||||
prop L&
|
||||
prop Lc
|
||||
prop Han +
|
||||
prop L
|
||||
prop Han +
|
||||
|
@ -3449,9 +3449,9 @@ No match
|
|||
prop Xan +
|
||||
AllAny
|
||||
prop Xan +
|
||||
prop L&
|
||||
prop Lc
|
||||
notprop Xan ++
|
||||
prop L&
|
||||
prop Lc
|
||||
prop Xan +
|
||||
prop L
|
||||
prop Xan +
|
||||
|
@ -3480,7 +3480,7 @@ No match
|
|||
prop Xsp +
|
||||
AllAny
|
||||
prop Xsp ++
|
||||
prop L&
|
||||
prop Lc
|
||||
prop Xsp ++
|
||||
prop L
|
||||
prop Xsp ++
|
||||
|
@ -3509,7 +3509,7 @@ No match
|
|||
prop Xwd +
|
||||
AllAny
|
||||
prop Xwd +
|
||||
prop L&
|
||||
prop Lc
|
||||
prop Xwd +
|
||||
prop L
|
||||
prop Xwd +
|
||||
|
@ -3538,7 +3538,7 @@ No match
|
|||
prop Xuc +
|
||||
AllAny
|
||||
prop Xuc +
|
||||
prop L&
|
||||
prop Lc
|
||||
prop Xuc +
|
||||
prop L
|
||||
prop Xuc +
|
||||
|
@ -4949,4 +4949,22 @@ Subject length lower bound = 3
|
|||
0: \xc1\xe1
|
||||
1: \xc1
|
||||
|
||||
/\p{L&}+\p{bidi_control}/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
prop Lc ++
|
||||
prop Bidicontrol
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/\p{bidi_control}+\p{L&}/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
prop Bidicontrol ++
|
||||
prop Lc
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# End of testinput5
|
||||
|
|
|
@ -3539,4 +3539,179 @@ No match
|
|||
0: \x{100}\x{200}\x{300}
|
||||
<<<<<<< >>>>>>>
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Tests for bidi control and bidi class properties
|
||||
|
||||
/\p{ bidi_control }/utf
|
||||
-->\x{202c}<--
|
||||
0: \x{202c}
|
||||
|
||||
/\p{bidicontrol}+/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
0: \x{2066}\x{2067}\x{2068}\x{2069}
|
||||
|
||||
/\p{bidicontrol}+?/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
|
||||
1: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}
|
||||
2: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}
|
||||
3: \x{61c}\x{200e}\x{200f}\x{202a}
|
||||
4: \x{61c}\x{200e}\x{200f}
|
||||
5: \x{61c}\x{200e}
|
||||
6: \x{61c}
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
0: \x{2066}\x{2067}\x{2068}\x{2069}
|
||||
1: \x{2066}\x{2067}\x{2068}
|
||||
2: \x{2066}\x{2067}
|
||||
3: \x{2066}
|
||||
|
||||
/\p{bidicontrol}++/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
0: \x{2066}\x{2067}\x{2068}\x{2069}
|
||||
|
||||
/[\p{bidi_control}]/utf
|
||||
-->\x{202c}<--
|
||||
0: \x{202c}
|
||||
|
||||
/[\p{bidicontrol}]+/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
0: \x{2066}\x{2067}\x{2068}\x{2069}
|
||||
|
||||
/[\p{bidicontrol}]+?/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
|
||||
1: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}
|
||||
2: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}
|
||||
3: \x{61c}\x{200e}\x{200f}\x{202a}
|
||||
4: \x{61c}\x{200e}\x{200f}
|
||||
5: \x{61c}\x{200e}
|
||||
6: \x{61c}
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
0: \x{2066}\x{2067}\x{2068}\x{2069}
|
||||
1: \x{2066}\x{2067}\x{2068}
|
||||
2: \x{2066}\x{2067}
|
||||
3: \x{2066}
|
||||
|
||||
/[\p{bidicontrol}]++/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
0: \x{2066}\x{2067}\x{2068}\x{2069}
|
||||
|
||||
/[\p{bidicontrol}<>]+/utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
0: >\x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
0: >\x{2066}\x{2067}\x{2068}\x{2069}<
|
||||
|
||||
/\P{bidicontrol}+/g,utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
0: -->
|
||||
0: <--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
0: -->
|
||||
0: <--
|
||||
|
||||
/\p{^bidicontrol}+/g,utf
|
||||
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<--
|
||||
0: -->
|
||||
0: <--
|
||||
-->\x{2066}\x{2067}\x{2068}\x{2069}<--
|
||||
0: -->
|
||||
0: <--
|
||||
|
||||
/\p{bidi class = al}/utf
|
||||
-->\x{061D}<--
|
||||
0: \x{61d}
|
||||
|
||||
/\p{bidi class = al}+/utf
|
||||
-->\x{061D}\x{061e}\x{061f}<--
|
||||
0: \x{61d}\x{61e}\x{61f}
|
||||
|
||||
/\p{bidi_class : AL}+?/utf
|
||||
-->\x{061D}\x{061e}\x{061f}<--
|
||||
0: \x{61d}\x{61e}\x{61f}
|
||||
1: \x{61d}\x{61e}
|
||||
2: \x{61d}
|
||||
|
||||
/\p{Bidi_Class : AL}++/utf
|
||||
-->\x{061D}\x{061e}\x{061f}<--
|
||||
0: \x{61d}\x{61e}\x{61f}
|
||||
|
||||
/\p{bidi class = aN}+/utf
|
||||
-->\x{061D}\x{0602}\x{0604}\x{061f}<--
|
||||
0: \x{602}\x{604}
|
||||
|
||||
/\p{bidi class = B}+/utf
|
||||
-->\x{0a}\x{0d}\x{01c}\x{01e}\x{085}\x{2029}<--
|
||||
0: \x{0a}\x{0d}\x{1c}\x{1e}\x{85}\x{2029}
|
||||
|
||||
/\p{bidi class:BN}+/utf
|
||||
-->\x{0}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}<--
|
||||
0: \x{00}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}
|
||||
|
||||
/\p{bidiclass:cs}+/utf
|
||||
-->,.\x{060c}\x{ff1a}<--
|
||||
0: ,.\x{60c}\x{ff1a}
|
||||
|
||||
/\p{bidiclass:En}+/utf
|
||||
-->09\x{b2}\x{2074}\x{1fbf9}<--
|
||||
0: 09\x{b2}\x{2074}\x{1fbf9}
|
||||
|
||||
/\p{bidiclass:es}+/utf
|
||||
==>+-\x{207a}\x{ff0d}<==
|
||||
0: +-\x{207a}\x{ff0d}
|
||||
|
||||
/\p{bidiclass:et}+/utf
|
||||
-->#\{24}%\x{a2}\x{A838}\x{1e2ff}<--
|
||||
0: #
|
||||
|
||||
/\p{bidiclass:FSI}+/utf
|
||||
-->\x{2068}<--
|
||||
0: \x{2068}
|
||||
|
||||
/\p{bidi class:L}+/utf
|
||||
-->ABC<--
|
||||
0: ABC
|
||||
|
||||
/\P{bidi class:L}+/utf
|
||||
-->ABC<--
|
||||
0: -->
|
||||
|
||||
/\p{bidi class:LRE}+\p{bidiclass=lri}*\p{bidiclass:lro}/utf
|
||||
-->\x{202a}\x{2066}\x{202d}<--
|
||||
0: \x{202a}\x{2066}\x{202d}
|
||||
|
||||
/\p{bidi class:NSM}+/utf
|
||||
-->\x{9bc}\x{a71}\x{e31}<--
|
||||
0: \x{9bc}\x{a71}\x{e31}
|
||||
|
||||
/\p{bidi class:ON}+/utf
|
||||
-->\x{21}'()*;@\x{384}\x{2039}<=-
|
||||
0: >!'()*;@\x{384}\x{2039}<=
|
||||
|
||||
/\p{bidiclass:pdf}\p{bidiclass:pdi}/utf
|
||||
-->\x{202c}\x{2069}<--
|
||||
0: \x{202c}\x{2069}
|
||||
|
||||
/\p{bidi class:R}+/utf
|
||||
-->\x{590}\x{5c6}\x{200f}\x{10805}<--
|
||||
0: \x{590}\x{5c6}\x{200f}\x{10805}
|
||||
|
||||
/\p{bidi class:RLE}+\p{bidi class:RLI}*\p{bidi class:RLO}+/utf
|
||||
-->\x{202b}\x{2067}\x{202e}<--
|
||||
0: \x{202b}\x{2067}\x{202e}
|
||||
|
||||
/\p{bidi class:S}+\p{bidiclass:WS}+/utf
|
||||
-->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<--
|
||||
0: \x{09}\x{0b}\x{1f} \x{0c} \x{2000} \x{3000}
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# End of testinput7
|
||||
|
|
Loading…
Reference in New Issue