diff --git a/HACKING b/HACKING index 20faf8f..cad11b3 100644 --- a/HACKING +++ b/HACKING @@ -546,8 +546,9 @@ Each is followed by two code units that encode the desired property as a type and a value. The types are a set of #defines of the form PT_xxx, and the values are enumerations of the form ucp_xx, defined in the pcre2_ucp.h source file. The value is relevant only for PT_GC (General Category), PT_PC (Particular -Category), PT_SC (Script), and the pseudo-property PT_CLIST, which is used to -identify a list of case-equivalent characters when there are three or more. +Category), PT_SC (Script), PT_BIDICL (Bidi Class), and the pseudo-property +PT_CLIST, which is used to identify a list of case-equivalent characters when +there are three or more. Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by three code units: OP_PROP or OP_NOTPROP, and then the desired property type and @@ -827,4 +828,4 @@ not a real opcode, but is used to check at compile time that tables indexed by opcode are the correct length, in order to catch updating errors. Philip Hazel -12 July 2019 +December 2021 diff --git a/maint/GenerateUtt.py b/maint/GenerateUtt.py index eea6efc..3f268bd 100755 --- a/maint/GenerateUtt.py +++ b/maint/GenerateUtt.py @@ -29,6 +29,9 @@ # Added script names for Unicode 12.1.0, 27-July-2019. # Added script names for Unicode 13.0.0, 10-March-2020. # Added Script names for Unicode 14.0.0, PCRE2-10.39 +# Added support for bidi class and bidi control, 06-December-2021 +# This also involved lower casing strings and removing underscores, in +# accordance with Unicode's "loose matching" rules, which Perl observes. 
script_names = ['Unknown', 'Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \ 'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \ @@ -78,21 +81,46 @@ category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu', general_category_names = ['C', 'L', 'M', 'N', 'P', 'S', 'Z'] -# First add the Unicode script and category names. +bidiclass_names = ['bidiAL', 'bidiAN', 'bidiB', 'bidiBN', 'bidiCS', 'bidiEN', + 'bidiES', 'bidiET', 'bidiFSI', 'bidiL', 'bidiLRE', 'bidiLRI', 'bidiLRO', + 'bidiNSM', 'bidiON', 'bidiPDF', 'bidiPDI', 'bidiR', 'bidiRLE', 'bidiRLI', + 'bidiRLO', 'bidiS', 'bidiWS' ] -utt_table = list(zip(script_names, ['PT_SC'] * len(script_names))) -utt_table += list(zip(category_names, ['PT_PC'] * len(category_names))) -utt_table += list(zip(general_category_names, ['PT_GC'] * len(general_category_names))) +# Create standardized versions of the names by lowercasing and removing +# underscores. -# Now add our own specials. +def stdnames(x): + y = [''] * len(x) + for i in range(len(x)): + y[i] = x[i].lower().replace('_', '') + return y -utt_table.append(('Any', 'PT_ANY')) -utt_table.append(('L&', 'PT_LAMP')) -utt_table.append(('Xan', 'PT_ALNUM')) -utt_table.append(('Xps', 'PT_PXSPACE')) -utt_table.append(('Xsp', 'PT_SPACE')) -utt_table.append(('Xuc', 'PT_UCNC')) -utt_table.append(('Xwd', 'PT_WORD')) +std_script_names = stdnames(script_names) +std_category_names = stdnames(category_names) +std_general_category_names = stdnames(general_category_names) +std_bidiclass_names = stdnames(bidiclass_names) + +# Create the table, starting with the Unicode script, category and bidi class +# names. We keep both the standardized name and the original, because the +# latter is used for the ucp_xx names. 
+ +utt_table = list(zip(std_script_names, script_names, ['PT_SC'] * len(script_names))) +utt_table += list(zip(std_category_names, category_names, ['PT_PC'] * len(category_names))) +utt_table += list(zip(std_general_category_names, general_category_names, ['PT_GC'] * len(general_category_names))) +utt_table += list(zip(std_bidiclass_names, bidiclass_names, ['PT_BIDICL'] * len(bidiclass_names))) + +# Now add our own specials. Note both the standardized and capitalized forms +# are needed. + +utt_table.append(('any', 'Any', 'PT_ANY')) +utt_table.append(('bidicontrol', 'Bidi_Control', 'PT_BIDICO')) +utt_table.append(('l&', 'L&', 'PT_LAMP')) +utt_table.append(('lc', 'LC', 'PT_LAMP')) +utt_table.append(('xan', 'Xan', 'PT_ALNUM')) +utt_table.append(('xps', 'Xps', 'PT_PXSPACE')) +utt_table.append(('xsp', 'Xsp', 'PT_SPACE')) +utt_table.append(('xuc', 'Xuc', 'PT_UCNC')) +utt_table.append(('xwd', 'Xwd', 'PT_WORD')) # Sort the table. @@ -104,9 +132,7 @@ utt_table.sort() for utt in utt_table: print('#define STRING_%s0' % (utt[0].replace('&', '_AMPERSAND')), end=' ') for c in utt[0]: - if c == '_': - print('STR_UNDERSCORE', end=' ') - elif c == '&': + if c == '&': print('STR_AMPERSAND', end=' ') else: print('STR_%s' % c, end=' '); @@ -121,20 +147,18 @@ for utt in utt_table: if utt == utt_table[-1]: last = ';' print(' STRING_%s0%s' % (utt[0].replace('&', '_AMPERSAND'), last)) -# This was how it was done before the EBCDIC-compatible modification. 
-# print ' "%s\\0"%s' % (utt[0], last) print('\nconst ucp_type_table PRIV(utt)[] = {') offset = 0 last = ',' for utt in utt_table: - if utt[1] in ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE', - 'PT_SPACE', 'PT_UCNC', 'PT_WORD'): + if utt[2] in ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE', + 'PT_SPACE', 'PT_UCNC', 'PT_WORD', 'PT_BIDICO'): value = '0' else: - value = 'ucp_' + utt[0] + value = 'ucp_' + utt[1] if utt == utt_table[-1]: last = '' - print(' { %3d, %s, %s }%s' % (offset, utt[1], value, last)) + print(' { %3d, %s, %s }%s' % (offset, utt[2], value, last)) offset += len(utt[0]) + 1 print('};') diff --git a/maint/ucptest.c b/maint/ucptest.c index 0876840..3c62da3 100644 --- a/maint/ucptest.c +++ b/maint/ucptest.c @@ -34,11 +34,15 @@ return code is always zero. There are three commands: "findprop" must be followed by a space-separated list of Unicode code points as -hex numbers, either without any prefix or starting with "U+". The output is one -line per character, giving its Unicode properties followed by its other case or -cases if one or more exist, followed by its Script Extension list if it is not -just the same as the base script. This list is in square brackets. The -properties are: +hex numbers, either without any prefix or starting with "U+", or as individual +UTF-8 characters preceded by '+'. For example: + + findprop U+1234 5Abc +? + +The output is one line per character, giving its Unicode properties followed by +its other case or cases if one or more exist, followed by its Script Extension +list if it is not just the same as the base script. This list is in square +brackets. The properties are: Bidi control shown as '*' if true Bidi class e.g. NSM (most common is L) @@ -47,9 +51,13 @@ Specific type e.g. Upper case letter Script e.g. Medefaidrin Grapheme break type e.g. Extend (most common is Other) +The script names are all in lower case, with underscores removed, because +that's how they are stored for "loose" matching. 
+ "find" must be followed by a list of property names and their values. The -values are case-sensitive. This finds characters that have those properties. If -multiple properties are listed, they must all be matched. Currently supported: +values are case-sensitive, except for bidi class. This finds characters that +have those properties. If multiple properties are listed, they must all be +matched. Currently supported: script The character must have this script property. Only one such script may be given. @@ -59,7 +67,7 @@ multiple properties are listed, they must all be matched. Currently supported: type The character's specific type (e.g. Lu or Nd) must match. gbreak The grapheme break property must match. bidi The character's bidi class must match. - bidi_control The character must be a bidi control character + bidi_control The character must be a bidi control character If a or is preceded by !, the value must NOT be present. For Script Extensions, there may be a mixture of positive and negative @@ -202,6 +210,41 @@ static const unsigned int utf8_table1[] = { static const int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; +/* Macro to pick up the remaining bytes of a UTF-8 character, advancing +the pointer. 
*/ + +#define GETUTF8INC(c, eptr) \ + { \ + if ((c & 0x20u) == 0) \ + c = ((c & 0x1fu) << 6) | (*eptr++ & 0x3fu); \ + else if ((c & 0x10u) == 0) \ + { \ + c = ((c & 0x0fu) << 12) | ((*eptr & 0x3fu) << 6) | (eptr[1] & 0x3fu); \ + eptr += 2; \ + } \ + else if ((c & 0x08u) == 0) \ + { \ + c = ((c & 0x07u) << 18) | ((*eptr & 0x3fu) << 12) | \ + ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \ + eptr += 3; \ + } \ + else if ((c & 0x04u) == 0) \ + { \ + c = ((c & 0x03u) << 24) | ((*eptr & 0x3fu) << 18) | \ + ((eptr[1] & 0x3fu) << 12) | ((eptr[2] & 0x3fu) << 6) | \ + (eptr[3] & 0x3fu); \ + eptr += 4; \ + } \ + else \ + { \ + c = ((c & 0x01u) << 30) | ((*eptr & 0x3fu) << 24) | \ + ((eptr[1] & 0x3fu) << 18) | ((eptr[2] & 0x3fu) << 12) | \ + ((eptr[3] & 0x3fu) << 6) | (eptr[4] & 0x3fu); \ + eptr += 5; \ + } \ + } + + /************************************************* * Convert character value to UTF-8 * @@ -267,6 +310,7 @@ for (i = 0; i < PRIV(utt_size); i++) u = PRIV(utt) + i; if (u->type == PT_SC && u->value == script) break; } + if (i < PRIV(utt_size)) return PRIV(utt_names) + u->name_offset; @@ -601,7 +645,7 @@ while (*s != 0) } for (i = 0; i < sizeof(bd_names)/sizeof(char *); i += 2) { - if (strcmp(CS (value + offset), CS bd_names[i]) == 0) + if (strcasecmp(CS (value + offset), CS bd_names[i]) == 0) { bidiclass = i/2; break; @@ -629,7 +673,7 @@ while (*s != 0) } } -if (script < 0 && scriptx_count == 0 && type < 0 && gbreak < 0 && +if (script < 0 && scriptx_count == 0 && type < 0 && gbreak < 0 && bidiclass < 0 && !bidicontrol) { printf("** No properties specified\n"); @@ -787,12 +831,26 @@ if (strcmp(CS name, "findprop") == 0) unsigned int c; unsigned char *endptr; t = s; - if (strncmp(CS t, "U+", 2) == 0) t += 2; - c = strtoul(CS t, CSS(&endptr), 16); + + if (*t == '+') + { + c = *(++t); + if (c > 0x7fu) + { + GETCHARINC(c, t); + } + endptr = t+1; + } + else + { + if (strncmp(CS t, "U+", 2) == 0) t += 2; + c = strtoul(CS t, CSS(&endptr), 16); + } + if (*endptr != 0 && 
!isspace(*endptr)) { while (*endptr != 0 && !isspace(*endptr)) endptr++; - printf("** Invalid hex number: ignored \"%.*s\"\n", (int)(endptr-s), s); + printf("** Invalid character specifier: ignored \"%.*s\"\n", (int)(endptr-s), s); } else { @@ -884,19 +942,19 @@ if (argc > 1 && strcmp(argv[1], "-s") == 0) if (argc > first_arg) { int i; - BOOL hexfirst = TRUE; + BOOL datafirst = TRUE; char *arg = argv[first_arg]; unsigned char *s = buffer; - if (strncmp(arg, "U+", 2) != 0 && !isdigit(*arg)) + if (*arg != '+' && strncmp(arg, "U+", 2) != 0 && !isdigit(*arg)) { while (*arg != 0) { - if (!isxdigit(*arg++)) { hexfirst = FALSE; break; } + if (!isxdigit(*arg++)) { datafirst = FALSE; break; } } } - if (hexfirst) + if (datafirst) { strcpy(CS s, "findprop "); s += 9; diff --git a/maint/ucptestdata/testoutput1 b/maint/ucptestdata/testoutput1 index 30c0a87..81db156 100644 --- a/maint/ucptestdata/testoutput1 +++ b/maint/ucptestdata/testoutput1 @@ -1,409 +1,409 @@ findprop 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f -U+0000 BN Control: Control, Common, Control -U+0001 BN Control: Control, Common, Control -U+0002 BN Control: Control, Common, Control -U+0003 BN Control: Control, Common, Control -U+0004 BN Control: Control, Common, Control -U+0005 BN Control: Control, Common, Control -U+0006 BN Control: Control, Common, Control -U+0007 BN Control: Control, Common, Control -U+0008 BN Control: Control, Common, Control -U+0009 S Control: Control, Common, Control -U+000A B Control: Control, Common, LF -U+000B S Control: Control, Common, Control -U+000C WS Control: Control, Common, Control -U+000D B Control: Control, Common, CR -U+000E BN Control: Control, Common, Control -U+000F BN Control: Control, Common, Control +U+0000 BN Control: Control, common, Control +U+0001 BN Control: Control, common, Control +U+0002 BN Control: Control, common, Control +U+0003 BN Control: Control, common, Control +U+0004 BN Control: Control, common, Control +U+0005 BN Control: Control, common, Control 
+U+0006 BN Control: Control, common, Control +U+0007 BN Control: Control, common, Control +U+0008 BN Control: Control, common, Control +U+0009 S Control: Control, common, Control +U+000A B Control: Control, common, LF +U+000B S Control: Control, common, Control +U+000C WS Control: Control, common, Control +U+000D B Control: Control, common, CR +U+000E BN Control: Control, common, Control +U+000F BN Control: Control, common, Control findprop 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f -U+0010 BN Control: Control, Common, Control -U+0011 BN Control: Control, Common, Control -U+0012 BN Control: Control, Common, Control -U+0013 BN Control: Control, Common, Control -U+0014 BN Control: Control, Common, Control -U+0015 BN Control: Control, Common, Control -U+0016 BN Control: Control, Common, Control -U+0017 BN Control: Control, Common, Control -U+0018 BN Control: Control, Common, Control -U+0019 BN Control: Control, Common, Control -U+001A BN Control: Control, Common, Control -U+001B BN Control: Control, Common, Control -U+001C B Control: Control, Common, Control -U+001D B Control: Control, Common, Control -U+001E B Control: Control, Common, Control -U+001F S Control: Control, Common, Control +U+0010 BN Control: Control, common, Control +U+0011 BN Control: Control, common, Control +U+0012 BN Control: Control, common, Control +U+0013 BN Control: Control, common, Control +U+0014 BN Control: Control, common, Control +U+0015 BN Control: Control, common, Control +U+0016 BN Control: Control, common, Control +U+0017 BN Control: Control, common, Control +U+0018 BN Control: Control, common, Control +U+0019 BN Control: Control, common, Control +U+001A BN Control: Control, common, Control +U+001B BN Control: Control, common, Control +U+001C B Control: Control, common, Control +U+001D B Control: Control, common, Control +U+001E B Control: Control, common, Control +U+001F S Control: Control, common, Control findprop 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f -U+0020 WS 
Separator: Space separator, Common, Other -U+0021 ON Punctuation: Other punctuation, Common, Other -U+0022 ON Punctuation: Other punctuation, Common, Other -U+0023 ET Punctuation: Other punctuation, Common, Other -U+0024 ET Symbol: Currency symbol, Common, Other -U+0025 ET Punctuation: Other punctuation, Common, Other -U+0026 ON Punctuation: Other punctuation, Common, Other -U+0027 ON Punctuation: Other punctuation, Common, Other -U+0028 ON Punctuation: Open punctuation, Common, Other -U+0029 ON Punctuation: Close punctuation, Common, Other -U+002A ON Punctuation: Other punctuation, Common, Other -U+002B ES Symbol: Mathematical symbol, Common, Other -U+002C CS Punctuation: Other punctuation, Common, Other -U+002D ES Punctuation: Dash punctuation, Common, Other -U+002E CS Punctuation: Other punctuation, Common, Other -U+002F CS Punctuation: Other punctuation, Common, Other +U+0020 WS Separator: Space separator, common, Other +U+0021 ON Punctuation: Other punctuation, common, Other +U+0022 ON Punctuation: Other punctuation, common, Other +U+0023 ET Punctuation: Other punctuation, common, Other +U+0024 ET Symbol: Currency symbol, common, Other +U+0025 ET Punctuation: Other punctuation, common, Other +U+0026 ON Punctuation: Other punctuation, common, Other +U+0027 ON Punctuation: Other punctuation, common, Other +U+0028 ON Punctuation: Open punctuation, common, Other +U+0029 ON Punctuation: Close punctuation, common, Other +U+002A ON Punctuation: Other punctuation, common, Other +U+002B ES Symbol: Mathematical symbol, common, Other +U+002C CS Punctuation: Other punctuation, common, Other +U+002D ES Punctuation: Dash punctuation, common, Other +U+002E CS Punctuation: Other punctuation, common, Other +U+002F CS Punctuation: Other punctuation, common, Other findprop 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f -U+0030 EN Number: Decimal number, Common, Other -U+0031 EN Number: Decimal number, Common, Other -U+0032 EN Number: Decimal number, Common, Other -U+0033 EN 
Number: Decimal number, Common, Other -U+0034 EN Number: Decimal number, Common, Other -U+0035 EN Number: Decimal number, Common, Other -U+0036 EN Number: Decimal number, Common, Other -U+0037 EN Number: Decimal number, Common, Other -U+0038 EN Number: Decimal number, Common, Other -U+0039 EN Number: Decimal number, Common, Other -U+003A CS Punctuation: Other punctuation, Common, Other -U+003B ON Punctuation: Other punctuation, Common, Other -U+003C ON Symbol: Mathematical symbol, Common, Other -U+003D ON Symbol: Mathematical symbol, Common, Other -U+003E ON Symbol: Mathematical symbol, Common, Other -U+003F ON Punctuation: Other punctuation, Common, Other +U+0030 EN Number: Decimal number, common, Other +U+0031 EN Number: Decimal number, common, Other +U+0032 EN Number: Decimal number, common, Other +U+0033 EN Number: Decimal number, common, Other +U+0034 EN Number: Decimal number, common, Other +U+0035 EN Number: Decimal number, common, Other +U+0036 EN Number: Decimal number, common, Other +U+0037 EN Number: Decimal number, common, Other +U+0038 EN Number: Decimal number, common, Other +U+0039 EN Number: Decimal number, common, Other +U+003A CS Punctuation: Other punctuation, common, Other +U+003B ON Punctuation: Other punctuation, common, Other +U+003C ON Symbol: Mathematical symbol, common, Other +U+003D ON Symbol: Mathematical symbol, common, Other +U+003E ON Symbol: Mathematical symbol, common, Other +U+003F ON Punctuation: Other punctuation, common, Other findprop 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f -U+0040 ON Punctuation: Other punctuation, Common, Other -U+0041 L Letter: Upper case letter, Latin, Other, U+0061 -U+0042 L Letter: Upper case letter, Latin, Other, U+0062 -U+0043 L Letter: Upper case letter, Latin, Other, U+0063 -U+0044 L Letter: Upper case letter, Latin, Other, U+0064 -U+0045 L Letter: Upper case letter, Latin, Other, U+0065 -U+0046 L Letter: Upper case letter, Latin, Other, U+0066 -U+0047 L Letter: Upper case letter, Latin, 
Other, U+0067 -U+0048 L Letter: Upper case letter, Latin, Other, U+0068 -U+0049 L Letter: Upper case letter, Latin, Other, U+0069 -U+004A L Letter: Upper case letter, Latin, Other, U+006A -U+004B L Letter: Upper case letter, Latin, Other, U+006B, U+212A -U+004C L Letter: Upper case letter, Latin, Other, U+006C -U+004D L Letter: Upper case letter, Latin, Other, U+006D -U+004E L Letter: Upper case letter, Latin, Other, U+006E -U+004F L Letter: Upper case letter, Latin, Other, U+006F +U+0040 ON Punctuation: Other punctuation, common, Other +U+0041 L Letter: Upper case letter, latin, Other, U+0061 +U+0042 L Letter: Upper case letter, latin, Other, U+0062 +U+0043 L Letter: Upper case letter, latin, Other, U+0063 +U+0044 L Letter: Upper case letter, latin, Other, U+0064 +U+0045 L Letter: Upper case letter, latin, Other, U+0065 +U+0046 L Letter: Upper case letter, latin, Other, U+0066 +U+0047 L Letter: Upper case letter, latin, Other, U+0067 +U+0048 L Letter: Upper case letter, latin, Other, U+0068 +U+0049 L Letter: Upper case letter, latin, Other, U+0069 +U+004A L Letter: Upper case letter, latin, Other, U+006A +U+004B L Letter: Upper case letter, latin, Other, U+006B, U+212A +U+004C L Letter: Upper case letter, latin, Other, U+006C +U+004D L Letter: Upper case letter, latin, Other, U+006D +U+004E L Letter: Upper case letter, latin, Other, U+006E +U+004F L Letter: Upper case letter, latin, Other, U+006F findprop 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f -U+0050 L Letter: Upper case letter, Latin, Other, U+0070 -U+0051 L Letter: Upper case letter, Latin, Other, U+0071 -U+0052 L Letter: Upper case letter, Latin, Other, U+0072 -U+0053 L Letter: Upper case letter, Latin, Other, U+0073, U+017F -U+0054 L Letter: Upper case letter, Latin, Other, U+0074 -U+0055 L Letter: Upper case letter, Latin, Other, U+0075 -U+0056 L Letter: Upper case letter, Latin, Other, U+0076 -U+0057 L Letter: Upper case letter, Latin, Other, U+0077 -U+0058 L Letter: Upper case letter, Latin, 
Other, U+0078 -U+0059 L Letter: Upper case letter, Latin, Other, U+0079 -U+005A L Letter: Upper case letter, Latin, Other, U+007A -U+005B ON Punctuation: Open punctuation, Common, Other -U+005C ON Punctuation: Other punctuation, Common, Other -U+005D ON Punctuation: Close punctuation, Common, Other -U+005E ON Symbol: Modifier symbol, Common, Other -U+005F ON Punctuation: Connector punctuation, Common, Other +U+0050 L Letter: Upper case letter, latin, Other, U+0070 +U+0051 L Letter: Upper case letter, latin, Other, U+0071 +U+0052 L Letter: Upper case letter, latin, Other, U+0072 +U+0053 L Letter: Upper case letter, latin, Other, U+0073, U+017F +U+0054 L Letter: Upper case letter, latin, Other, U+0074 +U+0055 L Letter: Upper case letter, latin, Other, U+0075 +U+0056 L Letter: Upper case letter, latin, Other, U+0076 +U+0057 L Letter: Upper case letter, latin, Other, U+0077 +U+0058 L Letter: Upper case letter, latin, Other, U+0078 +U+0059 L Letter: Upper case letter, latin, Other, U+0079 +U+005A L Letter: Upper case letter, latin, Other, U+007A +U+005B ON Punctuation: Open punctuation, common, Other +U+005C ON Punctuation: Other punctuation, common, Other +U+005D ON Punctuation: Close punctuation, common, Other +U+005E ON Symbol: Modifier symbol, common, Other +U+005F ON Punctuation: Connector punctuation, common, Other findprop 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f -U+0060 ON Symbol: Modifier symbol, Common, Other -U+0061 L Letter: Lower case letter, Latin, Other, U+0041 -U+0062 L Letter: Lower case letter, Latin, Other, U+0042 -U+0063 L Letter: Lower case letter, Latin, Other, U+0043 -U+0064 L Letter: Lower case letter, Latin, Other, U+0044 -U+0065 L Letter: Lower case letter, Latin, Other, U+0045 -U+0066 L Letter: Lower case letter, Latin, Other, U+0046 -U+0067 L Letter: Lower case letter, Latin, Other, U+0047 -U+0068 L Letter: Lower case letter, Latin, Other, U+0048 -U+0069 L Letter: Lower case letter, Latin, Other, U+0049 -U+006A L Letter: Lower case 
letter, Latin, Other, U+004A -U+006B L Letter: Lower case letter, Latin, Other, U+004B, U+212A -U+006C L Letter: Lower case letter, Latin, Other, U+004C -U+006D L Letter: Lower case letter, Latin, Other, U+004D -U+006E L Letter: Lower case letter, Latin, Other, U+004E -U+006F L Letter: Lower case letter, Latin, Other, U+004F +U+0060 ON Symbol: Modifier symbol, common, Other +U+0061 L Letter: Lower case letter, latin, Other, U+0041 +U+0062 L Letter: Lower case letter, latin, Other, U+0042 +U+0063 L Letter: Lower case letter, latin, Other, U+0043 +U+0064 L Letter: Lower case letter, latin, Other, U+0044 +U+0065 L Letter: Lower case letter, latin, Other, U+0045 +U+0066 L Letter: Lower case letter, latin, Other, U+0046 +U+0067 L Letter: Lower case letter, latin, Other, U+0047 +U+0068 L Letter: Lower case letter, latin, Other, U+0048 +U+0069 L Letter: Lower case letter, latin, Other, U+0049 +U+006A L Letter: Lower case letter, latin, Other, U+004A +U+006B L Letter: Lower case letter, latin, Other, U+004B, U+212A +U+006C L Letter: Lower case letter, latin, Other, U+004C +U+006D L Letter: Lower case letter, latin, Other, U+004D +U+006E L Letter: Lower case letter, latin, Other, U+004E +U+006F L Letter: Lower case letter, latin, Other, U+004F findprop 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f -U+0070 L Letter: Lower case letter, Latin, Other, U+0050 -U+0071 L Letter: Lower case letter, Latin, Other, U+0051 -U+0072 L Letter: Lower case letter, Latin, Other, U+0052 -U+0073 L Letter: Lower case letter, Latin, Other, U+0053, U+017F -U+0074 L Letter: Lower case letter, Latin, Other, U+0054 -U+0075 L Letter: Lower case letter, Latin, Other, U+0055 -U+0076 L Letter: Lower case letter, Latin, Other, U+0056 -U+0077 L Letter: Lower case letter, Latin, Other, U+0057 -U+0078 L Letter: Lower case letter, Latin, Other, U+0058 -U+0079 L Letter: Lower case letter, Latin, Other, U+0059 -U+007A L Letter: Lower case letter, Latin, Other, U+005A -U+007B ON Punctuation: Open punctuation, 
Common, Other -U+007C ON Symbol: Mathematical symbol, Common, Other -U+007D ON Punctuation: Close punctuation, Common, Other -U+007E ON Symbol: Mathematical symbol, Common, Other -U+007F BN Control: Control, Common, Control +U+0070 L Letter: Lower case letter, latin, Other, U+0050 +U+0071 L Letter: Lower case letter, latin, Other, U+0051 +U+0072 L Letter: Lower case letter, latin, Other, U+0052 +U+0073 L Letter: Lower case letter, latin, Other, U+0053, U+017F +U+0074 L Letter: Lower case letter, latin, Other, U+0054 +U+0075 L Letter: Lower case letter, latin, Other, U+0055 +U+0076 L Letter: Lower case letter, latin, Other, U+0056 +U+0077 L Letter: Lower case letter, latin, Other, U+0057 +U+0078 L Letter: Lower case letter, latin, Other, U+0058 +U+0079 L Letter: Lower case letter, latin, Other, U+0059 +U+007A L Letter: Lower case letter, latin, Other, U+005A +U+007B ON Punctuation: Open punctuation, common, Other +U+007C ON Symbol: Mathematical symbol, common, Other +U+007D ON Punctuation: Close punctuation, common, Other +U+007E ON Symbol: Mathematical symbol, common, Other +U+007F BN Control: Control, common, Control findprop 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f -U+0080 BN Control: Control, Common, Control -U+0081 BN Control: Control, Common, Control -U+0082 BN Control: Control, Common, Control -U+0083 BN Control: Control, Common, Control -U+0084 BN Control: Control, Common, Control -U+0085 B Control: Control, Common, Control -U+0086 BN Control: Control, Common, Control -U+0087 BN Control: Control, Common, Control -U+0088 BN Control: Control, Common, Control -U+0089 BN Control: Control, Common, Control -U+008A BN Control: Control, Common, Control -U+008B BN Control: Control, Common, Control -U+008C BN Control: Control, Common, Control -U+008D BN Control: Control, Common, Control -U+008E BN Control: Control, Common, Control -U+008F BN Control: Control, Common, Control +U+0080 BN Control: Control, common, Control +U+0081 BN Control: Control, common, 
Control +U+0082 BN Control: Control, common, Control +U+0083 BN Control: Control, common, Control +U+0084 BN Control: Control, common, Control +U+0085 B Control: Control, common, Control +U+0086 BN Control: Control, common, Control +U+0087 BN Control: Control, common, Control +U+0088 BN Control: Control, common, Control +U+0089 BN Control: Control, common, Control +U+008A BN Control: Control, common, Control +U+008B BN Control: Control, common, Control +U+008C BN Control: Control, common, Control +U+008D BN Control: Control, common, Control +U+008E BN Control: Control, common, Control +U+008F BN Control: Control, common, Control findprop 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f -U+0090 BN Control: Control, Common, Control -U+0091 BN Control: Control, Common, Control -U+0092 BN Control: Control, Common, Control -U+0093 BN Control: Control, Common, Control -U+0094 BN Control: Control, Common, Control -U+0095 BN Control: Control, Common, Control -U+0096 BN Control: Control, Common, Control -U+0097 BN Control: Control, Common, Control -U+0098 BN Control: Control, Common, Control -U+0099 BN Control: Control, Common, Control -U+009A BN Control: Control, Common, Control -U+009B BN Control: Control, Common, Control -U+009C BN Control: Control, Common, Control -U+009D BN Control: Control, Common, Control -U+009E BN Control: Control, Common, Control -U+009F BN Control: Control, Common, Control +U+0090 BN Control: Control, common, Control +U+0091 BN Control: Control, common, Control +U+0092 BN Control: Control, common, Control +U+0093 BN Control: Control, common, Control +U+0094 BN Control: Control, common, Control +U+0095 BN Control: Control, common, Control +U+0096 BN Control: Control, common, Control +U+0097 BN Control: Control, common, Control +U+0098 BN Control: Control, common, Control +U+0099 BN Control: Control, common, Control +U+009A BN Control: Control, common, Control +U+009B BN Control: Control, common, Control +U+009C BN Control: Control, common, Control 
+U+009D BN Control: Control, common, Control +U+009E BN Control: Control, common, Control +U+009F BN Control: Control, common, Control findprop a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aa ab ac ad ae af -U+00A0 CS Separator: Space separator, Common, Other -U+00A1 ON Punctuation: Other punctuation, Common, Other -U+00A2 ET Symbol: Currency symbol, Common, Other -U+00A3 ET Symbol: Currency symbol, Common, Other -U+00A4 ET Symbol: Currency symbol, Common, Other -U+00A5 ET Symbol: Currency symbol, Common, Other -U+00A6 ON Symbol: Other symbol, Common, Other -U+00A7 ON Punctuation: Other punctuation, Common, Other -U+00A8 ON Symbol: Modifier symbol, Common, Other -U+00A9 ON Symbol: Other symbol, Common, Extended Pictographic -U+00AA L Letter: Other letter, Latin, Other -U+00AB ON Punctuation: Initial punctuation, Common, Other -U+00AC ON Symbol: Mathematical symbol, Common, Other -U+00AD BN Control: Format, Common, Control -U+00AE ON Symbol: Other symbol, Common, Extended Pictographic -U+00AF ON Symbol: Modifier symbol, Common, Other +U+00A0 CS Separator: Space separator, common, Other +U+00A1 ON Punctuation: Other punctuation, common, Other +U+00A2 ET Symbol: Currency symbol, common, Other +U+00A3 ET Symbol: Currency symbol, common, Other +U+00A4 ET Symbol: Currency symbol, common, Other +U+00A5 ET Symbol: Currency symbol, common, Other +U+00A6 ON Symbol: Other symbol, common, Other +U+00A7 ON Punctuation: Other punctuation, common, Other +U+00A8 ON Symbol: Modifier symbol, common, Other +U+00A9 ON Symbol: Other symbol, common, Extended Pictographic +U+00AA L Letter: Other letter, latin, Other +U+00AB ON Punctuation: Initial punctuation, common, Other +U+00AC ON Symbol: Mathematical symbol, common, Other +U+00AD BN Control: Format, common, Control +U+00AE ON Symbol: Other symbol, common, Extended Pictographic +U+00AF ON Symbol: Modifier symbol, common, Other findprop b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf -U+00B0 ET Symbol: Other symbol, Common, Other -U+00B1 ET Symbol: 
Mathematical symbol, Common, Other -U+00B2 EN Number: Other number, Common, Other -U+00B3 EN Number: Other number, Common, Other -U+00B4 ON Symbol: Modifier symbol, Common, Other -U+00B5 L Letter: Lower case letter, Common, Other, U+03BC, U+039C -U+00B6 ON Punctuation: Other punctuation, Common, Other -U+00B7 ON Punctuation: Other punctuation, Common, Other -U+00B8 ON Symbol: Modifier symbol, Common, Other -U+00B9 EN Number: Other number, Common, Other -U+00BA L Letter: Other letter, Latin, Other -U+00BB ON Punctuation: Final punctuation, Common, Other -U+00BC ON Number: Other number, Common, Other -U+00BD ON Number: Other number, Common, Other -U+00BE ON Number: Other number, Common, Other -U+00BF ON Punctuation: Other punctuation, Common, Other +U+00B0 ET Symbol: Other symbol, common, Other +U+00B1 ET Symbol: Mathematical symbol, common, Other +U+00B2 EN Number: Other number, common, Other +U+00B3 EN Number: Other number, common, Other +U+00B4 ON Symbol: Modifier symbol, common, Other +U+00B5 L Letter: Lower case letter, common, Other, U+03BC, U+039C +U+00B6 ON Punctuation: Other punctuation, common, Other +U+00B7 ON Punctuation: Other punctuation, common, Other +U+00B8 ON Symbol: Modifier symbol, common, Other +U+00B9 EN Number: Other number, common, Other +U+00BA L Letter: Other letter, latin, Other +U+00BB ON Punctuation: Final punctuation, common, Other +U+00BC ON Number: Other number, common, Other +U+00BD ON Number: Other number, common, Other +U+00BE ON Number: Other number, common, Other +U+00BF ON Punctuation: Other punctuation, common, Other findprop c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 ca cb cc cd ce cf -U+00C0 L Letter: Upper case letter, Latin, Other, U+00E0 -U+00C1 L Letter: Upper case letter, Latin, Other, U+00E1 -U+00C2 L Letter: Upper case letter, Latin, Other, U+00E2 -U+00C3 L Letter: Upper case letter, Latin, Other, U+00E3 -U+00C4 L Letter: Upper case letter, Latin, Other, U+00E4 -U+00C5 L Letter: Upper case letter, Latin, Other, U+00E5, U+212B 
-U+00C6 L Letter: Upper case letter, Latin, Other, U+00E6 -U+00C7 L Letter: Upper case letter, Latin, Other, U+00E7 -U+00C8 L Letter: Upper case letter, Latin, Other, U+00E8 -U+00C9 L Letter: Upper case letter, Latin, Other, U+00E9 -U+00CA L Letter: Upper case letter, Latin, Other, U+00EA -U+00CB L Letter: Upper case letter, Latin, Other, U+00EB -U+00CC L Letter: Upper case letter, Latin, Other, U+00EC -U+00CD L Letter: Upper case letter, Latin, Other, U+00ED -U+00CE L Letter: Upper case letter, Latin, Other, U+00EE -U+00CF L Letter: Upper case letter, Latin, Other, U+00EF +U+00C0 L Letter: Upper case letter, latin, Other, U+00E0 +U+00C1 L Letter: Upper case letter, latin, Other, U+00E1 +U+00C2 L Letter: Upper case letter, latin, Other, U+00E2 +U+00C3 L Letter: Upper case letter, latin, Other, U+00E3 +U+00C4 L Letter: Upper case letter, latin, Other, U+00E4 +U+00C5 L Letter: Upper case letter, latin, Other, U+00E5, U+212B +U+00C6 L Letter: Upper case letter, latin, Other, U+00E6 +U+00C7 L Letter: Upper case letter, latin, Other, U+00E7 +U+00C8 L Letter: Upper case letter, latin, Other, U+00E8 +U+00C9 L Letter: Upper case letter, latin, Other, U+00E9 +U+00CA L Letter: Upper case letter, latin, Other, U+00EA +U+00CB L Letter: Upper case letter, latin, Other, U+00EB +U+00CC L Letter: Upper case letter, latin, Other, U+00EC +U+00CD L Letter: Upper case letter, latin, Other, U+00ED +U+00CE L Letter: Upper case letter, latin, Other, U+00EE +U+00CF L Letter: Upper case letter, latin, Other, U+00EF findprop d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 da db dc dd de df -U+00D0 L Letter: Upper case letter, Latin, Other, U+00F0 -U+00D1 L Letter: Upper case letter, Latin, Other, U+00F1 -U+00D2 L Letter: Upper case letter, Latin, Other, U+00F2 -U+00D3 L Letter: Upper case letter, Latin, Other, U+00F3 -U+00D4 L Letter: Upper case letter, Latin, Other, U+00F4 -U+00D5 L Letter: Upper case letter, Latin, Other, U+00F5 -U+00D6 L Letter: Upper case letter, Latin, Other, U+00F6 -U+00D7 ON Symbol: 
Mathematical symbol, Common, Other -U+00D8 L Letter: Upper case letter, Latin, Other, U+00F8 -U+00D9 L Letter: Upper case letter, Latin, Other, U+00F9 -U+00DA L Letter: Upper case letter, Latin, Other, U+00FA -U+00DB L Letter: Upper case letter, Latin, Other, U+00FB -U+00DC L Letter: Upper case letter, Latin, Other, U+00FC -U+00DD L Letter: Upper case letter, Latin, Other, U+00FD -U+00DE L Letter: Upper case letter, Latin, Other, U+00FE -U+00DF L Letter: Lower case letter, Latin, Other, U+1E9E +U+00D0 L Letter: Upper case letter, latin, Other, U+00F0 +U+00D1 L Letter: Upper case letter, latin, Other, U+00F1 +U+00D2 L Letter: Upper case letter, latin, Other, U+00F2 +U+00D3 L Letter: Upper case letter, latin, Other, U+00F3 +U+00D4 L Letter: Upper case letter, latin, Other, U+00F4 +U+00D5 L Letter: Upper case letter, latin, Other, U+00F5 +U+00D6 L Letter: Upper case letter, latin, Other, U+00F6 +U+00D7 ON Symbol: Mathematical symbol, common, Other +U+00D8 L Letter: Upper case letter, latin, Other, U+00F8 +U+00D9 L Letter: Upper case letter, latin, Other, U+00F9 +U+00DA L Letter: Upper case letter, latin, Other, U+00FA +U+00DB L Letter: Upper case letter, latin, Other, U+00FB +U+00DC L Letter: Upper case letter, latin, Other, U+00FC +U+00DD L Letter: Upper case letter, latin, Other, U+00FD +U+00DE L Letter: Upper case letter, latin, Other, U+00FE +U+00DF L Letter: Lower case letter, latin, Other, U+1E9E findprop e0 e1 e2 e3 e4 e5 e6 e7 e8 e9 ea eb ec ed ee ef -U+00E0 L Letter: Lower case letter, Latin, Other, U+00C0 -U+00E1 L Letter: Lower case letter, Latin, Other, U+00C1 -U+00E2 L Letter: Lower case letter, Latin, Other, U+00C2 -U+00E3 L Letter: Lower case letter, Latin, Other, U+00C3 -U+00E4 L Letter: Lower case letter, Latin, Other, U+00C4 -U+00E5 L Letter: Lower case letter, Latin, Other, U+00C5, U+212B -U+00E6 L Letter: Lower case letter, Latin, Other, U+00C6 -U+00E7 L Letter: Lower case letter, Latin, Other, U+00C7 -U+00E8 L Letter: Lower case letter, Latin, 
Other, U+00C8 -U+00E9 L Letter: Lower case letter, Latin, Other, U+00C9 -U+00EA L Letter: Lower case letter, Latin, Other, U+00CA -U+00EB L Letter: Lower case letter, Latin, Other, U+00CB -U+00EC L Letter: Lower case letter, Latin, Other, U+00CC -U+00ED L Letter: Lower case letter, Latin, Other, U+00CD -U+00EE L Letter: Lower case letter, Latin, Other, U+00CE -U+00EF L Letter: Lower case letter, Latin, Other, U+00CF +U+00E0 L Letter: Lower case letter, latin, Other, U+00C0 +U+00E1 L Letter: Lower case letter, latin, Other, U+00C1 +U+00E2 L Letter: Lower case letter, latin, Other, U+00C2 +U+00E3 L Letter: Lower case letter, latin, Other, U+00C3 +U+00E4 L Letter: Lower case letter, latin, Other, U+00C4 +U+00E5 L Letter: Lower case letter, latin, Other, U+00C5, U+212B +U+00E6 L Letter: Lower case letter, latin, Other, U+00C6 +U+00E7 L Letter: Lower case letter, latin, Other, U+00C7 +U+00E8 L Letter: Lower case letter, latin, Other, U+00C8 +U+00E9 L Letter: Lower case letter, latin, Other, U+00C9 +U+00EA L Letter: Lower case letter, latin, Other, U+00CA +U+00EB L Letter: Lower case letter, latin, Other, U+00CB +U+00EC L Letter: Lower case letter, latin, Other, U+00CC +U+00ED L Letter: Lower case letter, latin, Other, U+00CD +U+00EE L Letter: Lower case letter, latin, Other, U+00CE +U+00EF L Letter: Lower case letter, latin, Other, U+00CF findprop f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff -U+00F0 L Letter: Lower case letter, Latin, Other, U+00D0 -U+00F1 L Letter: Lower case letter, Latin, Other, U+00D1 -U+00F2 L Letter: Lower case letter, Latin, Other, U+00D2 -U+00F3 L Letter: Lower case letter, Latin, Other, U+00D3 -U+00F4 L Letter: Lower case letter, Latin, Other, U+00D4 -U+00F5 L Letter: Lower case letter, Latin, Other, U+00D5 -U+00F6 L Letter: Lower case letter, Latin, Other, U+00D6 -U+00F7 ON Symbol: Mathematical symbol, Common, Other -U+00F8 L Letter: Lower case letter, Latin, Other, U+00D8 -U+00F9 L Letter: Lower case letter, Latin, Other, U+00D9 -U+00FA L 
Letter: Lower case letter, Latin, Other, U+00DA -U+00FB L Letter: Lower case letter, Latin, Other, U+00DB -U+00FC L Letter: Lower case letter, Latin, Other, U+00DC -U+00FD L Letter: Lower case letter, Latin, Other, U+00DD -U+00FE L Letter: Lower case letter, Latin, Other, U+00DE -U+00FF L Letter: Lower case letter, Latin, Other, U+0178 +U+00F0 L Letter: Lower case letter, latin, Other, U+00D0 +U+00F1 L Letter: Lower case letter, latin, Other, U+00D1 +U+00F2 L Letter: Lower case letter, latin, Other, U+00D2 +U+00F3 L Letter: Lower case letter, latin, Other, U+00D3 +U+00F4 L Letter: Lower case letter, latin, Other, U+00D4 +U+00F5 L Letter: Lower case letter, latin, Other, U+00D5 +U+00F6 L Letter: Lower case letter, latin, Other, U+00D6 +U+00F7 ON Symbol: Mathematical symbol, common, Other +U+00F8 L Letter: Lower case letter, latin, Other, U+00D8 +U+00F9 L Letter: Lower case letter, latin, Other, U+00D9 +U+00FA L Letter: Lower case letter, latin, Other, U+00DA +U+00FB L Letter: Lower case letter, latin, Other, U+00DB +U+00FC L Letter: Lower case letter, latin, Other, U+00DC +U+00FD L Letter: Lower case letter, latin, Other, U+00DD +U+00FE L Letter: Lower case letter, latin, Other, U+00DE +U+00FF L Letter: Lower case letter, latin, Other, U+0178 findprop 0100 0101 0102 0103 0104 0105 0106 -U+0100 L Letter: Upper case letter, Latin, Other, U+0101 -U+0101 L Letter: Lower case letter, Latin, Other, U+0100 -U+0102 L Letter: Upper case letter, Latin, Other, U+0103 -U+0103 L Letter: Lower case letter, Latin, Other, U+0102 -U+0104 L Letter: Upper case letter, Latin, Other, U+0105 -U+0105 L Letter: Lower case letter, Latin, Other, U+0104 -U+0106 L Letter: Upper case letter, Latin, Other, U+0107 +U+0100 L Letter: Upper case letter, latin, Other, U+0101 +U+0101 L Letter: Lower case letter, latin, Other, U+0100 +U+0102 L Letter: Upper case letter, latin, Other, U+0103 +U+0103 L Letter: Lower case letter, latin, Other, U+0102 +U+0104 L Letter: Upper case letter, latin, Other, 
U+0105 +U+0105 L Letter: Lower case letter, latin, Other, U+0104 +U+0106 L Letter: Upper case letter, latin, Other, U+0107 findprop ffe0 ffe1 ffe2 ffe3 ffe4 ffe5 ffe6 ffe7 -U+FFE0 ET Symbol: Currency symbol, Common, Other -U+FFE1 ET Symbol: Currency symbol, Common, Other -U+FFE2 ON Symbol: Mathematical symbol, Common, Other -U+FFE3 ON Symbol: Modifier symbol, Common, Other -U+FFE4 ON Symbol: Other symbol, Common, Other -U+FFE5 ET Symbol: Currency symbol, Common, Other -U+FFE6 ET Symbol: Currency symbol, Common, Other -U+FFE7 L Control: Unassigned, Unknown, Other +U+FFE0 ET Symbol: Currency symbol, common, Other +U+FFE1 ET Symbol: Currency symbol, common, Other +U+FFE2 ON Symbol: Mathematical symbol, common, Other +U+FFE3 ON Symbol: Modifier symbol, common, Other +U+FFE4 ON Symbol: Other symbol, common, Other +U+FFE5 ET Symbol: Currency symbol, common, Other +U+FFE6 ET Symbol: Currency symbol, common, Other +U+FFE7 L Control: Unassigned, unknown, Other findprop ffe8 ffe9 ffea ffeb ffec ffed ffee ffef -U+FFE8 ON Symbol: Other symbol, Common, Other -U+FFE9 ON Symbol: Mathematical symbol, Common, Other -U+FFEA ON Symbol: Mathematical symbol, Common, Other -U+FFEB ON Symbol: Mathematical symbol, Common, Other -U+FFEC ON Symbol: Mathematical symbol, Common, Other -U+FFED ON Symbol: Other symbol, Common, Other -U+FFEE ON Symbol: Other symbol, Common, Other -U+FFEF L Control: Unassigned, Unknown, Other +U+FFE8 ON Symbol: Other symbol, common, Other +U+FFE9 ON Symbol: Mathematical symbol, common, Other +U+FFEA ON Symbol: Mathematical symbol, common, Other +U+FFEB ON Symbol: Mathematical symbol, common, Other +U+FFEC ON Symbol: Mathematical symbol, common, Other +U+FFED ON Symbol: Other symbol, common, Other +U+FFEE ON Symbol: Other symbol, common, Other +U+FFEF L Control: Unassigned, unknown, Other findprop fff8 fff9 fffa fffb fffc fffd fffe ffff -U+FFF8 BN Control: Unassigned, Unknown, Control -U+FFF9 ON Control: Format, Common, Control -U+FFFA ON Control: Format, Common, 
Control -U+FFFB ON Control: Format, Common, Control -U+FFFC ON Symbol: Other symbol, Common, Other -U+FFFD ON Symbol: Other symbol, Common, Other -U+FFFE BN Control: Unassigned, Unknown, Other -U+FFFF BN Control: Unassigned, Unknown, Other +U+FFF8 BN Control: Unassigned, unknown, Control +U+FFF9 ON Control: Format, common, Control +U+FFFA ON Control: Format, common, Control +U+FFFB ON Control: Format, common, Control +U+FFFC ON Symbol: Other symbol, common, Other +U+FFFD ON Symbol: Other symbol, common, Other +U+FFFE BN Control: Unassigned, unknown, Other +U+FFFF BN Control: Unassigned, unknown, Other findprop 10000 10001 e01ef f0000 100000 -U+10000 L Letter: Other letter, Linear_B, Other -U+10001 L Letter: Other letter, Linear_B, Other -U+E01EF NSM Mark: Non-spacing mark, Inherited, Extend -U+F0000 L Control: Private use, Unknown, Other -U+100000 L Control: Private use, Unknown, Other +U+10000 L Letter: Other letter, linearb, Other +U+10001 L Letter: Other letter, linearb, Other +U+E01EF NSM Mark: Non-spacing mark, inherited, Extend +U+F0000 L Control: Private use, unknown, Other +U+100000 L Control: Private use, unknown, Other findprop 1b00 12000 7c0 a840 10900 -U+1B00 NSM Mark: Non-spacing mark, Balinese, Extend -U+12000 L Letter: Other letter, Cuneiform, Other -U+07C0 R Number: Decimal number, Nko, Other -U+A840 L Letter: Other letter, Phags_Pa, Other -U+10900 R Letter: Other letter, Phoenician, Other +U+1B00 NSM Mark: Non-spacing mark, balinese, Extend +U+12000 L Letter: Other letter, cuneiform, Other +U+07C0 R Number: Decimal number, nko, Other +U+A840 L Letter: Other letter, phagspa, Other +U+10900 R Letter: Other letter, phoenician, Other findprop 1d79 a77d -U+1D79 L Letter: Lower case letter, Latin, Other, U+A77D -U+A77D L Letter: Upper case letter, Latin, Other, U+1D79 +U+1D79 L Letter: Lower case letter, latin, Other, U+A77D +U+A77D L Letter: Upper case letter, latin, Other, U+1D79 findprop 0800 083e a4d0 a4f7 aa80 aadf -U+0800 R Letter: Other letter, 
Samaritan, Other -U+083E R Punctuation: Other punctuation, Samaritan, Other -U+A4D0 L Letter: Other letter, Lisu, Other -U+A4F7 L Letter: Other letter, Lisu, Other -U+AA80 L Letter: Other letter, Tai_Viet, Other -U+AADF L Punctuation: Other punctuation, Tai_Viet, Other +U+0800 R Letter: Other letter, samaritan, Other +U+083E R Punctuation: Other punctuation, samaritan, Other +U+A4D0 L Letter: Other letter, lisu, Other +U+A4F7 L Letter: Other letter, lisu, Other +U+AA80 L Letter: Other letter, taiviet, Other +U+AADF L Punctuation: Other punctuation, taiviet, Other findprop 10b00 10b35 13000 1342e 10840 10855 -U+10B00 R Letter: Other letter, Avestan, Other -U+10B35 R Letter: Other letter, Avestan, Other -U+13000 L Letter: Other letter, Egyptian_Hieroglyphs, Other -U+1342E L Letter: Other letter, Egyptian_Hieroglyphs, Other -U+10840 R Letter: Other letter, Imperial_Aramaic, Other -U+10855 R Letter: Other letter, Imperial_Aramaic, Other +U+10B00 R Letter: Other letter, avestan, Other +U+10B35 R Letter: Other letter, avestan, Other +U+13000 L Letter: Other letter, egyptianhieroglyphs, Other +U+1342E L Letter: Other letter, egyptianhieroglyphs, Other +U+10840 R Letter: Other letter, imperialaramaic, Other +U+10855 R Letter: Other letter, imperialaramaic, Other findprop 11100 1113c 11680 116c0 -U+11100 NSM Mark: Non-spacing mark, Chakma, Extend -U+1113C L Number: Decimal number, Chakma, Other -U+11680 L Letter: Other letter, Takri, Other -U+116C0 L Number: Decimal number, Takri, Other +U+11100 NSM Mark: Non-spacing mark, chakma, Extend +U+1113C L Number: Decimal number, chakma, Other +U+11680 L Letter: Other letter, takri, Other +U+116C0 L Number: Decimal number, takri, Other findprop 0d 0a 0e 0711 1b04 1111 1169 11fe ae4c ad89 -U+000D B Control: Control, Common, CR -U+000A B Control: Control, Common, LF -U+000E BN Control: Control, Common, Control -U+0711 NSM Mark: Non-spacing mark, Syriac, Extend -U+1B04 L Mark: Spacing mark, Balinese, SpacingMark -U+1111 L Letter: 
Other letter, Hangul, Hangul syllable type L -U+1169 L Letter: Other letter, Hangul, Hangul syllable type V -U+11FE L Letter: Other letter, Hangul, Hangul syllable type T -U+AE4C L Letter: Other letter, Hangul, Hangul syllable type LV -U+AD89 L Letter: Other letter, Hangul, Hangul syllable type LVT +U+000D B Control: Control, common, CR +U+000A B Control: Control, common, LF +U+000E BN Control: Control, common, Control +U+0711 NSM Mark: Non-spacing mark, syriac, Extend +U+1B04 L Mark: Spacing mark, balinese, SpacingMark +U+1111 L Letter: Other letter, hangul, Hangul syllable type L +U+1169 L Letter: Other letter, hangul, Hangul syllable type V +U+11FE L Letter: Other letter, hangul, Hangul syllable type T +U+AE4C L Letter: Other letter, hangul, Hangul syllable type LV +U+AD89 L Letter: Other letter, hangul, Hangul syllable type LVT findprop 118a0 11ac7 16ad0 -U+118A0 L Letter: Upper case letter, Warang_Citi, Other, U+118C0 -U+11AC7 L Letter: Other letter, Pau_Cin_Hau, Other -U+16AD0 L Letter: Other letter, Bassa_Vah, Other +U+118A0 L Letter: Upper case letter, warangciti, Other, U+118C0 +U+11AC7 L Letter: Other letter, paucinhau, Other +U+16AD0 L Letter: Other letter, bassavah, Other findprop 11700 14400 108e0 11280 1d800 -U+11700 L Letter: Other letter, Ahom, Other -U+14400 L Letter: Other letter, Anatolian_Hieroglyphs, Other -U+108E0 R Letter: Other letter, Hatran, Other -U+11280 L Letter: Other letter, Multani, Other -U+1D800 L Symbol: Other symbol, SignWriting, Other +U+11700 L Letter: Other letter, ahom, Other +U+14400 L Letter: Other letter, anatolianhieroglyphs, Other +U+108E0 R Letter: Other letter, hatran, Other +U+11280 L Letter: Other letter, multani, Other +U+1D800 L Symbol: Other symbol, signwriting, Other findprop 11800 1e903 11da9 10d27 11ee0 16e48 10f27 10f30 -U+11800 L Letter: Other letter, Dogra, Other -U+1E903 R Letter: Upper case letter, Adlam, Other, U+1E925 -U+11DA9 L Number: Decimal number, Gunjala_Gondi, Other -U+10D27 NSM Mark: Non-spacing 
mark, Hanifi_Rohingya, Extend -U+11EE0 L Letter: Other letter, Makasar, Other -U+16E48 L Letter: Upper case letter, Medefaidrin, Other, U+16E68 -U+10F27 R Letter: Other letter, Old_Sogdian, Other -U+10F30 AL Letter: Other letter, Sogdian, Other +U+11800 L Letter: Other letter, dogra, Other +U+1E903 R Letter: Upper case letter, adlam, Other, U+1E925 +U+11DA9 L Number: Decimal number, gunjalagondi, Other +U+10D27 NSM Mark: Non-spacing mark, hanifirohingya, Extend +U+11EE0 L Letter: Other letter, makasar, Other +U+16E48 L Letter: Upper case letter, medefaidrin, Other, U+16E68 +U+10F27 R Letter: Other letter, oldsogdian, Other +U+10F30 AL Letter: Other letter, sogdian, Other findprop a836 a833 1cf4 20f0 1cd0 -U+A836 L Symbol: Other symbol, Common, Other, [Devanagari, Dogra, Gujarati, Gurmukhi, Khojki, Kaithi, Mahajani, Modi, Khudawadi, Takri, Tirhuta] -U+A833 L Number: Other number, Common, Other, [Devanagari, Dogra, Gujarati, Gurmukhi, Khojki, Kannada, Kaithi, Mahajani, Modi, Nandinagari, Khudawadi, Takri, Tirhuta] -U+1CF4 NSM Mark: Non-spacing mark, Inherited, Extend, [Devanagari, Grantha, Kannada] -U+20F0 NSM Mark: Non-spacing mark, Inherited, Extend, [Devanagari, Grantha, Latin] -U+1CD0 NSM Mark: Non-spacing mark, Inherited, Extend, [Bengali, Devanagari, Grantha, Kannada] +U+A836 L Symbol: Other symbol, common, Other, [devanagari, dogra, gujarati, gurmukhi, khojki, kaithi, mahajani, modi, khudawadi, takri, tirhuta] +U+A833 L Number: Other number, common, Other, [devanagari, dogra, gujarati, gurmukhi, khojki, kannada, kaithi, mahajani, modi, nandinagari, khudawadi, takri, tirhuta] +U+1CF4 NSM Mark: Non-spacing mark, inherited, Extend, [devanagari, grantha, kannada] +U+20F0 NSM Mark: Non-spacing mark, inherited, Extend, [devanagari, grantha, latin] +U+1CD0 NSM Mark: Non-spacing mark, inherited, Extend, [bengali, devanagari, grantha, kannada] findprop 32ff -U+32FF L Symbol: Other symbol, Common, Other, [Han] +U+32FF L Symbol: Other symbol, common, Other, [han] 
findprop 1f16d -U+1F16D ON Symbol: Other symbol, Common, Extended Pictographic +U+1F16D ON Symbol: Other symbol, common, Extended Pictographic findprop U+10e93 U+10eaa -U+10E93 R Letter: Other letter, Yezidi, Other -U+10EAA R Control: Unassigned, Unknown, Other +U+10E93 R Letter: Other letter, yezidi, Other +U+10EAA R Control: Unassigned, unknown, Other findprop 0602 202a 202b 202c 2068 2069 202d 202e 2067 -U+0602 AN Control: Format, Arabic, Prepend -U+202A *LRE Control: Format, Common, Control -U+202B *RLE Control: Format, Common, Control -U+202C *PDF Control: Format, Common, Control -U+2068 *FSI Control: Format, Common, Control -U+2069 *PDI Control: Format, Common, Control -U+202D *LRO Control: Format, Common, Control -U+202E *RLO Control: Format, Common, Control -U+2067 *RLI Control: Format, Common, Control +U+0602 AN Control: Format, arabic, Prepend +U+202A *LRE Control: Format, common, Control +U+202B *RLE Control: Format, common, Control +U+202C *PDF Control: Format, common, Control +U+2068 *FSI Control: Format, common, Control +U+2069 *PDI Control: Format, common, Control +U+202D *LRO Control: Format, common, Control +U+202E *RLO Control: Format, common, Control +U+2067 *RLI Control: Format, common, Control diff --git a/maint/ucptestdata/testoutput2 b/maint/ucptestdata/testoutput2 index b368e15..810262c 100644 --- a/maint/ucptestdata/testoutput2 +++ b/maint/ucptestdata/testoutput2 @@ -1,253 +1,220 @@ find script Han -U+2E80..U+2E99 ON Symbol: Other symbol, Han, Other -U+2E9B..U+2EF3 ON Symbol: Other symbol, Han, Other -U+2F00..U+2FD5 ON Symbol: Other symbol, Han, Other - U+3005 L Letter: Modifier letter, Han, Other - U+3007 L Number: Letter number, Han, Other -U+3021..U+3029 L Number: Letter number, Han, Other -U+3038..U+303A L Number: Letter number, Han, Other - U+303B L Letter: Modifier letter, Han, Other -U+3400..U+4DBF L Letter: Other letter, Han, Other -U+4E00..U+9FFF L Letter: Other letter, Han, Other -U+F900..U+FA6D L Letter: Other letter, Han, Other 
-U+FA70..U+FAD9 L Letter: Other letter, Han, Other - U+16FE2 ON Punctuation: Other punctuation, Han, Other - U+16FE3 L Letter: Modifier letter, Han, Other -U+16FF0..U+16FF1 L Mark: Spacing mark, Han, SpacingMark -U+20000..U+2A6DF L Letter: Other letter, Han, Other -U+2A700..U+2B738 L Letter: Other letter, Han, Other -U+2B740..U+2B81D L Letter: Other letter, Han, Other -U+2B820..U+2CEA1 L Letter: Other letter, Han, Other -U+2CEB0..U+2EBE0 L Letter: Other letter, Han, Other -U+2F800..U+2FA1D L Letter: Other letter, Han, Other -U+30000..U+3134A L Letter: Other letter, Han, Other +** Unrecognized script name "Han" find type Pe script Common scriptx Hangul -U+3009 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi] -U+300B ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi] -U+300D ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi] -U+300F ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi] -U+3011 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi] -U+3015 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi] -U+3017 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi] -U+3019 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi] -U+301B ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi] -U+301E..U+301F ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana] - U+FF63 ON Punctuation: Close punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana, Yi] +** Unrecognized script name "Common" find type Sk -U+005E ON Symbol: Modifier symbol, Common, Other -U+0060 ON Symbol: Modifier symbol, Common, Other -U+00A8 ON 
Symbol: Modifier symbol, Common, Other -U+00AF ON Symbol: Modifier symbol, Common, Other -U+00B4 ON Symbol: Modifier symbol, Common, Other -U+00B8 ON Symbol: Modifier symbol, Common, Other -U+02C2..U+02C5 ON Symbol: Modifier symbol, Common, Other -U+02D2..U+02DF ON Symbol: Modifier symbol, Common, Other -U+02E5..U+02E9 ON Symbol: Modifier symbol, Common, Other -U+02EA..U+02EB ON Symbol: Modifier symbol, Bopomofo, Other - U+02ED ON Symbol: Modifier symbol, Common, Other -U+02EF..U+02FF ON Symbol: Modifier symbol, Common, Other - U+0375 ON Symbol: Modifier symbol, Greek, Other - U+0384 ON Symbol: Modifier symbol, Greek, Other - U+0385 ON Symbol: Modifier symbol, Common, Other - U+0888 AL Symbol: Modifier symbol, Arabic, Other - U+1FBD ON Symbol: Modifier symbol, Greek, Other -U+1FBF..U+1FC1 ON Symbol: Modifier symbol, Greek, Other -U+1FCD..U+1FCF ON Symbol: Modifier symbol, Greek, Other -U+1FDD..U+1FDF ON Symbol: Modifier symbol, Greek, Other -U+1FED..U+1FEF ON Symbol: Modifier symbol, Greek, Other -U+1FFD..U+1FFE ON Symbol: Modifier symbol, Greek, Other -U+309B..U+309C ON Symbol: Modifier symbol, Common, Other, [Hiragana, Katakana] -U+A700..U+A707 ON Symbol: Modifier symbol, Common, Other, [Han, Latin] -U+A708..U+A716 ON Symbol: Modifier symbol, Common, Other -U+A720..U+A721 ON Symbol: Modifier symbol, Common, Other -U+A789..U+A78A L Symbol: Modifier symbol, Common, Other - U+AB5B L Symbol: Modifier symbol, Common, Other -U+AB6A..U+AB6B ON Symbol: Modifier symbol, Common, Other -U+FBB2..U+FBC2 AL Symbol: Modifier symbol, Arabic, Other - U+FF3E ON Symbol: Modifier symbol, Common, Other - U+FF40 ON Symbol: Modifier symbol, Common, Other - U+FFE3 ON Symbol: Modifier symbol, Common, Other -U+1F3FB..U+1F3FF ON Symbol: Modifier symbol, Common, Extend +U+005E ON Symbol: Modifier symbol, common, Other +U+0060 ON Symbol: Modifier symbol, common, Other +U+00A8 ON Symbol: Modifier symbol, common, Other +U+00AF ON Symbol: Modifier symbol, common, Other +U+00B4 ON Symbol: 
Modifier symbol, common, Other +U+00B8 ON Symbol: Modifier symbol, common, Other +U+02C2..U+02C5 ON Symbol: Modifier symbol, common, Other +U+02D2..U+02DF ON Symbol: Modifier symbol, common, Other +U+02E5..U+02E9 ON Symbol: Modifier symbol, common, Other +U+02EA..U+02EB ON Symbol: Modifier symbol, bopomofo, Other + U+02ED ON Symbol: Modifier symbol, common, Other +U+02EF..U+02FF ON Symbol: Modifier symbol, common, Other + U+0375 ON Symbol: Modifier symbol, greek, Other + U+0384 ON Symbol: Modifier symbol, greek, Other + U+0385 ON Symbol: Modifier symbol, common, Other + U+0888 AL Symbol: Modifier symbol, arabic, Other + U+1FBD ON Symbol: Modifier symbol, greek, Other +U+1FBF..U+1FC1 ON Symbol: Modifier symbol, greek, Other +U+1FCD..U+1FCF ON Symbol: Modifier symbol, greek, Other +U+1FDD..U+1FDF ON Symbol: Modifier symbol, greek, Other +U+1FED..U+1FEF ON Symbol: Modifier symbol, greek, Other +U+1FFD..U+1FFE ON Symbol: Modifier symbol, greek, Other +U+309B..U+309C ON Symbol: Modifier symbol, common, Other, [hiragana, katakana] +U+A700..U+A707 ON Symbol: Modifier symbol, common, Other, [han, latin] +U+A708..U+A716 ON Symbol: Modifier symbol, common, Other +U+A720..U+A721 ON Symbol: Modifier symbol, common, Other +U+A789..U+A78A L Symbol: Modifier symbol, common, Other + U+AB5B L Symbol: Modifier symbol, common, Other +U+AB6A..U+AB6B ON Symbol: Modifier symbol, common, Other +U+FBB2..U+FBC2 AL Symbol: Modifier symbol, arabic, Other + U+FF3E ON Symbol: Modifier symbol, common, Other + U+FF40 ON Symbol: Modifier symbol, common, Other + U+FFE3 ON Symbol: Modifier symbol, common, Other +U+1F3FB..U+1F3FF ON Symbol: Modifier symbol, common, Extend find type Pd -U+002D ES Punctuation: Dash punctuation, Common, Other -U+058A ON Punctuation: Dash punctuation, Armenian, Other -U+05BE R Punctuation: Dash punctuation, Hebrew, Other -U+1400 ON Punctuation: Dash punctuation, Canadian_Aboriginal, Other -U+1806 ON Punctuation: Dash punctuation, Mongolian, Other -U+2010..U+2015 ON 
Punctuation: Dash punctuation, Common, Other - U+2E17 ON Punctuation: Dash punctuation, Common, Other - U+2E1A ON Punctuation: Dash punctuation, Common, Other -U+2E3A..U+2E3B ON Punctuation: Dash punctuation, Common, Other - U+2E40 ON Punctuation: Dash punctuation, Common, Other - U+2E5D ON Punctuation: Dash punctuation, Common, Other - U+301C ON Punctuation: Dash punctuation, Common, Other, [Bopomofo, Hangul, Han, Hiragana, Katakana] - U+3030 ON Punctuation: Dash punctuation, Common, Extended Pictographic, [Bopomofo, Hangul, Han, Hiragana, Katakana] - U+30A0 ON Punctuation: Dash punctuation, Common, Other, [Hiragana, Katakana] -U+FE31..U+FE32 ON Punctuation: Dash punctuation, Common, Other - U+FE58 ON Punctuation: Dash punctuation, Common, Other - U+FE63 ES Punctuation: Dash punctuation, Common, Other - U+FF0D ES Punctuation: Dash punctuation, Common, Other - U+10EAD R Punctuation: Dash punctuation, Yezidi, Other +U+002D ES Punctuation: Dash punctuation, common, Other +U+058A ON Punctuation: Dash punctuation, armenian, Other +U+05BE R Punctuation: Dash punctuation, hebrew, Other +U+1400 ON Punctuation: Dash punctuation, canadianaboriginal, Other +U+1806 ON Punctuation: Dash punctuation, mongolian, Other +U+2010..U+2015 ON Punctuation: Dash punctuation, common, Other + U+2E17 ON Punctuation: Dash punctuation, common, Other + U+2E1A ON Punctuation: Dash punctuation, common, Other +U+2E3A..U+2E3B ON Punctuation: Dash punctuation, common, Other + U+2E40 ON Punctuation: Dash punctuation, common, Other + U+2E5D ON Punctuation: Dash punctuation, common, Other + U+301C ON Punctuation: Dash punctuation, common, Other, [bopomofo, hangul, han, hiragana, katakana] + U+3030 ON Punctuation: Dash punctuation, common, Extended Pictographic, [bopomofo, hangul, han, hiragana, katakana] + U+30A0 ON Punctuation: Dash punctuation, common, Other, [hiragana, katakana] +U+FE31..U+FE32 ON Punctuation: Dash punctuation, common, Other + U+FE58 ON Punctuation: Dash punctuation, common, Other 
+ U+FE63 ES Punctuation: Dash punctuation, common, Other + U+FF0D ES Punctuation: Dash punctuation, common, Other + U+10EAD R Punctuation: Dash punctuation, yezidi, Other find gbreak LVT -U+AC01..U+AC1B L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AC1D..U+AC37 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AC39..U+AC53 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AC55..U+AC6F L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AC71..U+AC8B L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AC8D..U+ACA7 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+ACA9..U+ACC3 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+ACC5..U+ACDF L Letter: Other letter, Hangul, Hangul syllable type LVT -U+ACE1..U+ACFB L Letter: Other letter, Hangul, Hangul syllable type LVT -U+ACFD..U+AD17 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AD19..U+AD33 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AD35..U+AD4F L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AD51..U+AD6B L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AD6D..U+AD87 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AD89..U+ADA3 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+ADA5..U+ADBF L Letter: Other letter, Hangul, Hangul syllable type LVT -U+ADC1..U+ADDB L Letter: Other letter, Hangul, Hangul syllable type LVT -U+ADDD..U+ADF7 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+ADF9..U+AE13 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AE15..U+AE2F L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AE31..U+AE4B L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AE4D..U+AE67 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AE69..U+AE83 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AE85..U+AE9F L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AEA1..U+AEBB L Letter: Other letter, Hangul, Hangul 
syllable type LVT -U+AEBD..U+AED7 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AED9..U+AEF3 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AEF5..U+AF0F L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AF11..U+AF2B L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AF2D..U+AF47 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AF49..U+AF63 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AF65..U+AF7F L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AF81..U+AF9B L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AF9D..U+AFB7 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AFB9..U+AFD3 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AFD5..U+AFEF L Letter: Other letter, Hangul, Hangul syllable type LVT -U+AFF1..U+B00B L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B00D..U+B027 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B029..U+B043 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B045..U+B05F L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B061..U+B07B L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B07D..U+B097 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B099..U+B0B3 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B0B5..U+B0CF L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B0D1..U+B0EB L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B0ED..U+B107 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B109..U+B123 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B125..U+B13F L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B141..U+B15B L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B15D..U+B177 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B179..U+B193 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B195..U+B1AF L Letter: Other letter, Hangul, Hangul syllable type LVT 
-U+B1B1..U+B1CB L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B1CD..U+B1E7 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B1E9..U+B203 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B205..U+B21F L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B221..U+B23B L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B23D..U+B257 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B259..U+B273 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B275..U+B28F L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B291..U+B2AB L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B2AD..U+B2C7 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B2C9..U+B2E3 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B2E5..U+B2FF L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B301..U+B31B L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B31D..U+B337 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B339..U+B353 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B355..U+B36F L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B371..U+B38B L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B38D..U+B3A7 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B3A9..U+B3C3 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B3C5..U+B3DF L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B3E1..U+B3FB L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B3FD..U+B417 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B419..U+B433 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B435..U+B44F L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B451..U+B46B L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B46D..U+B487 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B489..U+B4A3 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B4A5..U+B4BF L Letter: 
Other letter, Hangul, Hangul syllable type LVT -U+B4C1..U+B4DB L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B4DD..U+B4F7 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B4F9..U+B513 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B515..U+B52F L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B531..U+B54B L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B54D..U+B567 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B569..U+B583 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B585..U+B59F L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B5A1..U+B5BB L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B5BD..U+B5D7 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B5D9..U+B5F3 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B5F5..U+B60F L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B611..U+B62B L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B62D..U+B647 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B649..U+B663 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B665..U+B67F L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B681..U+B69B L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B69D..U+B6B7 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B6B9..U+B6D3 L Letter: Other letter, Hangul, Hangul syllable type LVT -U+B6D5..U+B6EF L Letter: Other letter, Hangul, Hangul syllable type LVT +U+AC01..U+AC1B L Letter: Other letter, hangul, Hangul syllable type LVT +U+AC1D..U+AC37 L Letter: Other letter, hangul, Hangul syllable type LVT +U+AC39..U+AC53 L Letter: Other letter, hangul, Hangul syllable type LVT +U+AC55..U+AC6F L Letter: Other letter, hangul, Hangul syllable type LVT +U+AC71..U+AC8B L Letter: Other letter, hangul, Hangul syllable type LVT +U+AC8D..U+ACA7 L Letter: Other letter, hangul, Hangul syllable type LVT +U+ACA9..U+ACC3 L Letter: Other letter, hangul, Hangul 
syllable type LVT +U+ACC5..U+ACDF L Letter: Other letter, hangul, Hangul syllable type LVT +U+ACE1..U+ACFB L Letter: Other letter, hangul, Hangul syllable type LVT +U+ACFD..U+AD17 L Letter: Other letter, hangul, Hangul syllable type LVT +U+AD19..U+AD33 L Letter: Other letter, hangul, Hangul syllable type LVT +U+AD35..U+AD4F L Letter: Other letter, hangul, Hangul syllable type LVT +U+AD51..U+AD6B L Letter: Other letter, hangul, Hangul syllable type LVT +U+AD6D..U+AD87 L Letter: Other letter, hangul, Hangul syllable type LVT +U+AD89..U+ADA3 L Letter: Other letter, hangul, Hangul syllable type LVT +U+ADA5..U+ADBF L Letter: Other letter, hangul, Hangul syllable type LVT +U+ADC1..U+ADDB L Letter: Other letter, hangul, Hangul syllable type LVT +U+ADDD..U+ADF7 L Letter: Other letter, hangul, Hangul syllable type LVT +U+ADF9..U+AE13 L Letter: Other letter, hangul, Hangul syllable type LVT +U+AE15..U+AE2F L Letter: Other letter, hangul, Hangul syllable type LVT +U+AE31..U+AE4B L Letter: Other letter, hangul, Hangul syllable type LVT +U+AE4D..U+AE67 L Letter: Other letter, hangul, Hangul syllable type LVT +U+AE69..U+AE83 L Letter: Other letter, hangul, Hangul syllable type LVT +U+AE85..U+AE9F L Letter: Other letter, hangul, Hangul syllable type LVT +U+AEA1..U+AEBB L Letter: Other letter, hangul, Hangul syllable type LVT +U+AEBD..U+AED7 L Letter: Other letter, hangul, Hangul syllable type LVT +U+AED9..U+AEF3 L Letter: Other letter, hangul, Hangul syllable type LVT +U+AEF5..U+AF0F L Letter: Other letter, hangul, Hangul syllable type LVT +U+AF11..U+AF2B L Letter: Other letter, hangul, Hangul syllable type LVT +U+AF2D..U+AF47 L Letter: Other letter, hangul, Hangul syllable type LVT +U+AF49..U+AF63 L Letter: Other letter, hangul, Hangul syllable type LVT +U+AF65..U+AF7F L Letter: Other letter, hangul, Hangul syllable type LVT +U+AF81..U+AF9B L Letter: Other letter, hangul, Hangul syllable type LVT +U+AF9D..U+AFB7 L Letter: Other letter, hangul, Hangul syllable type LVT 
+U+AFB9..U+AFD3 L Letter: Other letter, hangul, Hangul syllable type LVT +U+AFD5..U+AFEF L Letter: Other letter, hangul, Hangul syllable type LVT +U+AFF1..U+B00B L Letter: Other letter, hangul, Hangul syllable type LVT +U+B00D..U+B027 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B029..U+B043 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B045..U+B05F L Letter: Other letter, hangul, Hangul syllable type LVT +U+B061..U+B07B L Letter: Other letter, hangul, Hangul syllable type LVT +U+B07D..U+B097 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B099..U+B0B3 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B0B5..U+B0CF L Letter: Other letter, hangul, Hangul syllable type LVT +U+B0D1..U+B0EB L Letter: Other letter, hangul, Hangul syllable type LVT +U+B0ED..U+B107 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B109..U+B123 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B125..U+B13F L Letter: Other letter, hangul, Hangul syllable type LVT +U+B141..U+B15B L Letter: Other letter, hangul, Hangul syllable type LVT +U+B15D..U+B177 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B179..U+B193 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B195..U+B1AF L Letter: Other letter, hangul, Hangul syllable type LVT +U+B1B1..U+B1CB L Letter: Other letter, hangul, Hangul syllable type LVT +U+B1CD..U+B1E7 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B1E9..U+B203 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B205..U+B21F L Letter: Other letter, hangul, Hangul syllable type LVT +U+B221..U+B23B L Letter: Other letter, hangul, Hangul syllable type LVT +U+B23D..U+B257 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B259..U+B273 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B275..U+B28F L Letter: Other letter, hangul, Hangul syllable type LVT +U+B291..U+B2AB L Letter: Other letter, hangul, Hangul syllable type LVT +U+B2AD..U+B2C7 L Letter: 
Other letter, hangul, Hangul syllable type LVT +U+B2C9..U+B2E3 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B2E5..U+B2FF L Letter: Other letter, hangul, Hangul syllable type LVT +U+B301..U+B31B L Letter: Other letter, hangul, Hangul syllable type LVT +U+B31D..U+B337 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B339..U+B353 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B355..U+B36F L Letter: Other letter, hangul, Hangul syllable type LVT +U+B371..U+B38B L Letter: Other letter, hangul, Hangul syllable type LVT +U+B38D..U+B3A7 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B3A9..U+B3C3 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B3C5..U+B3DF L Letter: Other letter, hangul, Hangul syllable type LVT +U+B3E1..U+B3FB L Letter: Other letter, hangul, Hangul syllable type LVT +U+B3FD..U+B417 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B419..U+B433 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B435..U+B44F L Letter: Other letter, hangul, Hangul syllable type LVT +U+B451..U+B46B L Letter: Other letter, hangul, Hangul syllable type LVT +U+B46D..U+B487 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B489..U+B4A3 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B4A5..U+B4BF L Letter: Other letter, hangul, Hangul syllable type LVT +U+B4C1..U+B4DB L Letter: Other letter, hangul, Hangul syllable type LVT +U+B4DD..U+B4F7 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B4F9..U+B513 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B515..U+B52F L Letter: Other letter, hangul, Hangul syllable type LVT +U+B531..U+B54B L Letter: Other letter, hangul, Hangul syllable type LVT +U+B54D..U+B567 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B569..U+B583 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B585..U+B59F L Letter: Other letter, hangul, Hangul syllable type LVT +U+B5A1..U+B5BB L Letter: Other letter, hangul, Hangul 
syllable type LVT +U+B5BD..U+B5D7 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B5D9..U+B5F3 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B5F5..U+B60F L Letter: Other letter, hangul, Hangul syllable type LVT +U+B611..U+B62B L Letter: Other letter, hangul, Hangul syllable type LVT +U+B62D..U+B647 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B649..U+B663 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B665..U+B67F L Letter: Other letter, hangul, Hangul syllable type LVT +U+B681..U+B69B L Letter: Other letter, hangul, Hangul syllable type LVT +U+B69D..U+B6B7 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B6B9..U+B6D3 L Letter: Other letter, hangul, Hangul syllable type LVT +U+B6D5..U+B6EF L Letter: Other letter, hangul, Hangul syllable type LVT ... find script Old_Uyghur -U+10F70..U+10F81 R Letter: Other letter, Old_Uyghur, Other -U+10F82..U+10F85 NSM Mark: Non-spacing mark, Old_Uyghur, Extend -U+10F86..U+10F89 R Punctuation: Other punctuation, Old_Uyghur, Other +** Unrecognized script name "Old_Uyghur" find bidi PDF -U+202C *PDF Control: Format, Common, Control +U+202C *PDF Control: Format, common, Control find bidi CS -U+002C CS Punctuation: Other punctuation, Common, Other -U+002E..U+002F CS Punctuation: Other punctuation, Common, Other - U+003A CS Punctuation: Other punctuation, Common, Other - U+00A0 CS Separator: Space separator, Common, Other - U+060C CS Punctuation: Other punctuation, Common, Other, [Arabic, Nko, Hanifi_Rohingya, Syriac, Thaana, Yezidi] - U+202F CS Separator: Space separator, Common, Other, [Latin, Mongolian] - U+2044 CS Symbol: Mathematical symbol, Common, Other - U+FE50 CS Punctuation: Other punctuation, Common, Other - U+FE52 CS Punctuation: Other punctuation, Common, Other - U+FE55 CS Punctuation: Other punctuation, Common, Other - U+FF0C CS Punctuation: Other punctuation, Common, Other -U+FF0E..U+FF0F CS Punctuation: Other punctuation, Common, Other - U+FF1A CS 
Punctuation: Other punctuation, Common, Other +U+002C CS Punctuation: Other punctuation, common, Other +U+002E..U+002F CS Punctuation: Other punctuation, common, Other + U+003A CS Punctuation: Other punctuation, common, Other + U+00A0 CS Separator: Space separator, common, Other + U+060C CS Punctuation: Other punctuation, common, Other, [arabic, nko, hanifirohingya, syriac, thaana, yezidi] + U+202F CS Separator: Space separator, common, Other, [latin, mongolian] + U+2044 CS Symbol: Mathematical symbol, common, Other + U+FE50 CS Punctuation: Other punctuation, common, Other + U+FE52 CS Punctuation: Other punctuation, common, Other + U+FE55 CS Punctuation: Other punctuation, common, Other + U+FF0C CS Punctuation: Other punctuation, common, Other +U+FF0E..U+FF0F CS Punctuation: Other punctuation, common, Other + U+FF1A CS Punctuation: Other punctuation, common, Other find bidi CS type Sm -U+2044 CS Symbol: Mathematical symbol, Common, Other +U+2044 CS Symbol: Mathematical symbol, common, Other find bidi B -U+000A B Control: Control, Common, LF -U+000D B Control: Control, Common, CR -U+001C..U+001E B Control: Control, Common, Control - U+0085 B Control: Control, Common, Control - U+2029 B Separator: Paragraph separator, Common, Control +U+000A B Control: Control, common, LF +U+000D B Control: Control, common, CR +U+001C..U+001E B Control: Control, common, Control + U+0085 B Control: Control, common, Control + U+2029 B Separator: Paragraph separator, common, Control find bidi FSI -U+2068 *FSI Control: Format, Common, Control +U+2068 *FSI Control: Format, common, Control find bidi PDI -U+2069 *PDI Control: Format, Common, Control +U+2069 *PDI Control: Format, common, Control find bidi RLI -U+2067 *RLI Control: Format, Common, Control +U+2067 *RLI Control: Format, common, Control find bidi RLO -U+202E *RLO Control: Format, Common, Control +U+202E *RLO Control: Format, common, Control find bidi S -U+0009 S Control: Control, Common, Control -U+000B S Control: Control, 
Common, Control -U+001F S Control: Control, Common, Control +U+0009 S Control: Control, common, Control +U+000B S Control: Control, common, Control +U+001F S Control: Control, common, Control find bidi WS -U+000C WS Control: Control, Common, Control -U+0020 WS Separator: Space separator, Common, Other -U+1680 WS Separator: Space separator, Ogham, Other -U+2000..U+200A WS Separator: Space separator, Common, Other - U+2028 WS Separator: Line separator, Common, Control - U+205F WS Separator: Space separator, Common, Other - U+3000 WS Separator: Space separator, Common, Other +U+000C WS Control: Control, common, Control +U+0020 WS Separator: Space separator, common, Other +U+1680 WS Separator: Space separator, ogham, Other +U+2000..U+200A WS Separator: Space separator, common, Other + U+2028 WS Separator: Line separator, common, Control + U+205F WS Separator: Space separator, common, Other + U+3000 WS Separator: Space separator, common, Other find bidi_control -U+061C *AL Control: Format, Arabic, Control, [Arabic, Syriac, Thaana] -U+200E *L Control: Format, Common, Control -U+200F *R Control: Format, Common, Control -U+202A *LRE Control: Format, Common, Control -U+202B *RLE Control: Format, Common, Control -U+202C *PDF Control: Format, Common, Control -U+202D *LRO Control: Format, Common, Control -U+202E *RLO Control: Format, Common, Control -U+2066 *LRT Control: Format, Common, Control -U+2067 *RLI Control: Format, Common, Control -U+2068 *FSI Control: Format, Common, Control -U+2069 *PDI Control: Format, Common, Control +U+061C *AL Control: Format, arabic, Control, [arabic, syriac, thaana] +U+200E *L Control: Format, common, Control +U+200F *R Control: Format, common, Control +U+202A *LRE Control: Format, common, Control +U+202B *RLE Control: Format, common, Control +U+202C *PDF Control: Format, common, Control +U+202D *LRO Control: Format, common, Control +U+202E *RLO Control: Format, common, Control +U+2066 *LRI Control: Format, common, Control +U+2067 *RLI 
Control: Format, common, Control +U+2068 *FSI Control: Format, common, Control +U+2069 *PDI Control: Format, common, Control diff --git a/src/pcre2_auto_possess.c b/src/pcre2_auto_possess.c index e5e0895..f4dc3b5 100644 --- a/src/pcre2_auto_possess.c +++ b/src/pcre2_auto_possess.c @@ -123,18 +123,20 @@ opcode is used to select the column. The values are as follows: */ static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = { -/* ANY LAMP GC PC SC ALNUM SPACE PXSPACE WORD CLIST UCNC */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */ - { 0, 3, 0, 0, 0, 3, 1, 1, 0, 0, 0 }, /* PT_LAMP */ - { 0, 0, 2, 4, 0, 9, 10, 10, 11, 0, 0 }, /* PT_GC */ - { 0, 0, 5, 2, 0, 15, 16, 16, 17, 0, 0 }, /* PT_PC */ - { 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0 }, /* PT_SC */ - { 0, 3, 6, 12, 0, 3, 1, 1, 0, 0, 0 }, /* PT_ALNUM */ - { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_SPACE */ - { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_PXSPACE */ - { 0, 0, 8, 14, 0, 0, 1, 1, 3, 0, 0 }, /* PT_WORD */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 } /* PT_UCNC */ +/* ANY LAMP GC PC SC ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BIDICO */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */ + { 0, 3, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 1 }, /* PT_LAMP */ + { 0, 0, 2, 4, 0, 9, 10, 10, 11, 0, 0, 0, 0 }, /* PT_GC */ + { 0, 0, 5, 2, 0, 15, 16, 16, 17, 0, 0, 0, 0 }, /* PT_PC */ + { 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SC */ + { 0, 3, 6, 12, 0, 3, 1, 1, 0, 0, 0, 0, 1 }, /* PT_ALNUM */ + { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0, 0, 1 }, /* PT_SPACE */ + { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0, 0, 1 }, /* PT_PXSPACE */ + { 0, 0, 8, 14, 0, 0, 1, 1, 3, 0, 0, 0, 1 }, /* PT_WORD */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 }, /* PT_UCNC */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_BIDICL */ + { 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 } /* PT_BIDICO */ }; /* This table is used to check whether 
auto-possessification is possible @@ -251,6 +253,14 @@ switch(ptype) if (c == *p++) return negated; } break; /* Control never reaches here */ + + /* Haven't yet thought these through. */ + + case PT_BIDICL: + return FALSE; + + case PT_BIDICO: + return FALSE; } return FALSE; diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 383159b..09f51ac 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -2088,7 +2088,8 @@ get_ucp(PCRE2_SPTR *ptrptr, BOOL *negptr, uint16_t *ptypeptr, PCRE2_UCHAR c; PCRE2_SIZE i, bot, top; PCRE2_SPTR ptr = *ptrptr; -PCRE2_UCHAR name[32]; +PCRE2_UCHAR name[50]; +PCRE2_UCHAR *vptr = NULL; if (ptr >= cb->end_pattern) goto ERROR_RETURN; c = *ptr++; @@ -2109,9 +2110,11 @@ if (c == CHAR_LEFT_CURLY_BRACKET) { if (ptr >= cb->end_pattern) goto ERROR_RETURN; c = *ptr++; + while (c == '_' || c == '-' || isspace(c)) c = *ptr++; if (c == CHAR_NUL) goto ERROR_RETURN; if (c == CHAR_RIGHT_CURLY_BRACKET) break; - name[i] = c; + name[i] = tolower(c); + if (c == ':' || c == '=') vptr = name + i; } if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN; name[i] = 0; @@ -2122,13 +2125,28 @@ letter. */ else if (MAX_255(c) && (cb->ctypes[c] & ctype_letter) != 0) { - name[0] = c; + name[0] = tolower(c); name[1] = 0; } else goto ERROR_RETURN; *ptrptr = ptr; +/* If the property contains ':' or '=' we have class name and value separately +specified. The only case currently supported is Bidi_Class, for which the +property names are "bidi". */ + +if (vptr != NULL) + { + *vptr = 0; /* Terminate class name */ + if (PRIV(strcmp_c8)(name, "bidiclass") != 0) + { + *errorcodeptr = ERR47; + return FALSE; + } + memmove(name + 4, vptr + 1, (name + i - vptr)*sizeof(PCRE2_UCHAR)); + } + /* Search for a recognized property name using binary chop. 
*/ bot = 0; @@ -2147,6 +2165,7 @@ while (bot < top) } if (r > 0) bot = i + 1; else top = i; } + *errorcodeptr = ERR47; /* Unrecognized name */ return FALSE; diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c index 004252f..f0570b9 100644 --- a/src/pcre2_dfa_match.c +++ b/src/pcre2_dfa_match.c @@ -1240,6 +1240,14 @@ for (;;) c >= 0xe000; break; + case PT_BIDICO: + OK = UCD_BIDICONTROL(c) != 0; + break; + + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code[2]; + break; + /* Should never occur, but keep compilers from grumbling. */ default: @@ -1498,6 +1506,14 @@ for (;;) c >= 0xe000; break; + case PT_BIDICO: + OK = UCD_BIDICONTROL(c) != 0; + break; + + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code[3]; + break; + /* Should never occur, but keep compilers from grumbling. */ default: @@ -1739,6 +1755,14 @@ for (;;) c >= 0xe000; break; + case PT_BIDICO: + OK = UCD_BIDICONTROL(c) != 0; + break; + + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code[3]; + break; + /* Should never occur, but keep compilers from grumbling. */ default: @@ -2005,6 +2029,14 @@ for (;;) c >= 0xe000; break; + case PT_BIDICO: + OK = UCD_BIDICONTROL(c) != 0; + break; + + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code[1 + IMM2_SIZE + 2]; + break; + /* Should never occur, but keep compilers from grumbling. */ default: diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index 30f45bb..2901352 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -1262,12 +1262,14 @@ only. */ #define PT_PC 3 /* Specified particular characteristic (e.g. Lu) */ #define PT_SC 4 /* Script (e.g. 
Han) */ #define PT_ALNUM 5 /* Alphanumeric - the union of L and N */ -#define PT_SPACE 6 /* Perl space - Z plus 9,10,12,13 */ +#define PT_SPACE 6 /* Perl space - general category Z plus 9,10,12,13 */ #define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */ #define PT_WORD 8 /* Word - L plus N plus underscore */ #define PT_CLIST 9 /* Pseudo-property: match character list */ #define PT_UCNC 10 /* Universal Character nameable character */ -#define PT_TABSIZE 11 /* Size of square table for autopossessify tests */ +#define PT_BIDICL 11 /* Specified bidi class */ +#define PT_BIDICO 12 /* Bidi control character */ +#define PT_TABSIZE 13 /* Size of square table for autopossessify tests */ /* The following special properties are used only in XCLASS items, when POSIX classes are specified and PCRE2_UCP is set - in other words, for Unicode @@ -1275,22 +1277,22 @@ handling of these classes. They are not available via the \p or \P escapes like those in the above list, and so they do not take part in the autopossessifying table. */ -#define PT_PXGRAPH 11 /* [:graph:] - characters that mark the paper */ -#define PT_PXPRINT 12 /* [:print:] - [:graph:] plus non-control spaces */ -#define PT_PXPUNCT 13 /* [:punct:] - punctuation characters */ +#define PT_PXGRAPH 13 /* [:graph:] - characters that mark the paper */ +#define PT_PXPRINT 14 /* [:print:] - [:graph:] plus non-control spaces */ +#define PT_PXPUNCT 15 /* [:punct:] - punctuation characters */ /* Flag bits and data types for the extended class (OP_XCLASS) for classes that contain characters with values greater than 255. */ -#define XCL_NOT 0x01 /* Flag: this is a negative class */ -#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ -#define XCL_HASPROP 0x04 /* Flag: property checks are present. */ +#define XCL_NOT 0x01 /* Flag: this is a negative class */ +#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ +#define XCL_HASPROP 0x04 /* Flag: property checks are present. 
*/ -#define XCL_END 0 /* Marks end of individual items */ -#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ -#define XCL_RANGE 2 /* A range (two multibyte chars) follows */ -#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ -#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ +#define XCL_END 0 /* Marks end of individual items */ +#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ +#define XCL_RANGE 2 /* A range (two multibyte chars) follows */ +#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ +#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ /* These are escaped items that aren't just an encoding of a particular data value such as \n. They must have non-zero values, as check_escape() returns 0 @@ -1828,8 +1830,11 @@ typedef struct { property. The remaining bits hold the bidi class, but as there are only 23 classes, we can mask off 5 bits - leaving two free for the future. */ -#define UCD_BIDICLASS(ch) (GET_UCD(ch)->bidi & 0x1fu) -#define UCD_BIDICONTROL(ch) (GET_UCD(ch)->bidi & 0x80u) +#define UCD_BIDICLASS_MASK 0x1fu +#define UCD_BIDICONTROL_BIT 0x80u + +#define UCD_BIDICLASS(ch) (GET_UCD(ch)->bidi & UCD_BIDICLASS_MASK) +#define UCD_BIDICONTROL(ch) (GET_UCD(ch)->bidi & UCD_BIDICONTROL_BIT) /* Header for serialized pcre2 codes. 
*/ diff --git a/src/pcre2_match.c b/src/pcre2_match.c index 7cfa44c..710d4a2 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -159,7 +159,8 @@ enum { RM100=100, RM101 }; #ifdef SUPPORT_UNICODE enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207, RM208, RM209, RM210, RM211, RM212, RM213, RM214, RM215, - RM216, RM217, RM218, RM219, RM220, RM221, RM222 }; + RM216, RM217, RM218, RM219, RM220, RM221, RM222, RM223, + RM224 }; #endif /* Define short names for general fields in the current backtrack frame, which @@ -2503,6 +2504,16 @@ fprintf(stderr, "++ op=%d\n", *Fecode); RRETURN(MATCH_NOMATCH); break; + case PT_BIDICO: + if (((prop->bidi & UCD_BIDICONTROL_BIT) != 0) == (Fop == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + break; + + case PT_BIDICL: + if (((prop->bidi & UCD_BIDICLASS_MASK) == Fecode[2]) == (Fop == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + break; + /* This should never occur */ default: @@ -2804,6 +2815,34 @@ fprintf(stderr, "++ op=%d\n", *Fecode); } break; + case PT_BIDICO: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_BIDICONTROL(fc) != 0) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_BIDICL: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + break; + /* This should not occur */ default: @@ -3562,6 +3601,40 @@ fprintf(stderr, "++ op=%d\n", *Fecode); } /* Control never gets here */ + case PT_BIDICO: + for (;;) + { + RMATCH(Fecode, RM223); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_BIDICONTROL(fc) != 0) == (Lctype == OP_NOTPROP)) + 
RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + + case PT_BIDICL: + for (;;) + { + RMATCH(Fecode, RM224); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + /* This should never occur */ default: return PCRE2_ERROR_INTERNAL; @@ -4076,6 +4149,38 @@ fprintf(stderr, "++ op=%d\n", *Fecode); } break; + case PT_BIDICO: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + if ((UCD_BIDICONTROL(fc) != 0) == (Lctype == OP_NOTPROP)) + break; + Feptr+= len; + } + break; + + case PT_BIDICL: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + break; + Feptr+= len; + } + break; + default: return PCRE2_ERROR_INTERNAL; } @@ -6066,7 +6171,7 @@ switch (Freturn_id) LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206) LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213) LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220) - LBL(221) LBL(222) + LBL(221) LBL(222) LBL(223) LBL(224) #endif default: diff --git a/src/pcre2_printint.c b/src/pcre2_printint.c index b9bab02..017c6e0 100644 --- a/src/pcre2_printint.c +++ b/src/pcre2_printint.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2019 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -273,8 +273,8 @@ print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after) { if (code[1] != PT_CLIST) { - fprintf(f, "%s%s %s%s", before, OP_names[*code], get_ucpname(code[1], - code[2]), after); + const char *s = get_ucpname(code[1], code[2]); + fprintf(f, "%s%s %c%s%s", before, OP_names[*code], toupper(s[0]), s+1, after); } else { @@ -724,6 +724,7 @@ for(;;) { unsigned int ptype = *ccode++; unsigned int pvalue = *ccode++; + const char *s; switch(ptype) { @@ -740,8 +741,8 @@ for(;;) break; default: - fprintf(f, "\\%c{%s}", (not? 'P':'p'), - get_ucpname(ptype, pvalue)); + s = get_ucpname(ptype, pvalue); + fprintf(f, "\\%c{%c%s}", (not? 'P':'p'), toupper(s[0]), s+1); break; } } diff --git a/src/pcre2_string_utils.c b/src/pcre2_string_utils.c index d6be01a..ebfa943 100644 --- a/src/pcre2_string_utils.c +++ b/src/pcre2_string_utils.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2018 University of Cambridge + New API code Copyright (c) 2018-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without diff --git a/src/pcre2_tables.c b/src/pcre2_tables.c index c164e97..1938db8 100644 --- a/src/pcre2_tables.c +++ b/src/pcre2_tables.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2019 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -236,630 +236,709 @@ to generate this data automatically instead of maintaining it by hand. The script was updated in March 2009 to generate a new EBCDIC-compliant version. Like all other character and string literals that are compared against the regular expression pattern, we must use STR_ macros instead of literal -strings to make sure that UTF-8 support works on EBCDIC platforms. */ +strings to make sure that UTF-8 support works on EBCDIC platforms. -#define STRING_Adlam0 STR_A STR_d STR_l STR_a STR_m "\0" -#define STRING_Ahom0 STR_A STR_h STR_o STR_m "\0" -#define STRING_Anatolian_Hieroglyphs0 STR_A STR_n STR_a STR_t STR_o STR_l STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" -#define STRING_Any0 STR_A STR_n STR_y "\0" -#define STRING_Arabic0 STR_A STR_r STR_a STR_b STR_i STR_c "\0" -#define STRING_Armenian0 STR_A STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0" -#define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0" -#define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0" -#define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0" -#define STRING_Bassa_Vah0 STR_B STR_a STR_s STR_s STR_a STR_UNDERSCORE STR_V STR_a STR_h "\0" -#define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0" -#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0" -#define STRING_Bhaiksuki0 STR_B STR_h STR_a STR_i STR_k STR_s STR_u STR_k STR_i "\0" -#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0" -#define STRING_Brahmi0 STR_B STR_r STR_a STR_h STR_m STR_i "\0" -#define STRING_Braille0 STR_B STR_r 
STR_a STR_i STR_l STR_l STR_e "\0" -#define STRING_Buginese0 STR_B STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0" -#define STRING_Buhid0 STR_B STR_u STR_h STR_i STR_d "\0" -#define STRING_C0 STR_C "\0" -#define STRING_Canadian_Aboriginal0 STR_C STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0" -#define STRING_Carian0 STR_C STR_a STR_r STR_i STR_a STR_n "\0" -#define STRING_Caucasian_Albanian0 STR_C STR_a STR_u STR_c STR_a STR_s STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_l STR_b STR_a STR_n STR_i STR_a STR_n "\0" -#define STRING_Cc0 STR_C STR_c "\0" -#define STRING_Cf0 STR_C STR_f "\0" -#define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0" -#define STRING_Cham0 STR_C STR_h STR_a STR_m "\0" -#define STRING_Cherokee0 STR_C STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0" -#define STRING_Chorasmian0 STR_C STR_h STR_o STR_r STR_a STR_s STR_m STR_i STR_a STR_n "\0" -#define STRING_Cn0 STR_C STR_n "\0" -#define STRING_Co0 STR_C STR_o "\0" -#define STRING_Common0 STR_C STR_o STR_m STR_m STR_o STR_n "\0" -#define STRING_Coptic0 STR_C STR_o STR_p STR_t STR_i STR_c "\0" -#define STRING_Cs0 STR_C STR_s "\0" -#define STRING_Cuneiform0 STR_C STR_u STR_n STR_e STR_i STR_f STR_o STR_r STR_m "\0" -#define STRING_Cypriot0 STR_C STR_y STR_p STR_r STR_i STR_o STR_t "\0" -#define STRING_Cypro_Minoan0 STR_C STR_y STR_p STR_r STR_o STR_UNDERSCORE STR_M STR_i STR_n STR_o STR_a STR_n "\0" -#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0" -#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0" -#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0" -#define STRING_Dives_Akuru0 STR_D STR_i STR_v STR_e STR_s STR_UNDERSCORE STR_A STR_k STR_u STR_r STR_u "\0" -#define STRING_Dogra0 STR_D STR_o STR_g STR_r STR_a "\0" -#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0" -#define STRING_Egyptian_Hieroglyphs0 
STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" -#define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0" -#define STRING_Elymaic0 STR_E STR_l STR_y STR_m STR_a STR_i STR_c "\0" -#define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0" -#define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0" -#define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0" -#define STRING_Gothic0 STR_G STR_o STR_t STR_h STR_i STR_c "\0" -#define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0" -#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0" -#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0" -#define STRING_Gunjala_Gondi0 STR_G STR_u STR_n STR_j STR_a STR_l STR_a STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0" -#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0" -#define STRING_Han0 STR_H STR_a STR_n "\0" -#define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0" -#define STRING_Hanifi_Rohingya0 STR_H STR_a STR_n STR_i STR_f STR_i STR_UNDERSCORE STR_R STR_o STR_h STR_i STR_n STR_g STR_y STR_a "\0" -#define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0" -#define STRING_Hatran0 STR_H STR_a STR_t STR_r STR_a STR_n "\0" -#define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0" -#define STRING_Hiragana0 STR_H STR_i STR_r STR_a STR_g STR_a STR_n STR_a "\0" -#define STRING_Imperial_Aramaic0 STR_I STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_UNDERSCORE STR_A STR_r STR_a STR_m STR_a STR_i STR_c "\0" -#define STRING_Inherited0 STR_I STR_n STR_h STR_e STR_r STR_i STR_t STR_e STR_d "\0" -#define STRING_Inscriptional_Pahlavi0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0" -#define STRING_Inscriptional_Parthian0 STR_I STR_n STR_s 
STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_r STR_t STR_h STR_i STR_a STR_n "\0" -#define STRING_Javanese0 STR_J STR_a STR_v STR_a STR_n STR_e STR_s STR_e "\0" -#define STRING_Kaithi0 STR_K STR_a STR_i STR_t STR_h STR_i "\0" -#define STRING_Kannada0 STR_K STR_a STR_n STR_n STR_a STR_d STR_a "\0" -#define STRING_Katakana0 STR_K STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0" -#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0" -#define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0" -#define STRING_Khitan_Small_Script0 STR_K STR_h STR_i STR_t STR_a STR_n STR_UNDERSCORE STR_S STR_m STR_a STR_l STR_l STR_UNDERSCORE STR_S STR_c STR_r STR_i STR_p STR_t "\0" -#define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0" -#define STRING_Khojki0 STR_K STR_h STR_o STR_j STR_k STR_i "\0" -#define STRING_Khudawadi0 STR_K STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0" -#define STRING_L0 STR_L "\0" -#define STRING_L_AMPERSAND0 STR_L STR_AMPERSAND "\0" -#define STRING_Lao0 STR_L STR_a STR_o "\0" -#define STRING_Latin0 STR_L STR_a STR_t STR_i STR_n "\0" -#define STRING_Lepcha0 STR_L STR_e STR_p STR_c STR_h STR_a "\0" -#define STRING_Limbu0 STR_L STR_i STR_m STR_b STR_u "\0" -#define STRING_Linear_A0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_A "\0" -#define STRING_Linear_B0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_B "\0" -#define STRING_Lisu0 STR_L STR_i STR_s STR_u "\0" -#define STRING_Ll0 STR_L STR_l "\0" -#define STRING_Lm0 STR_L STR_m "\0" -#define STRING_Lo0 STR_L STR_o "\0" -#define STRING_Lt0 STR_L STR_t "\0" -#define STRING_Lu0 STR_L STR_u "\0" -#define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0" -#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0" -#define STRING_M0 STR_M "\0" -#define STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0" -#define STRING_Makasar0 STR_M STR_a STR_k STR_a 
STR_s STR_a STR_r "\0" -#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0" -#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0" -#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0" -#define STRING_Marchen0 STR_M STR_a STR_r STR_c STR_h STR_e STR_n "\0" -#define STRING_Masaram_Gondi0 STR_M STR_a STR_s STR_a STR_r STR_a STR_m STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0" -#define STRING_Mc0 STR_M STR_c "\0" -#define STRING_Me0 STR_M STR_e "\0" -#define STRING_Medefaidrin0 STR_M STR_e STR_d STR_e STR_f STR_a STR_i STR_d STR_r STR_i STR_n "\0" -#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0" -#define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0" -#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0" -#define STRING_Meroitic_Hieroglyphs0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" -#define STRING_Miao0 STR_M STR_i STR_a STR_o "\0" -#define STRING_Mn0 STR_M STR_n "\0" -#define STRING_Modi0 STR_M STR_o STR_d STR_i "\0" -#define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0" -#define STRING_Mro0 STR_M STR_r STR_o "\0" -#define STRING_Multani0 STR_M STR_u STR_l STR_t STR_a STR_n STR_i "\0" -#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0" -#define STRING_N0 STR_N "\0" -#define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0" -#define STRING_Nandinagari0 STR_N STR_a STR_n STR_d STR_i STR_n STR_a STR_g STR_a STR_r STR_i "\0" -#define STRING_Nd0 STR_N STR_d "\0" -#define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0" -#define 
STRING_Newa0 STR_N STR_e STR_w STR_a "\0" -#define STRING_Nko0 STR_N STR_k STR_o "\0" -#define STRING_Nl0 STR_N STR_l "\0" -#define STRING_No0 STR_N STR_o "\0" -#define STRING_Nushu0 STR_N STR_u STR_s STR_h STR_u "\0" -#define STRING_Nyiakeng_Puachue_Hmong0 STR_N STR_y STR_i STR_a STR_k STR_e STR_n STR_g STR_UNDERSCORE STR_P STR_u STR_a STR_c STR_h STR_u STR_e STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0" -#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0" -#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0" -#define STRING_Old_Hungarian0 STR_O STR_l STR_d STR_UNDERSCORE STR_H STR_u STR_n STR_g STR_a STR_r STR_i STR_a STR_n "\0" -#define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0" -#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0" -#define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0" -#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0" -#define STRING_Old_Sogdian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_g STR_d STR_i STR_a STR_n "\0" -#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0" -#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0" -#define STRING_Old_Uyghur0 STR_O STR_l STR_d STR_UNDERSCORE STR_U STR_y STR_g STR_h STR_u STR_r "\0" -#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0" -#define STRING_Osage0 STR_O STR_s STR_a STR_g STR_e "\0" -#define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0" -#define STRING_P0 STR_P "\0" -#define STRING_Pahawh_Hmong0 STR_P STR_a STR_h STR_a STR_w STR_h STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0" -#define STRING_Palmyrene0 STR_P 
STR_a STR_l STR_m STR_y STR_r STR_e STR_n STR_e "\0" -#define STRING_Pau_Cin_Hau0 STR_P STR_a STR_u STR_UNDERSCORE STR_C STR_i STR_n STR_UNDERSCORE STR_H STR_a STR_u "\0" -#define STRING_Pc0 STR_P STR_c "\0" -#define STRING_Pd0 STR_P STR_d "\0" -#define STRING_Pe0 STR_P STR_e "\0" -#define STRING_Pf0 STR_P STR_f "\0" -#define STRING_Phags_Pa0 STR_P STR_h STR_a STR_g STR_s STR_UNDERSCORE STR_P STR_a "\0" -#define STRING_Phoenician0 STR_P STR_h STR_o STR_e STR_n STR_i STR_c STR_i STR_a STR_n "\0" -#define STRING_Pi0 STR_P STR_i "\0" -#define STRING_Po0 STR_P STR_o "\0" -#define STRING_Ps0 STR_P STR_s "\0" -#define STRING_Psalter_Pahlavi0 STR_P STR_s STR_a STR_l STR_t STR_e STR_r STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0" -#define STRING_Rejang0 STR_R STR_e STR_j STR_a STR_n STR_g "\0" -#define STRING_Runic0 STR_R STR_u STR_n STR_i STR_c "\0" -#define STRING_S0 STR_S "\0" -#define STRING_Samaritan0 STR_S STR_a STR_m STR_a STR_r STR_i STR_t STR_a STR_n "\0" -#define STRING_Saurashtra0 STR_S STR_a STR_u STR_r STR_a STR_s STR_h STR_t STR_r STR_a "\0" -#define STRING_Sc0 STR_S STR_c "\0" -#define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0" -#define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0" -#define STRING_Siddham0 STR_S STR_i STR_d STR_d STR_h STR_a STR_m "\0" -#define STRING_SignWriting0 STR_S STR_i STR_g STR_n STR_W STR_r STR_i STR_t STR_i STR_n STR_g "\0" -#define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0" -#define STRING_Sk0 STR_S STR_k "\0" -#define STRING_Sm0 STR_S STR_m "\0" -#define STRING_So0 STR_S STR_o "\0" -#define STRING_Sogdian0 STR_S STR_o STR_g STR_d STR_i STR_a STR_n "\0" -#define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0" -#define STRING_Soyombo0 STR_S STR_o STR_y STR_o STR_m STR_b STR_o "\0" -#define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0" -#define STRING_Syloti_Nagri0 STR_S 
STR_y STR_l STR_o STR_t STR_i STR_UNDERSCORE STR_N STR_a STR_g STR_r STR_i "\0" -#define STRING_Syriac0 STR_S STR_y STR_r STR_i STR_a STR_c "\0" -#define STRING_Tagalog0 STR_T STR_a STR_g STR_a STR_l STR_o STR_g "\0" -#define STRING_Tagbanwa0 STR_T STR_a STR_g STR_b STR_a STR_n STR_w STR_a "\0" -#define STRING_Tai_Le0 STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_e "\0" -#define STRING_Tai_Tham0 STR_T STR_a STR_i STR_UNDERSCORE STR_T STR_h STR_a STR_m "\0" -#define STRING_Tai_Viet0 STR_T STR_a STR_i STR_UNDERSCORE STR_V STR_i STR_e STR_t "\0" -#define STRING_Takri0 STR_T STR_a STR_k STR_r STR_i "\0" -#define STRING_Tamil0 STR_T STR_a STR_m STR_i STR_l "\0" -#define STRING_Tangsa0 STR_T STR_a STR_n STR_g STR_s STR_a "\0" -#define STRING_Tangut0 STR_T STR_a STR_n STR_g STR_u STR_t "\0" -#define STRING_Telugu0 STR_T STR_e STR_l STR_u STR_g STR_u "\0" -#define STRING_Thaana0 STR_T STR_h STR_a STR_a STR_n STR_a "\0" -#define STRING_Thai0 STR_T STR_h STR_a STR_i "\0" -#define STRING_Tibetan0 STR_T STR_i STR_b STR_e STR_t STR_a STR_n "\0" -#define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0" -#define STRING_Tirhuta0 STR_T STR_i STR_r STR_h STR_u STR_t STR_a "\0" -#define STRING_Toto0 STR_T STR_o STR_t STR_o "\0" -#define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0" -#define STRING_Unknown0 STR_U STR_n STR_k STR_n STR_o STR_w STR_n "\0" -#define STRING_Vai0 STR_V STR_a STR_i "\0" -#define STRING_Vithkuqi0 STR_V STR_i STR_t STR_h STR_k STR_u STR_q STR_i "\0" -#define STRING_Wancho0 STR_W STR_a STR_n STR_c STR_h STR_o "\0" -#define STRING_Warang_Citi0 STR_W STR_a STR_r STR_a STR_n STR_g STR_UNDERSCORE STR_C STR_i STR_t STR_i "\0" -#define STRING_Xan0 STR_X STR_a STR_n "\0" -#define STRING_Xps0 STR_X STR_p STR_s "\0" -#define STRING_Xsp0 STR_X STR_s STR_p "\0" -#define STRING_Xuc0 STR_X STR_u STR_c "\0" -#define STRING_Xwd0 STR_X STR_w STR_d "\0" -#define STRING_Yezidi0 STR_Y STR_e STR_z STR_i STR_d STR_i "\0" -#define STRING_Yi0 
STR_Y STR_i "\0" -#define STRING_Z0 STR_Z "\0" -#define STRING_Zanabazar_Square0 STR_Z STR_a STR_n STR_a STR_b STR_a STR_z STR_a STR_r STR_UNDERSCORE STR_S STR_q STR_u STR_a STR_r STR_e "\0" -#define STRING_Zl0 STR_Z STR_l "\0" -#define STRING_Zp0 STR_Z STR_p "\0" -#define STRING_Zs0 STR_Z STR_s "\0" +December 2021: the script now ensures that all letters are lower cased, and +that underscores are removed, in accordance with the "loose matching" rules +that Unicode advises and Perl uses. */ + +#define STRING_adlam0 STR_a STR_d STR_l STR_a STR_m "\0" +#define STRING_ahom0 STR_a STR_h STR_o STR_m "\0" +#define STRING_anatolianhieroglyphs0 STR_a STR_n STR_a STR_t STR_o STR_l STR_i STR_a STR_n STR_h STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" +#define STRING_any0 STR_a STR_n STR_y "\0" +#define STRING_arabic0 STR_a STR_r STR_a STR_b STR_i STR_c "\0" +#define STRING_armenian0 STR_a STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0" +#define STRING_avestan0 STR_a STR_v STR_e STR_s STR_t STR_a STR_n "\0" +#define STRING_balinese0 STR_b STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0" +#define STRING_bamum0 STR_b STR_a STR_m STR_u STR_m "\0" +#define STRING_bassavah0 STR_b STR_a STR_s STR_s STR_a STR_v STR_a STR_h "\0" +#define STRING_batak0 STR_b STR_a STR_t STR_a STR_k "\0" +#define STRING_bengali0 STR_b STR_e STR_n STR_g STR_a STR_l STR_i "\0" +#define STRING_bhaiksuki0 STR_b STR_h STR_a STR_i STR_k STR_s STR_u STR_k STR_i "\0" +#define STRING_bidial0 STR_b STR_i STR_d STR_i STR_a STR_l "\0" +#define STRING_bidian0 STR_b STR_i STR_d STR_i STR_a STR_n "\0" +#define STRING_bidib0 STR_b STR_i STR_d STR_i STR_b "\0" +#define STRING_bidibn0 STR_b STR_i STR_d STR_i STR_b STR_n "\0" +#define STRING_bidicontrol0 STR_b STR_i STR_d STR_i STR_c STR_o STR_n STR_t STR_r STR_o STR_l "\0" +#define STRING_bidics0 STR_b STR_i STR_d STR_i STR_c STR_s "\0" +#define STRING_bidien0 STR_b STR_i STR_d STR_i STR_e STR_n "\0" +#define STRING_bidies0 STR_b STR_i STR_d STR_i STR_e 
STR_s "\0" +#define STRING_bidiet0 STR_b STR_i STR_d STR_i STR_e STR_t "\0" +#define STRING_bidifsi0 STR_b STR_i STR_d STR_i STR_f STR_s STR_i "\0" +#define STRING_bidil0 STR_b STR_i STR_d STR_i STR_l "\0" +#define STRING_bidilre0 STR_b STR_i STR_d STR_i STR_l STR_r STR_e "\0" +#define STRING_bidilri0 STR_b STR_i STR_d STR_i STR_l STR_r STR_i "\0" +#define STRING_bidilro0 STR_b STR_i STR_d STR_i STR_l STR_r STR_o "\0" +#define STRING_bidinsm0 STR_b STR_i STR_d STR_i STR_n STR_s STR_m "\0" +#define STRING_bidion0 STR_b STR_i STR_d STR_i STR_o STR_n "\0" +#define STRING_bidipdf0 STR_b STR_i STR_d STR_i STR_p STR_d STR_f "\0" +#define STRING_bidipdi0 STR_b STR_i STR_d STR_i STR_p STR_d STR_i "\0" +#define STRING_bidir0 STR_b STR_i STR_d STR_i STR_r "\0" +#define STRING_bidirle0 STR_b STR_i STR_d STR_i STR_r STR_l STR_e "\0" +#define STRING_bidirli0 STR_b STR_i STR_d STR_i STR_r STR_l STR_i "\0" +#define STRING_bidirlo0 STR_b STR_i STR_d STR_i STR_r STR_l STR_o "\0" +#define STRING_bidis0 STR_b STR_i STR_d STR_i STR_s "\0" +#define STRING_bidiws0 STR_b STR_i STR_d STR_i STR_w STR_s "\0" +#define STRING_bopomofo0 STR_b STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0" +#define STRING_brahmi0 STR_b STR_r STR_a STR_h STR_m STR_i "\0" +#define STRING_braille0 STR_b STR_r STR_a STR_i STR_l STR_l STR_e "\0" +#define STRING_buginese0 STR_b STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0" +#define STRING_buhid0 STR_b STR_u STR_h STR_i STR_d "\0" +#define STRING_c0 STR_c "\0" +#define STRING_canadianaboriginal0 STR_c STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_a STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0" +#define STRING_carian0 STR_c STR_a STR_r STR_i STR_a STR_n "\0" +#define STRING_caucasianalbanian0 STR_c STR_a STR_u STR_c STR_a STR_s STR_i STR_a STR_n STR_a STR_l STR_b STR_a STR_n STR_i STR_a STR_n "\0" +#define STRING_cc0 STR_c STR_c "\0" +#define STRING_cf0 STR_c STR_f "\0" +#define STRING_chakma0 STR_c STR_h STR_a STR_k STR_m STR_a "\0" +#define STRING_cham0 
STR_c STR_h STR_a STR_m "\0" +#define STRING_cherokee0 STR_c STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0" +#define STRING_chorasmian0 STR_c STR_h STR_o STR_r STR_a STR_s STR_m STR_i STR_a STR_n "\0" +#define STRING_cn0 STR_c STR_n "\0" +#define STRING_co0 STR_c STR_o "\0" +#define STRING_common0 STR_c STR_o STR_m STR_m STR_o STR_n "\0" +#define STRING_coptic0 STR_c STR_o STR_p STR_t STR_i STR_c "\0" +#define STRING_cs0 STR_c STR_s "\0" +#define STRING_cuneiform0 STR_c STR_u STR_n STR_e STR_i STR_f STR_o STR_r STR_m "\0" +#define STRING_cypriot0 STR_c STR_y STR_p STR_r STR_i STR_o STR_t "\0" +#define STRING_cyprominoan0 STR_c STR_y STR_p STR_r STR_o STR_m STR_i STR_n STR_o STR_a STR_n "\0" +#define STRING_cyrillic0 STR_c STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0" +#define STRING_deseret0 STR_d STR_e STR_s STR_e STR_r STR_e STR_t "\0" +#define STRING_devanagari0 STR_d STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0" +#define STRING_divesakuru0 STR_d STR_i STR_v STR_e STR_s STR_a STR_k STR_u STR_r STR_u "\0" +#define STRING_dogra0 STR_d STR_o STR_g STR_r STR_a "\0" +#define STRING_duployan0 STR_d STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0" +#define STRING_egyptianhieroglyphs0 STR_e STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_h STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" +#define STRING_elbasan0 STR_e STR_l STR_b STR_a STR_s STR_a STR_n "\0" +#define STRING_elymaic0 STR_e STR_l STR_y STR_m STR_a STR_i STR_c "\0" +#define STRING_ethiopic0 STR_e STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0" +#define STRING_georgian0 STR_g STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0" +#define STRING_glagolitic0 STR_g STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0" +#define STRING_gothic0 STR_g STR_o STR_t STR_h STR_i STR_c "\0" +#define STRING_grantha0 STR_g STR_r STR_a STR_n STR_t STR_h STR_a "\0" +#define STRING_greek0 STR_g STR_r STR_e STR_e STR_k "\0" +#define STRING_gujarati0 STR_g STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0" 
+#define STRING_gunjalagondi0 STR_g STR_u STR_n STR_j STR_a STR_l STR_a STR_g STR_o STR_n STR_d STR_i "\0" +#define STRING_gurmukhi0 STR_g STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0" +#define STRING_han0 STR_h STR_a STR_n "\0" +#define STRING_hangul0 STR_h STR_a STR_n STR_g STR_u STR_l "\0" +#define STRING_hanifirohingya0 STR_h STR_a STR_n STR_i STR_f STR_i STR_r STR_o STR_h STR_i STR_n STR_g STR_y STR_a "\0" +#define STRING_hanunoo0 STR_h STR_a STR_n STR_u STR_n STR_o STR_o "\0" +#define STRING_hatran0 STR_h STR_a STR_t STR_r STR_a STR_n "\0" +#define STRING_hebrew0 STR_h STR_e STR_b STR_r STR_e STR_w "\0" +#define STRING_hiragana0 STR_h STR_i STR_r STR_a STR_g STR_a STR_n STR_a "\0" +#define STRING_imperialaramaic0 STR_i STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_a STR_r STR_a STR_m STR_a STR_i STR_c "\0" +#define STRING_inherited0 STR_i STR_n STR_h STR_e STR_r STR_i STR_t STR_e STR_d "\0" +#define STRING_inscriptionalpahlavi0 STR_i STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_p STR_a STR_h STR_l STR_a STR_v STR_i "\0" +#define STRING_inscriptionalparthian0 STR_i STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_p STR_a STR_r STR_t STR_h STR_i STR_a STR_n "\0" +#define STRING_javanese0 STR_j STR_a STR_v STR_a STR_n STR_e STR_s STR_e "\0" +#define STRING_kaithi0 STR_k STR_a STR_i STR_t STR_h STR_i "\0" +#define STRING_kannada0 STR_k STR_a STR_n STR_n STR_a STR_d STR_a "\0" +#define STRING_katakana0 STR_k STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0" +#define STRING_kayahli0 STR_k STR_a STR_y STR_a STR_h STR_l STR_i "\0" +#define STRING_kharoshthi0 STR_k STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0" +#define STRING_khitansmallscript0 STR_k STR_h STR_i STR_t STR_a STR_n STR_s STR_m STR_a STR_l STR_l STR_s STR_c STR_r STR_i STR_p STR_t "\0" +#define STRING_khmer0 STR_k STR_h STR_m STR_e STR_r "\0" +#define STRING_khojki0 STR_k STR_h STR_o STR_j STR_k STR_i "\0" +#define STRING_khudawadi0 
STR_k STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0" +#define STRING_l0 STR_l "\0" +#define STRING_l_AMPERSAND0 STR_l STR_AMPERSAND "\0" +#define STRING_lao0 STR_l STR_a STR_o "\0" +#define STRING_latin0 STR_l STR_a STR_t STR_i STR_n "\0" +#define STRING_lc0 STR_l STR_c "\0" +#define STRING_lepcha0 STR_l STR_e STR_p STR_c STR_h STR_a "\0" +#define STRING_limbu0 STR_l STR_i STR_m STR_b STR_u "\0" +#define STRING_lineara0 STR_l STR_i STR_n STR_e STR_a STR_r STR_a "\0" +#define STRING_linearb0 STR_l STR_i STR_n STR_e STR_a STR_r STR_b "\0" +#define STRING_lisu0 STR_l STR_i STR_s STR_u "\0" +#define STRING_ll0 STR_l STR_l "\0" +#define STRING_lm0 STR_l STR_m "\0" +#define STRING_lo0 STR_l STR_o "\0" +#define STRING_lt0 STR_l STR_t "\0" +#define STRING_lu0 STR_l STR_u "\0" +#define STRING_lycian0 STR_l STR_y STR_c STR_i STR_a STR_n "\0" +#define STRING_lydian0 STR_l STR_y STR_d STR_i STR_a STR_n "\0" +#define STRING_m0 STR_m "\0" +#define STRING_mahajani0 STR_m STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0" +#define STRING_makasar0 STR_m STR_a STR_k STR_a STR_s STR_a STR_r "\0" +#define STRING_malayalam0 STR_m STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0" +#define STRING_mandaic0 STR_m STR_a STR_n STR_d STR_a STR_i STR_c "\0" +#define STRING_manichaean0 STR_m STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0" +#define STRING_marchen0 STR_m STR_a STR_r STR_c STR_h STR_e STR_n "\0" +#define STRING_masaramgondi0 STR_m STR_a STR_s STR_a STR_r STR_a STR_m STR_g STR_o STR_n STR_d STR_i "\0" +#define STRING_mc0 STR_m STR_c "\0" +#define STRING_me0 STR_m STR_e "\0" +#define STRING_medefaidrin0 STR_m STR_e STR_d STR_e STR_f STR_a STR_i STR_d STR_r STR_i STR_n "\0" +#define STRING_meeteimayek0 STR_m STR_e STR_e STR_t STR_e STR_i STR_m STR_a STR_y STR_e STR_k "\0" +#define STRING_mendekikakui0 STR_m STR_e STR_n STR_d STR_e STR_k STR_i STR_k STR_a STR_k STR_u STR_i "\0" +#define STRING_meroiticcursive0 STR_m STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_c STR_u 
STR_r STR_s STR_i STR_v STR_e "\0" +#define STRING_meroitichieroglyphs0 STR_m STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_h STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0" +#define STRING_miao0 STR_m STR_i STR_a STR_o "\0" +#define STRING_mn0 STR_m STR_n "\0" +#define STRING_modi0 STR_m STR_o STR_d STR_i "\0" +#define STRING_mongolian0 STR_m STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0" +#define STRING_mro0 STR_m STR_r STR_o "\0" +#define STRING_multani0 STR_m STR_u STR_l STR_t STR_a STR_n STR_i "\0" +#define STRING_myanmar0 STR_m STR_y STR_a STR_n STR_m STR_a STR_r "\0" +#define STRING_n0 STR_n "\0" +#define STRING_nabataean0 STR_n STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0" +#define STRING_nandinagari0 STR_n STR_a STR_n STR_d STR_i STR_n STR_a STR_g STR_a STR_r STR_i "\0" +#define STRING_nd0 STR_n STR_d "\0" +#define STRING_newa0 STR_n STR_e STR_w STR_a "\0" +#define STRING_newtailue0 STR_n STR_e STR_w STR_t STR_a STR_i STR_l STR_u STR_e "\0" +#define STRING_nko0 STR_n STR_k STR_o "\0" +#define STRING_nl0 STR_n STR_l "\0" +#define STRING_no0 STR_n STR_o "\0" +#define STRING_nushu0 STR_n STR_u STR_s STR_h STR_u "\0" +#define STRING_nyiakengpuachuehmong0 STR_n STR_y STR_i STR_a STR_k STR_e STR_n STR_g STR_p STR_u STR_a STR_c STR_h STR_u STR_e STR_h STR_m STR_o STR_n STR_g "\0" +#define STRING_ogham0 STR_o STR_g STR_h STR_a STR_m "\0" +#define STRING_olchiki0 STR_o STR_l STR_c STR_h STR_i STR_k STR_i "\0" +#define STRING_oldhungarian0 STR_o STR_l STR_d STR_h STR_u STR_n STR_g STR_a STR_r STR_i STR_a STR_n "\0" +#define STRING_olditalic0 STR_o STR_l STR_d STR_i STR_t STR_a STR_l STR_i STR_c "\0" +#define STRING_oldnortharabian0 STR_o STR_l STR_d STR_n STR_o STR_r STR_t STR_h STR_a STR_r STR_a STR_b STR_i STR_a STR_n "\0" +#define STRING_oldpermic0 STR_o STR_l STR_d STR_p STR_e STR_r STR_m STR_i STR_c "\0" +#define STRING_oldpersian0 STR_o STR_l STR_d STR_p STR_e STR_r STR_s STR_i STR_a STR_n "\0" +#define STRING_oldsogdian0 STR_o 
STR_l STR_d STR_s STR_o STR_g STR_d STR_i STR_a STR_n "\0" +#define STRING_oldsoutharabian0 STR_o STR_l STR_d STR_s STR_o STR_u STR_t STR_h STR_a STR_r STR_a STR_b STR_i STR_a STR_n "\0" +#define STRING_oldturkic0 STR_o STR_l STR_d STR_t STR_u STR_r STR_k STR_i STR_c "\0" +#define STRING_olduyghur0 STR_o STR_l STR_d STR_u STR_y STR_g STR_h STR_u STR_r "\0" +#define STRING_oriya0 STR_o STR_r STR_i STR_y STR_a "\0" +#define STRING_osage0 STR_o STR_s STR_a STR_g STR_e "\0" +#define STRING_osmanya0 STR_o STR_s STR_m STR_a STR_n STR_y STR_a "\0" +#define STRING_p0 STR_p "\0" +#define STRING_pahawhhmong0 STR_p STR_a STR_h STR_a STR_w STR_h STR_h STR_m STR_o STR_n STR_g "\0" +#define STRING_palmyrene0 STR_p STR_a STR_l STR_m STR_y STR_r STR_e STR_n STR_e "\0" +#define STRING_paucinhau0 STR_p STR_a STR_u STR_c STR_i STR_n STR_h STR_a STR_u "\0" +#define STRING_pc0 STR_p STR_c "\0" +#define STRING_pd0 STR_p STR_d "\0" +#define STRING_pe0 STR_p STR_e "\0" +#define STRING_pf0 STR_p STR_f "\0" +#define STRING_phagspa0 STR_p STR_h STR_a STR_g STR_s STR_p STR_a "\0" +#define STRING_phoenician0 STR_p STR_h STR_o STR_e STR_n STR_i STR_c STR_i STR_a STR_n "\0" +#define STRING_pi0 STR_p STR_i "\0" +#define STRING_po0 STR_p STR_o "\0" +#define STRING_ps0 STR_p STR_s "\0" +#define STRING_psalterpahlavi0 STR_p STR_s STR_a STR_l STR_t STR_e STR_r STR_p STR_a STR_h STR_l STR_a STR_v STR_i "\0" +#define STRING_rejang0 STR_r STR_e STR_j STR_a STR_n STR_g "\0" +#define STRING_runic0 STR_r STR_u STR_n STR_i STR_c "\0" +#define STRING_s0 STR_s "\0" +#define STRING_samaritan0 STR_s STR_a STR_m STR_a STR_r STR_i STR_t STR_a STR_n "\0" +#define STRING_saurashtra0 STR_s STR_a STR_u STR_r STR_a STR_s STR_h STR_t STR_r STR_a "\0" +#define STRING_sc0 STR_s STR_c "\0" +#define STRING_sharada0 STR_s STR_h STR_a STR_r STR_a STR_d STR_a "\0" +#define STRING_shavian0 STR_s STR_h STR_a STR_v STR_i STR_a STR_n "\0" +#define STRING_siddham0 STR_s STR_i STR_d STR_d STR_h STR_a STR_m "\0" +#define 
STRING_signwriting0 STR_s STR_i STR_g STR_n STR_w STR_r STR_i STR_t STR_i STR_n STR_g "\0" +#define STRING_sinhala0 STR_s STR_i STR_n STR_h STR_a STR_l STR_a "\0" +#define STRING_sk0 STR_s STR_k "\0" +#define STRING_sm0 STR_s STR_m "\0" +#define STRING_so0 STR_s STR_o "\0" +#define STRING_sogdian0 STR_s STR_o STR_g STR_d STR_i STR_a STR_n "\0" +#define STRING_sorasompeng0 STR_s STR_o STR_r STR_a STR_s STR_o STR_m STR_p STR_e STR_n STR_g "\0" +#define STRING_soyombo0 STR_s STR_o STR_y STR_o STR_m STR_b STR_o "\0" +#define STRING_sundanese0 STR_s STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0" +#define STRING_sylotinagri0 STR_s STR_y STR_l STR_o STR_t STR_i STR_n STR_a STR_g STR_r STR_i "\0" +#define STRING_syriac0 STR_s STR_y STR_r STR_i STR_a STR_c "\0" +#define STRING_tagalog0 STR_t STR_a STR_g STR_a STR_l STR_o STR_g "\0" +#define STRING_tagbanwa0 STR_t STR_a STR_g STR_b STR_a STR_n STR_w STR_a "\0" +#define STRING_taile0 STR_t STR_a STR_i STR_l STR_e "\0" +#define STRING_taitham0 STR_t STR_a STR_i STR_t STR_h STR_a STR_m "\0" +#define STRING_taiviet0 STR_t STR_a STR_i STR_v STR_i STR_e STR_t "\0" +#define STRING_takri0 STR_t STR_a STR_k STR_r STR_i "\0" +#define STRING_tamil0 STR_t STR_a STR_m STR_i STR_l "\0" +#define STRING_tangsa0 STR_t STR_a STR_n STR_g STR_s STR_a "\0" +#define STRING_tangut0 STR_t STR_a STR_n STR_g STR_u STR_t "\0" +#define STRING_telugu0 STR_t STR_e STR_l STR_u STR_g STR_u "\0" +#define STRING_thaana0 STR_t STR_h STR_a STR_a STR_n STR_a "\0" +#define STRING_thai0 STR_t STR_h STR_a STR_i "\0" +#define STRING_tibetan0 STR_t STR_i STR_b STR_e STR_t STR_a STR_n "\0" +#define STRING_tifinagh0 STR_t STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0" +#define STRING_tirhuta0 STR_t STR_i STR_r STR_h STR_u STR_t STR_a "\0" +#define STRING_toto0 STR_t STR_o STR_t STR_o "\0" +#define STRING_ugaritic0 STR_u STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0" +#define STRING_unknown0 STR_u STR_n STR_k STR_n STR_o STR_w STR_n "\0" +#define STRING_vai0 
STR_v STR_a STR_i "\0" +#define STRING_vithkuqi0 STR_v STR_i STR_t STR_h STR_k STR_u STR_q STR_i "\0" +#define STRING_wancho0 STR_w STR_a STR_n STR_c STR_h STR_o "\0" +#define STRING_warangciti0 STR_w STR_a STR_r STR_a STR_n STR_g STR_c STR_i STR_t STR_i "\0" +#define STRING_xan0 STR_x STR_a STR_n "\0" +#define STRING_xps0 STR_x STR_p STR_s "\0" +#define STRING_xsp0 STR_x STR_s STR_p "\0" +#define STRING_xuc0 STR_x STR_u STR_c "\0" +#define STRING_xwd0 STR_x STR_w STR_d "\0" +#define STRING_yezidi0 STR_y STR_e STR_z STR_i STR_d STR_i "\0" +#define STRING_yi0 STR_y STR_i "\0" +#define STRING_z0 STR_z "\0" +#define STRING_zanabazarsquare0 STR_z STR_a STR_n STR_a STR_b STR_a STR_z STR_a STR_r STR_s STR_q STR_u STR_a STR_r STR_e "\0" +#define STRING_zl0 STR_z STR_l "\0" +#define STRING_zp0 STR_z STR_p "\0" +#define STRING_zs0 STR_z STR_s "\0" const char PRIV(utt_names)[] = - STRING_Adlam0 - STRING_Ahom0 - STRING_Anatolian_Hieroglyphs0 - STRING_Any0 - STRING_Arabic0 - STRING_Armenian0 - STRING_Avestan0 - STRING_Balinese0 - STRING_Bamum0 - STRING_Bassa_Vah0 - STRING_Batak0 - STRING_Bengali0 - STRING_Bhaiksuki0 - STRING_Bopomofo0 - STRING_Brahmi0 - STRING_Braille0 - STRING_Buginese0 - STRING_Buhid0 - STRING_C0 - STRING_Canadian_Aboriginal0 - STRING_Carian0 - STRING_Caucasian_Albanian0 - STRING_Cc0 - STRING_Cf0 - STRING_Chakma0 - STRING_Cham0 - STRING_Cherokee0 - STRING_Chorasmian0 - STRING_Cn0 - STRING_Co0 - STRING_Common0 - STRING_Coptic0 - STRING_Cs0 - STRING_Cuneiform0 - STRING_Cypriot0 - STRING_Cypro_Minoan0 - STRING_Cyrillic0 - STRING_Deseret0 - STRING_Devanagari0 - STRING_Dives_Akuru0 - STRING_Dogra0 - STRING_Duployan0 - STRING_Egyptian_Hieroglyphs0 - STRING_Elbasan0 - STRING_Elymaic0 - STRING_Ethiopic0 - STRING_Georgian0 - STRING_Glagolitic0 - STRING_Gothic0 - STRING_Grantha0 - STRING_Greek0 - STRING_Gujarati0 - STRING_Gunjala_Gondi0 - STRING_Gurmukhi0 - STRING_Han0 - STRING_Hangul0 - STRING_Hanifi_Rohingya0 - STRING_Hanunoo0 - STRING_Hatran0 - STRING_Hebrew0 - 
STRING_Hiragana0 - STRING_Imperial_Aramaic0 - STRING_Inherited0 - STRING_Inscriptional_Pahlavi0 - STRING_Inscriptional_Parthian0 - STRING_Javanese0 - STRING_Kaithi0 - STRING_Kannada0 - STRING_Katakana0 - STRING_Kayah_Li0 - STRING_Kharoshthi0 - STRING_Khitan_Small_Script0 - STRING_Khmer0 - STRING_Khojki0 - STRING_Khudawadi0 - STRING_L0 - STRING_L_AMPERSAND0 - STRING_Lao0 - STRING_Latin0 - STRING_Lepcha0 - STRING_Limbu0 - STRING_Linear_A0 - STRING_Linear_B0 - STRING_Lisu0 - STRING_Ll0 - STRING_Lm0 - STRING_Lo0 - STRING_Lt0 - STRING_Lu0 - STRING_Lycian0 - STRING_Lydian0 - STRING_M0 - STRING_Mahajani0 - STRING_Makasar0 - STRING_Malayalam0 - STRING_Mandaic0 - STRING_Manichaean0 - STRING_Marchen0 - STRING_Masaram_Gondi0 - STRING_Mc0 - STRING_Me0 - STRING_Medefaidrin0 - STRING_Meetei_Mayek0 - STRING_Mende_Kikakui0 - STRING_Meroitic_Cursive0 - STRING_Meroitic_Hieroglyphs0 - STRING_Miao0 - STRING_Mn0 - STRING_Modi0 - STRING_Mongolian0 - STRING_Mro0 - STRING_Multani0 - STRING_Myanmar0 - STRING_N0 - STRING_Nabataean0 - STRING_Nandinagari0 - STRING_Nd0 - STRING_New_Tai_Lue0 - STRING_Newa0 - STRING_Nko0 - STRING_Nl0 - STRING_No0 - STRING_Nushu0 - STRING_Nyiakeng_Puachue_Hmong0 - STRING_Ogham0 - STRING_Ol_Chiki0 - STRING_Old_Hungarian0 - STRING_Old_Italic0 - STRING_Old_North_Arabian0 - STRING_Old_Permic0 - STRING_Old_Persian0 - STRING_Old_Sogdian0 - STRING_Old_South_Arabian0 - STRING_Old_Turkic0 - STRING_Old_Uyghur0 - STRING_Oriya0 - STRING_Osage0 - STRING_Osmanya0 - STRING_P0 - STRING_Pahawh_Hmong0 - STRING_Palmyrene0 - STRING_Pau_Cin_Hau0 - STRING_Pc0 - STRING_Pd0 - STRING_Pe0 - STRING_Pf0 - STRING_Phags_Pa0 - STRING_Phoenician0 - STRING_Pi0 - STRING_Po0 - STRING_Ps0 - STRING_Psalter_Pahlavi0 - STRING_Rejang0 - STRING_Runic0 - STRING_S0 - STRING_Samaritan0 - STRING_Saurashtra0 - STRING_Sc0 - STRING_Sharada0 - STRING_Shavian0 - STRING_Siddham0 - STRING_SignWriting0 - STRING_Sinhala0 - STRING_Sk0 - STRING_Sm0 - STRING_So0 - STRING_Sogdian0 - STRING_Sora_Sompeng0 - 
STRING_Soyombo0 - STRING_Sundanese0 - STRING_Syloti_Nagri0 - STRING_Syriac0 - STRING_Tagalog0 - STRING_Tagbanwa0 - STRING_Tai_Le0 - STRING_Tai_Tham0 - STRING_Tai_Viet0 - STRING_Takri0 - STRING_Tamil0 - STRING_Tangsa0 - STRING_Tangut0 - STRING_Telugu0 - STRING_Thaana0 - STRING_Thai0 - STRING_Tibetan0 - STRING_Tifinagh0 - STRING_Tirhuta0 - STRING_Toto0 - STRING_Ugaritic0 - STRING_Unknown0 - STRING_Vai0 - STRING_Vithkuqi0 - STRING_Wancho0 - STRING_Warang_Citi0 - STRING_Xan0 - STRING_Xps0 - STRING_Xsp0 - STRING_Xuc0 - STRING_Xwd0 - STRING_Yezidi0 - STRING_Yi0 - STRING_Z0 - STRING_Zanabazar_Square0 - STRING_Zl0 - STRING_Zp0 - STRING_Zs0; + STRING_adlam0 + STRING_ahom0 + STRING_anatolianhieroglyphs0 + STRING_any0 + STRING_arabic0 + STRING_armenian0 + STRING_avestan0 + STRING_balinese0 + STRING_bamum0 + STRING_bassavah0 + STRING_batak0 + STRING_bengali0 + STRING_bhaiksuki0 + STRING_bidial0 + STRING_bidian0 + STRING_bidib0 + STRING_bidibn0 + STRING_bidicontrol0 + STRING_bidics0 + STRING_bidien0 + STRING_bidies0 + STRING_bidiet0 + STRING_bidifsi0 + STRING_bidil0 + STRING_bidilre0 + STRING_bidilri0 + STRING_bidilro0 + STRING_bidinsm0 + STRING_bidion0 + STRING_bidipdf0 + STRING_bidipdi0 + STRING_bidir0 + STRING_bidirle0 + STRING_bidirli0 + STRING_bidirlo0 + STRING_bidis0 + STRING_bidiws0 + STRING_bopomofo0 + STRING_brahmi0 + STRING_braille0 + STRING_buginese0 + STRING_buhid0 + STRING_c0 + STRING_canadianaboriginal0 + STRING_carian0 + STRING_caucasianalbanian0 + STRING_cc0 + STRING_cf0 + STRING_chakma0 + STRING_cham0 + STRING_cherokee0 + STRING_chorasmian0 + STRING_cn0 + STRING_co0 + STRING_common0 + STRING_coptic0 + STRING_cs0 + STRING_cuneiform0 + STRING_cypriot0 + STRING_cyprominoan0 + STRING_cyrillic0 + STRING_deseret0 + STRING_devanagari0 + STRING_divesakuru0 + STRING_dogra0 + STRING_duployan0 + STRING_egyptianhieroglyphs0 + STRING_elbasan0 + STRING_elymaic0 + STRING_ethiopic0 + STRING_georgian0 + STRING_glagolitic0 + STRING_gothic0 + STRING_grantha0 + STRING_greek0 + 
STRING_gujarati0 + STRING_gunjalagondi0 + STRING_gurmukhi0 + STRING_han0 + STRING_hangul0 + STRING_hanifirohingya0 + STRING_hanunoo0 + STRING_hatran0 + STRING_hebrew0 + STRING_hiragana0 + STRING_imperialaramaic0 + STRING_inherited0 + STRING_inscriptionalpahlavi0 + STRING_inscriptionalparthian0 + STRING_javanese0 + STRING_kaithi0 + STRING_kannada0 + STRING_katakana0 + STRING_kayahli0 + STRING_kharoshthi0 + STRING_khitansmallscript0 + STRING_khmer0 + STRING_khojki0 + STRING_khudawadi0 + STRING_l0 + STRING_l_AMPERSAND0 + STRING_lao0 + STRING_latin0 + STRING_lc0 + STRING_lepcha0 + STRING_limbu0 + STRING_lineara0 + STRING_linearb0 + STRING_lisu0 + STRING_ll0 + STRING_lm0 + STRING_lo0 + STRING_lt0 + STRING_lu0 + STRING_lycian0 + STRING_lydian0 + STRING_m0 + STRING_mahajani0 + STRING_makasar0 + STRING_malayalam0 + STRING_mandaic0 + STRING_manichaean0 + STRING_marchen0 + STRING_masaramgondi0 + STRING_mc0 + STRING_me0 + STRING_medefaidrin0 + STRING_meeteimayek0 + STRING_mendekikakui0 + STRING_meroiticcursive0 + STRING_meroitichieroglyphs0 + STRING_miao0 + STRING_mn0 + STRING_modi0 + STRING_mongolian0 + STRING_mro0 + STRING_multani0 + STRING_myanmar0 + STRING_n0 + STRING_nabataean0 + STRING_nandinagari0 + STRING_nd0 + STRING_newa0 + STRING_newtailue0 + STRING_nko0 + STRING_nl0 + STRING_no0 + STRING_nushu0 + STRING_nyiakengpuachuehmong0 + STRING_ogham0 + STRING_olchiki0 + STRING_oldhungarian0 + STRING_olditalic0 + STRING_oldnortharabian0 + STRING_oldpermic0 + STRING_oldpersian0 + STRING_oldsogdian0 + STRING_oldsoutharabian0 + STRING_oldturkic0 + STRING_olduyghur0 + STRING_oriya0 + STRING_osage0 + STRING_osmanya0 + STRING_p0 + STRING_pahawhhmong0 + STRING_palmyrene0 + STRING_paucinhau0 + STRING_pc0 + STRING_pd0 + STRING_pe0 + STRING_pf0 + STRING_phagspa0 + STRING_phoenician0 + STRING_pi0 + STRING_po0 + STRING_ps0 + STRING_psalterpahlavi0 + STRING_rejang0 + STRING_runic0 + STRING_s0 + STRING_samaritan0 + STRING_saurashtra0 + STRING_sc0 + STRING_sharada0 + STRING_shavian0 + 
STRING_siddham0 + STRING_signwriting0 + STRING_sinhala0 + STRING_sk0 + STRING_sm0 + STRING_so0 + STRING_sogdian0 + STRING_sorasompeng0 + STRING_soyombo0 + STRING_sundanese0 + STRING_sylotinagri0 + STRING_syriac0 + STRING_tagalog0 + STRING_tagbanwa0 + STRING_taile0 + STRING_taitham0 + STRING_taiviet0 + STRING_takri0 + STRING_tamil0 + STRING_tangsa0 + STRING_tangut0 + STRING_telugu0 + STRING_thaana0 + STRING_thai0 + STRING_tibetan0 + STRING_tifinagh0 + STRING_tirhuta0 + STRING_toto0 + STRING_ugaritic0 + STRING_unknown0 + STRING_vai0 + STRING_vithkuqi0 + STRING_wancho0 + STRING_warangciti0 + STRING_xan0 + STRING_xps0 + STRING_xsp0 + STRING_xuc0 + STRING_xwd0 + STRING_yezidi0 + STRING_yi0 + STRING_z0 + STRING_zanabazarsquare0 + STRING_zl0 + STRING_zp0 + STRING_zs0; const ucp_type_table PRIV(utt)[] = { { 0, PT_SC, ucp_Adlam }, { 6, PT_SC, ucp_Ahom }, { 11, PT_SC, ucp_Anatolian_Hieroglyphs }, - { 33, PT_ANY, 0 }, - { 37, PT_SC, ucp_Arabic }, - { 44, PT_SC, ucp_Armenian }, - { 53, PT_SC, ucp_Avestan }, - { 61, PT_SC, ucp_Balinese }, - { 70, PT_SC, ucp_Bamum }, - { 76, PT_SC, ucp_Bassa_Vah }, - { 86, PT_SC, ucp_Batak }, - { 92, PT_SC, ucp_Bengali }, - { 100, PT_SC, ucp_Bhaiksuki }, - { 110, PT_SC, ucp_Bopomofo }, - { 119, PT_SC, ucp_Brahmi }, - { 126, PT_SC, ucp_Braille }, - { 134, PT_SC, ucp_Buginese }, - { 143, PT_SC, ucp_Buhid }, - { 149, PT_GC, ucp_C }, - { 151, PT_SC, ucp_Canadian_Aboriginal }, - { 171, PT_SC, ucp_Carian }, - { 178, PT_SC, ucp_Caucasian_Albanian }, - { 197, PT_PC, ucp_Cc }, - { 200, PT_PC, ucp_Cf }, - { 203, PT_SC, ucp_Chakma }, - { 210, PT_SC, ucp_Cham }, - { 215, PT_SC, ucp_Cherokee }, - { 224, PT_SC, ucp_Chorasmian }, - { 235, PT_PC, ucp_Cn }, - { 238, PT_PC, ucp_Co }, - { 241, PT_SC, ucp_Common }, - { 248, PT_SC, ucp_Coptic }, - { 255, PT_PC, ucp_Cs }, - { 258, PT_SC, ucp_Cuneiform }, - { 268, PT_SC, ucp_Cypriot }, - { 276, PT_SC, ucp_Cypro_Minoan }, - { 289, PT_SC, ucp_Cyrillic }, - { 298, PT_SC, ucp_Deseret }, - { 306, PT_SC, ucp_Devanagari }, - 
{ 317, PT_SC, ucp_Dives_Akuru }, - { 329, PT_SC, ucp_Dogra }, - { 335, PT_SC, ucp_Duployan }, - { 344, PT_SC, ucp_Egyptian_Hieroglyphs }, - { 365, PT_SC, ucp_Elbasan }, - { 373, PT_SC, ucp_Elymaic }, - { 381, PT_SC, ucp_Ethiopic }, - { 390, PT_SC, ucp_Georgian }, - { 399, PT_SC, ucp_Glagolitic }, - { 410, PT_SC, ucp_Gothic }, - { 417, PT_SC, ucp_Grantha }, - { 425, PT_SC, ucp_Greek }, - { 431, PT_SC, ucp_Gujarati }, - { 440, PT_SC, ucp_Gunjala_Gondi }, - { 454, PT_SC, ucp_Gurmukhi }, - { 463, PT_SC, ucp_Han }, - { 467, PT_SC, ucp_Hangul }, - { 474, PT_SC, ucp_Hanifi_Rohingya }, - { 490, PT_SC, ucp_Hanunoo }, - { 498, PT_SC, ucp_Hatran }, - { 505, PT_SC, ucp_Hebrew }, - { 512, PT_SC, ucp_Hiragana }, - { 521, PT_SC, ucp_Imperial_Aramaic }, - { 538, PT_SC, ucp_Inherited }, - { 548, PT_SC, ucp_Inscriptional_Pahlavi }, - { 570, PT_SC, ucp_Inscriptional_Parthian }, - { 593, PT_SC, ucp_Javanese }, - { 602, PT_SC, ucp_Kaithi }, - { 609, PT_SC, ucp_Kannada }, - { 617, PT_SC, ucp_Katakana }, - { 626, PT_SC, ucp_Kayah_Li }, - { 635, PT_SC, ucp_Kharoshthi }, - { 646, PT_SC, ucp_Khitan_Small_Script }, - { 666, PT_SC, ucp_Khmer }, - { 672, PT_SC, ucp_Khojki }, - { 679, PT_SC, ucp_Khudawadi }, - { 689, PT_GC, ucp_L }, - { 691, PT_LAMP, 0 }, - { 694, PT_SC, ucp_Lao }, - { 698, PT_SC, ucp_Latin }, - { 704, PT_SC, ucp_Lepcha }, - { 711, PT_SC, ucp_Limbu }, - { 717, PT_SC, ucp_Linear_A }, - { 726, PT_SC, ucp_Linear_B }, - { 735, PT_SC, ucp_Lisu }, - { 740, PT_PC, ucp_Ll }, - { 743, PT_PC, ucp_Lm }, - { 746, PT_PC, ucp_Lo }, - { 749, PT_PC, ucp_Lt }, - { 752, PT_PC, ucp_Lu }, - { 755, PT_SC, ucp_Lycian }, - { 762, PT_SC, ucp_Lydian }, - { 769, PT_GC, ucp_M }, - { 771, PT_SC, ucp_Mahajani }, - { 780, PT_SC, ucp_Makasar }, - { 788, PT_SC, ucp_Malayalam }, - { 798, PT_SC, ucp_Mandaic }, - { 806, PT_SC, ucp_Manichaean }, - { 817, PT_SC, ucp_Marchen }, - { 825, PT_SC, ucp_Masaram_Gondi }, - { 839, PT_PC, ucp_Mc }, - { 842, PT_PC, ucp_Me }, - { 845, PT_SC, ucp_Medefaidrin }, - { 857, PT_SC, 
ucp_Meetei_Mayek }, - { 870, PT_SC, ucp_Mende_Kikakui }, - { 884, PT_SC, ucp_Meroitic_Cursive }, - { 901, PT_SC, ucp_Meroitic_Hieroglyphs }, - { 922, PT_SC, ucp_Miao }, - { 927, PT_PC, ucp_Mn }, - { 930, PT_SC, ucp_Modi }, - { 935, PT_SC, ucp_Mongolian }, - { 945, PT_SC, ucp_Mro }, - { 949, PT_SC, ucp_Multani }, - { 957, PT_SC, ucp_Myanmar }, - { 965, PT_GC, ucp_N }, - { 967, PT_SC, ucp_Nabataean }, - { 977, PT_SC, ucp_Nandinagari }, - { 989, PT_PC, ucp_Nd }, - { 992, PT_SC, ucp_New_Tai_Lue }, - { 1004, PT_SC, ucp_Newa }, - { 1009, PT_SC, ucp_Nko }, - { 1013, PT_PC, ucp_Nl }, - { 1016, PT_PC, ucp_No }, - { 1019, PT_SC, ucp_Nushu }, - { 1025, PT_SC, ucp_Nyiakeng_Puachue_Hmong }, - { 1048, PT_SC, ucp_Ogham }, - { 1054, PT_SC, ucp_Ol_Chiki }, - { 1063, PT_SC, ucp_Old_Hungarian }, - { 1077, PT_SC, ucp_Old_Italic }, - { 1088, PT_SC, ucp_Old_North_Arabian }, - { 1106, PT_SC, ucp_Old_Permic }, - { 1117, PT_SC, ucp_Old_Persian }, - { 1129, PT_SC, ucp_Old_Sogdian }, - { 1141, PT_SC, ucp_Old_South_Arabian }, - { 1159, PT_SC, ucp_Old_Turkic }, - { 1170, PT_SC, ucp_Old_Uyghur }, - { 1181, PT_SC, ucp_Oriya }, - { 1187, PT_SC, ucp_Osage }, - { 1193, PT_SC, ucp_Osmanya }, - { 1201, PT_GC, ucp_P }, - { 1203, PT_SC, ucp_Pahawh_Hmong }, - { 1216, PT_SC, ucp_Palmyrene }, - { 1226, PT_SC, ucp_Pau_Cin_Hau }, - { 1238, PT_PC, ucp_Pc }, - { 1241, PT_PC, ucp_Pd }, - { 1244, PT_PC, ucp_Pe }, - { 1247, PT_PC, ucp_Pf }, - { 1250, PT_SC, ucp_Phags_Pa }, - { 1259, PT_SC, ucp_Phoenician }, - { 1270, PT_PC, ucp_Pi }, - { 1273, PT_PC, ucp_Po }, - { 1276, PT_PC, ucp_Ps }, - { 1279, PT_SC, ucp_Psalter_Pahlavi }, - { 1295, PT_SC, ucp_Rejang }, - { 1302, PT_SC, ucp_Runic }, - { 1308, PT_GC, ucp_S }, - { 1310, PT_SC, ucp_Samaritan }, - { 1320, PT_SC, ucp_Saurashtra }, - { 1331, PT_PC, ucp_Sc }, - { 1334, PT_SC, ucp_Sharada }, - { 1342, PT_SC, ucp_Shavian }, - { 1350, PT_SC, ucp_Siddham }, - { 1358, PT_SC, ucp_SignWriting }, - { 1370, PT_SC, ucp_Sinhala }, - { 1378, PT_PC, ucp_Sk }, - { 1381, PT_PC, 
ucp_Sm }, - { 1384, PT_PC, ucp_So }, - { 1387, PT_SC, ucp_Sogdian }, - { 1395, PT_SC, ucp_Sora_Sompeng }, - { 1408, PT_SC, ucp_Soyombo }, - { 1416, PT_SC, ucp_Sundanese }, - { 1426, PT_SC, ucp_Syloti_Nagri }, - { 1439, PT_SC, ucp_Syriac }, - { 1446, PT_SC, ucp_Tagalog }, - { 1454, PT_SC, ucp_Tagbanwa }, - { 1463, PT_SC, ucp_Tai_Le }, - { 1470, PT_SC, ucp_Tai_Tham }, - { 1479, PT_SC, ucp_Tai_Viet }, - { 1488, PT_SC, ucp_Takri }, - { 1494, PT_SC, ucp_Tamil }, - { 1500, PT_SC, ucp_Tangsa }, - { 1507, PT_SC, ucp_Tangut }, - { 1514, PT_SC, ucp_Telugu }, - { 1521, PT_SC, ucp_Thaana }, - { 1528, PT_SC, ucp_Thai }, - { 1533, PT_SC, ucp_Tibetan }, - { 1541, PT_SC, ucp_Tifinagh }, - { 1550, PT_SC, ucp_Tirhuta }, - { 1558, PT_SC, ucp_Toto }, - { 1563, PT_SC, ucp_Ugaritic }, - { 1572, PT_SC, ucp_Unknown }, - { 1580, PT_SC, ucp_Vai }, - { 1584, PT_SC, ucp_Vithkuqi }, - { 1593, PT_SC, ucp_Wancho }, - { 1600, PT_SC, ucp_Warang_Citi }, - { 1612, PT_ALNUM, 0 }, - { 1616, PT_PXSPACE, 0 }, - { 1620, PT_SPACE, 0 }, - { 1624, PT_UCNC, 0 }, - { 1628, PT_WORD, 0 }, - { 1632, PT_SC, ucp_Yezidi }, - { 1639, PT_SC, ucp_Yi }, - { 1642, PT_GC, ucp_Z }, - { 1644, PT_SC, ucp_Zanabazar_Square }, - { 1661, PT_PC, ucp_Zl }, - { 1664, PT_PC, ucp_Zp }, - { 1667, PT_PC, ucp_Zs } + { 32, PT_ANY, 0 }, + { 36, PT_SC, ucp_Arabic }, + { 43, PT_SC, ucp_Armenian }, + { 52, PT_SC, ucp_Avestan }, + { 60, PT_SC, ucp_Balinese }, + { 69, PT_SC, ucp_Bamum }, + { 75, PT_SC, ucp_Bassa_Vah }, + { 84, PT_SC, ucp_Batak }, + { 90, PT_SC, ucp_Bengali }, + { 98, PT_SC, ucp_Bhaiksuki }, + { 108, PT_BIDICL, ucp_bidiAL }, + { 115, PT_BIDICL, ucp_bidiAN }, + { 122, PT_BIDICL, ucp_bidiB }, + { 128, PT_BIDICL, ucp_bidiBN }, + { 135, PT_BIDICO, 0 }, + { 147, PT_BIDICL, ucp_bidiCS }, + { 154, PT_BIDICL, ucp_bidiEN }, + { 161, PT_BIDICL, ucp_bidiES }, + { 168, PT_BIDICL, ucp_bidiET }, + { 175, PT_BIDICL, ucp_bidiFSI }, + { 183, PT_BIDICL, ucp_bidiL }, + { 189, PT_BIDICL, ucp_bidiLRE }, + { 197, PT_BIDICL, ucp_bidiLRI }, + { 205, 
PT_BIDICL, ucp_bidiLRO }, + { 213, PT_BIDICL, ucp_bidiNSM }, + { 221, PT_BIDICL, ucp_bidiON }, + { 228, PT_BIDICL, ucp_bidiPDF }, + { 236, PT_BIDICL, ucp_bidiPDI }, + { 244, PT_BIDICL, ucp_bidiR }, + { 250, PT_BIDICL, ucp_bidiRLE }, + { 258, PT_BIDICL, ucp_bidiRLI }, + { 266, PT_BIDICL, ucp_bidiRLO }, + { 274, PT_BIDICL, ucp_bidiS }, + { 280, PT_BIDICL, ucp_bidiWS }, + { 287, PT_SC, ucp_Bopomofo }, + { 296, PT_SC, ucp_Brahmi }, + { 303, PT_SC, ucp_Braille }, + { 311, PT_SC, ucp_Buginese }, + { 320, PT_SC, ucp_Buhid }, + { 326, PT_GC, ucp_C }, + { 328, PT_SC, ucp_Canadian_Aboriginal }, + { 347, PT_SC, ucp_Carian }, + { 354, PT_SC, ucp_Caucasian_Albanian }, + { 372, PT_PC, ucp_Cc }, + { 375, PT_PC, ucp_Cf }, + { 378, PT_SC, ucp_Chakma }, + { 385, PT_SC, ucp_Cham }, + { 390, PT_SC, ucp_Cherokee }, + { 399, PT_SC, ucp_Chorasmian }, + { 410, PT_PC, ucp_Cn }, + { 413, PT_PC, ucp_Co }, + { 416, PT_SC, ucp_Common }, + { 423, PT_SC, ucp_Coptic }, + { 430, PT_PC, ucp_Cs }, + { 433, PT_SC, ucp_Cuneiform }, + { 443, PT_SC, ucp_Cypriot }, + { 451, PT_SC, ucp_Cypro_Minoan }, + { 463, PT_SC, ucp_Cyrillic }, + { 472, PT_SC, ucp_Deseret }, + { 480, PT_SC, ucp_Devanagari }, + { 491, PT_SC, ucp_Dives_Akuru }, + { 502, PT_SC, ucp_Dogra }, + { 508, PT_SC, ucp_Duployan }, + { 517, PT_SC, ucp_Egyptian_Hieroglyphs }, + { 537, PT_SC, ucp_Elbasan }, + { 545, PT_SC, ucp_Elymaic }, + { 553, PT_SC, ucp_Ethiopic }, + { 562, PT_SC, ucp_Georgian }, + { 571, PT_SC, ucp_Glagolitic }, + { 582, PT_SC, ucp_Gothic }, + { 589, PT_SC, ucp_Grantha }, + { 597, PT_SC, ucp_Greek }, + { 603, PT_SC, ucp_Gujarati }, + { 612, PT_SC, ucp_Gunjala_Gondi }, + { 625, PT_SC, ucp_Gurmukhi }, + { 634, PT_SC, ucp_Han }, + { 638, PT_SC, ucp_Hangul }, + { 645, PT_SC, ucp_Hanifi_Rohingya }, + { 660, PT_SC, ucp_Hanunoo }, + { 668, PT_SC, ucp_Hatran }, + { 675, PT_SC, ucp_Hebrew }, + { 682, PT_SC, ucp_Hiragana }, + { 691, PT_SC, ucp_Imperial_Aramaic }, + { 707, PT_SC, ucp_Inherited }, + { 717, PT_SC, ucp_Inscriptional_Pahlavi 
}, + { 738, PT_SC, ucp_Inscriptional_Parthian }, + { 760, PT_SC, ucp_Javanese }, + { 769, PT_SC, ucp_Kaithi }, + { 776, PT_SC, ucp_Kannada }, + { 784, PT_SC, ucp_Katakana }, + { 793, PT_SC, ucp_Kayah_Li }, + { 801, PT_SC, ucp_Kharoshthi }, + { 812, PT_SC, ucp_Khitan_Small_Script }, + { 830, PT_SC, ucp_Khmer }, + { 836, PT_SC, ucp_Khojki }, + { 843, PT_SC, ucp_Khudawadi }, + { 853, PT_GC, ucp_L }, + { 855, PT_LAMP, 0 }, + { 858, PT_SC, ucp_Lao }, + { 862, PT_SC, ucp_Latin }, + { 868, PT_LAMP, 0 }, + { 871, PT_SC, ucp_Lepcha }, + { 878, PT_SC, ucp_Limbu }, + { 884, PT_SC, ucp_Linear_A }, + { 892, PT_SC, ucp_Linear_B }, + { 900, PT_SC, ucp_Lisu }, + { 905, PT_PC, ucp_Ll }, + { 908, PT_PC, ucp_Lm }, + { 911, PT_PC, ucp_Lo }, + { 914, PT_PC, ucp_Lt }, + { 917, PT_PC, ucp_Lu }, + { 920, PT_SC, ucp_Lycian }, + { 927, PT_SC, ucp_Lydian }, + { 934, PT_GC, ucp_M }, + { 936, PT_SC, ucp_Mahajani }, + { 945, PT_SC, ucp_Makasar }, + { 953, PT_SC, ucp_Malayalam }, + { 963, PT_SC, ucp_Mandaic }, + { 971, PT_SC, ucp_Manichaean }, + { 982, PT_SC, ucp_Marchen }, + { 990, PT_SC, ucp_Masaram_Gondi }, + { 1003, PT_PC, ucp_Mc }, + { 1006, PT_PC, ucp_Me }, + { 1009, PT_SC, ucp_Medefaidrin }, + { 1021, PT_SC, ucp_Meetei_Mayek }, + { 1033, PT_SC, ucp_Mende_Kikakui }, + { 1046, PT_SC, ucp_Meroitic_Cursive }, + { 1062, PT_SC, ucp_Meroitic_Hieroglyphs }, + { 1082, PT_SC, ucp_Miao }, + { 1087, PT_PC, ucp_Mn }, + { 1090, PT_SC, ucp_Modi }, + { 1095, PT_SC, ucp_Mongolian }, + { 1105, PT_SC, ucp_Mro }, + { 1109, PT_SC, ucp_Multani }, + { 1117, PT_SC, ucp_Myanmar }, + { 1125, PT_GC, ucp_N }, + { 1127, PT_SC, ucp_Nabataean }, + { 1137, PT_SC, ucp_Nandinagari }, + { 1149, PT_PC, ucp_Nd }, + { 1152, PT_SC, ucp_Newa }, + { 1157, PT_SC, ucp_New_Tai_Lue }, + { 1167, PT_SC, ucp_Nko }, + { 1171, PT_PC, ucp_Nl }, + { 1174, PT_PC, ucp_No }, + { 1177, PT_SC, ucp_Nushu }, + { 1183, PT_SC, ucp_Nyiakeng_Puachue_Hmong }, + { 1204, PT_SC, ucp_Ogham }, + { 1210, PT_SC, ucp_Ol_Chiki }, + { 1218, PT_SC, 
ucp_Old_Hungarian }, + { 1231, PT_SC, ucp_Old_Italic }, + { 1241, PT_SC, ucp_Old_North_Arabian }, + { 1257, PT_SC, ucp_Old_Permic }, + { 1267, PT_SC, ucp_Old_Persian }, + { 1278, PT_SC, ucp_Old_Sogdian }, + { 1289, PT_SC, ucp_Old_South_Arabian }, + { 1305, PT_SC, ucp_Old_Turkic }, + { 1315, PT_SC, ucp_Old_Uyghur }, + { 1325, PT_SC, ucp_Oriya }, + { 1331, PT_SC, ucp_Osage }, + { 1337, PT_SC, ucp_Osmanya }, + { 1345, PT_GC, ucp_P }, + { 1347, PT_SC, ucp_Pahawh_Hmong }, + { 1359, PT_SC, ucp_Palmyrene }, + { 1369, PT_SC, ucp_Pau_Cin_Hau }, + { 1379, PT_PC, ucp_Pc }, + { 1382, PT_PC, ucp_Pd }, + { 1385, PT_PC, ucp_Pe }, + { 1388, PT_PC, ucp_Pf }, + { 1391, PT_SC, ucp_Phags_Pa }, + { 1399, PT_SC, ucp_Phoenician }, + { 1410, PT_PC, ucp_Pi }, + { 1413, PT_PC, ucp_Po }, + { 1416, PT_PC, ucp_Ps }, + { 1419, PT_SC, ucp_Psalter_Pahlavi }, + { 1434, PT_SC, ucp_Rejang }, + { 1441, PT_SC, ucp_Runic }, + { 1447, PT_GC, ucp_S }, + { 1449, PT_SC, ucp_Samaritan }, + { 1459, PT_SC, ucp_Saurashtra }, + { 1470, PT_PC, ucp_Sc }, + { 1473, PT_SC, ucp_Sharada }, + { 1481, PT_SC, ucp_Shavian }, + { 1489, PT_SC, ucp_Siddham }, + { 1497, PT_SC, ucp_SignWriting }, + { 1509, PT_SC, ucp_Sinhala }, + { 1517, PT_PC, ucp_Sk }, + { 1520, PT_PC, ucp_Sm }, + { 1523, PT_PC, ucp_So }, + { 1526, PT_SC, ucp_Sogdian }, + { 1534, PT_SC, ucp_Sora_Sompeng }, + { 1546, PT_SC, ucp_Soyombo }, + { 1554, PT_SC, ucp_Sundanese }, + { 1564, PT_SC, ucp_Syloti_Nagri }, + { 1576, PT_SC, ucp_Syriac }, + { 1583, PT_SC, ucp_Tagalog }, + { 1591, PT_SC, ucp_Tagbanwa }, + { 1600, PT_SC, ucp_Tai_Le }, + { 1606, PT_SC, ucp_Tai_Tham }, + { 1614, PT_SC, ucp_Tai_Viet }, + { 1622, PT_SC, ucp_Takri }, + { 1628, PT_SC, ucp_Tamil }, + { 1634, PT_SC, ucp_Tangsa }, + { 1641, PT_SC, ucp_Tangut }, + { 1648, PT_SC, ucp_Telugu }, + { 1655, PT_SC, ucp_Thaana }, + { 1662, PT_SC, ucp_Thai }, + { 1667, PT_SC, ucp_Tibetan }, + { 1675, PT_SC, ucp_Tifinagh }, + { 1684, PT_SC, ucp_Tirhuta }, + { 1692, PT_SC, ucp_Toto }, + { 1697, PT_SC, 
ucp_Ugaritic }, + { 1706, PT_SC, ucp_Unknown }, + { 1714, PT_SC, ucp_Vai }, + { 1718, PT_SC, ucp_Vithkuqi }, + { 1727, PT_SC, ucp_Wancho }, + { 1734, PT_SC, ucp_Warang_Citi }, + { 1745, PT_ALNUM, 0 }, + { 1749, PT_PXSPACE, 0 }, + { 1753, PT_SPACE, 0 }, + { 1757, PT_UCNC, 0 }, + { 1761, PT_WORD, 0 }, + { 1765, PT_SC, ucp_Yezidi }, + { 1772, PT_SC, ucp_Yi }, + { 1775, PT_GC, ucp_Z }, + { 1777, PT_SC, ucp_Zanabazar_Square }, + { 1793, PT_PC, ucp_Zl }, + { 1796, PT_PC, ucp_Zp }, + { 1799, PT_PC, ucp_Zs } }; const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); diff --git a/src/pcre2_xclass.c b/src/pcre2_xclass.c index 8b052be..2b5e7cf 100644 --- a/src/pcre2_xclass.c +++ b/src/pcre2_xclass.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2019 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -206,6 +206,16 @@ while ((t = *data++) != XCL_END) return !negated; } break; + + case PT_BIDICO: + if (((prop->bidi & UCD_BIDICONTROL_BIT) != 0) == isprop) + return !negated; + break; + + case PT_BIDICL: + if (((prop->bidi & UCD_BIDICLASS_MASK) == data[1]) == isprop) + return !negated; + break; /* The following three properties can occur only in an XCLASS, as there is no \p or \P coding for them. 
*/ diff --git a/src/pcre2test.c b/src/pcre2test.c index d5fde0d..e9d0148 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -441,6 +441,7 @@ enum { MOD_CTC, /* Applies to a compile context */ MOD_PAT, /* Applies to a pattern */ MOD_PATP, /* Ditto, OK for Perl test */ MOD_DAT, /* Applies to a data line */ + MOD_DATP, /* Ditto, OK for Perl test */ MOD_PD, /* Applies to a pattern or a data line */ MOD_PDP, /* As MOD_PD, OK for Perl test */ MOD_PND, /* As MOD_PD, but not for a default pattern */ @@ -700,7 +701,7 @@ static modstruct modlist[] = { { "no_auto_capture", MOD_PAT, MOD_OPT, PCRE2_NO_AUTO_CAPTURE, PO(options) }, { "no_auto_possess", MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS, PO(options) }, { "no_dotstar_anchor", MOD_PAT, MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR, PO(options) }, - { "no_jit", MOD_DAT, MOD_OPT, PCRE2_NO_JIT, DO(options) }, + { "no_jit", MOD_DATP, MOD_OPT, PCRE2_NO_JIT, DO(options) }, { "no_start_optimize", MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE, PO(options) }, { "no_utf_check", MOD_PD, MOD_OPT, PCRE2_NO_UTF_CHECK, PD(options) }, { "notbol", MOD_DAT, MOD_OPT, PCRE2_NOTBOL, DO(options) }, @@ -3583,6 +3584,7 @@ if (restrict_for_perl_test) switch(m->which) { case MOD_PNDP: case MOD_PATP: + case MOD_DATP: case MOD_PDP: break; @@ -3604,7 +3606,8 @@ switch (m->which) else if (ctx == CTX_DAT) field = PTR(dat_context); break; - case MOD_DAT: /* Data line modifier */ + case MOD_DAT: /* Data line modifier */ + case MOD_DATP: /* Allowed for Perl test */ if (dctl != NULL) field = dctl; break; diff --git a/testdata/testinput4 b/testdata/testinput4 index 4e2a0ab..de32d4d 100644 --- a/testdata/testinput4 +++ b/testdata/testinput4 @@ -2495,4 +2495,118 @@ \x{42f} \x{44f} +# ----------------------------------------------------------------------------- +# Tests for bidi control and bidi class properties, not yet supported by JIT. 
+ +#subject no_jit + +/\p{ bidi_control }/utf + -->\x{202c}<-- + +/\p{bidicontrol}+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidicontrol}+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidicontrol}++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/[\p{bidi_control}]/utf + -->\x{202c}<-- + +/[\p{bidicontrol}]+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/[\p{bidicontrol}]+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/[\p{bidicontrol}]++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/[\p{bidicontrol}<>]+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\P{bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{^bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidi class = al}/utf + -->\x{061D}<-- + +/\p{bidi class = al}+/utf + -->\x{061D}\x{061e}\x{061f}<-- + +/\p{bidi_class : AL}+?/utf + -->\x{061D}\x{061e}\x{061f}<-- + +/\p{Bidi_Class : AL}++/utf + -->\x{061D}\x{061e}\x{061f}<-- + +/\p{bidi class = aN}+/utf + -->\x{061D}\x{0602}\x{0604}\x{061f}<-- + +/\p{bidi class = B}+/utf + -->\x{0a}\x{0d}\x{01c}\x{01e}\x{085}\x{2029}<-- + +/\p{bidi class:BN}+/utf + -->\x{0}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}<-- + +/\p{bidiclass:cs}+/utf + -->,.\x{060c}\x{ff1a}<-- + +/\p{bidiclass:En}+/utf + -->09\x{b2}\x{2074}\x{1fbf9}<-- + +/\p{bidiclass:es}+/utf + ==>+-\x{207a}\x{ff0d}<== + +/\p{bidiclass:et}+/utf + -->#\{24}%\x{a2}\x{A838}\x{1e2ff}<-- 
+ +/\p{bidiclass:FSI}+/utf + -->\x{2068}<-- + +/\p{bidi class:L}+/utf + -->ABC<-- + +/\P{bidi class:L}+/utf + -->ABC<-- + +/\p{bidi class:LRE}+\p{bidiclass=lri}*\p{bidiclass:lro}/utf + -->\x{202a}\x{2066}\x{202d}<-- + +/\p{bidi class:NSM}+/utf + -->\x{9bc}\x{a71}\x{e31}<-- + +/\p{bidi class:ON}+/utf + -->\x{21}'()*;@\x{384}\x{2039}<=- + +/\p{bidiclass:pdf}\p{bidiclass:pdi}/utf + -->\x{202c}\x{2069}<-- + +/\p{bidi class:R}+/utf + -->\x{590}\x{5c6}\x{200f}\x{10805}<-- + +/\p{bidi class:RLE}+\p{bidi class:RLI}*\p{bidi class:RLO}+/utf + -->\x{202b}\x{2067}\x{202e}<-- + +/\p{bidi class:S}+\p{bidiclass:WS}+/utf + -->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<-- + +#subject -no_jit + +# ----------------------------------------------------------------------------- + # End of testinput4 diff --git a/testdata/testinput5 b/testdata/testinput5 index 9126236..da7a409 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -2188,4 +2188,8 @@ /(\xc1)\1/i,ucp \xc1\xe1\=no_jit +/\p{L&}+\p{bidi_control}/B + +/\p{bidi_control}+\p{L&}/B + # End of testinput5 diff --git a/testdata/testinput7 b/testdata/testinput7 index 194f655..6703314 100644 --- a/testdata/testinput7 +++ b/testdata/testinput7 @@ -2093,4 +2093,114 @@ /(?<=\x{100})\x{200}(?=\x{300})/utf,allusedtext \x{100}\x{200}\x{300} +# ----------------------------------------------------------------------------- +# Tests for bidi control and bidi class properties + +/\p{ bidi_control }/utf + -->\x{202c}<-- + +/\p{bidicontrol}+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidicontrol}+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidicontrol}++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/[\p{bidi_control}]/utf + -->\x{202c}<-- + +/[\p{bidicontrol}]+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 
-->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/[\p{bidicontrol}]+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/[\p{bidicontrol}]++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/[\p{bidicontrol}<>]+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\P{bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{^bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + +/\p{bidi class = al}/utf + -->\x{061D}<-- + +/\p{bidi class = al}+/utf + -->\x{061D}\x{061e}\x{061f}<-- + +/\p{bidi_class : AL}+?/utf + -->\x{061D}\x{061e}\x{061f}<-- + +/\p{Bidi_Class : AL}++/utf + -->\x{061D}\x{061e}\x{061f}<-- + +/\p{bidi class = aN}+/utf + -->\x{061D}\x{0602}\x{0604}\x{061f}<-- + +/\p{bidi class = B}+/utf + -->\x{0a}\x{0d}\x{01c}\x{01e}\x{085}\x{2029}<-- + +/\p{bidi class:BN}+/utf + -->\x{0}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}<-- + +/\p{bidiclass:cs}+/utf + -->,.\x{060c}\x{ff1a}<-- + +/\p{bidiclass:En}+/utf + -->09\x{b2}\x{2074}\x{1fbf9}<-- + +/\p{bidiclass:es}+/utf + ==>+-\x{207a}\x{ff0d}<== + +/\p{bidiclass:et}+/utf + -->#\{24}%\x{a2}\x{A838}\x{1e2ff}<-- + +/\p{bidiclass:FSI}+/utf + -->\x{2068}<-- + +/\p{bidi class:L}+/utf + -->ABC<-- + +/\P{bidi class:L}+/utf + -->ABC<-- + +/\p{bidi class:LRE}+\p{bidiclass=lri}*\p{bidiclass:lro}/utf + -->\x{202a}\x{2066}\x{202d}<-- + +/\p{bidi class:NSM}+/utf + -->\x{9bc}\x{a71}\x{e31}<-- + +/\p{bidi class:ON}+/utf + -->\x{21}'()*;@\x{384}\x{2039}<=- + +/\p{bidiclass:pdf}\p{bidiclass:pdi}/utf + -->\x{202c}\x{2069}<-- + +/\p{bidi class:R}+/utf + -->\x{590}\x{5c6}\x{200f}\x{10805}<-- + +/\p{bidi class:RLE}+\p{bidi class:RLI}*\p{bidi class:RLO}+/utf + -->\x{202b}\x{2067}\x{202e}<-- + +/\p{bidi 
class:S}+\p{bidiclass:WS}+/utf + -->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<-- + +# ----------------------------------------------------------------------------- + # End of testinput7 diff --git a/testdata/testoutput4 b/testdata/testoutput4 index f43d940..8b6062d 100644 --- a/testdata/testoutput4 +++ b/testdata/testoutput4 @@ -4032,4 +4032,163 @@ No match \x{44f} 0: +# ----------------------------------------------------------------------------- +# Tests for bidi control and bidi class properties, not yet supported by JIT. + +#subject no_jit + +/\p{ bidi_control }/utf + -->\x{202c}<-- + 0: \x{202c} + +/\p{bidicontrol}+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/\p{bidicontrol}+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066} + +/\p{bidicontrol}++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/[\p{bidi_control}]/utf + -->\x{202c}<-- + 0: \x{202c} + +/[\p{bidicontrol}]+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/[\p{bidicontrol}]+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066} + +/[\p{bidicontrol}]++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/[\p{bidicontrol}<>]+/utf + 
-->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: >\x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}< + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: >\x{2066}\x{2067}\x{2068}\x{2069}< + +/\P{bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: --> + 0: <-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: --> + 0: <-- + +/\p{^bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: --> + 0: <-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: --> + 0: <-- + +/\p{bidi class = al}/utf + -->\x{061D}<-- + 0: \x{61d} + +/\p{bidi class = al}+/utf + -->\x{061D}\x{061e}\x{061f}<-- + 0: \x{61d}\x{61e}\x{61f} + +/\p{bidi_class : AL}+?/utf + -->\x{061D}\x{061e}\x{061f}<-- + 0: \x{61d} + +/\p{Bidi_Class : AL}++/utf + -->\x{061D}\x{061e}\x{061f}<-- + 0: \x{61d}\x{61e}\x{61f} + +/\p{bidi class = aN}+/utf + -->\x{061D}\x{0602}\x{0604}\x{061f}<-- + 0: \x{602}\x{604} + +/\p{bidi class = B}+/utf + -->\x{0a}\x{0d}\x{01c}\x{01e}\x{085}\x{2029}<-- + 0: \x{0a}\x{0d}\x{1c}\x{1e}\x{85}\x{2029} + +/\p{bidi class:BN}+/utf + -->\x{0}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}<-- + 0: \x{00}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff} + +/\p{bidiclass:cs}+/utf + -->,.\x{060c}\x{ff1a}<-- + 0: ,.\x{60c}\x{ff1a} + +/\p{bidiclass:En}+/utf + -->09\x{b2}\x{2074}\x{1fbf9}<-- + 0: 09\x{b2}\x{2074}\x{1fbf9} + +/\p{bidiclass:es}+/utf + ==>+-\x{207a}\x{ff0d}<== + 0: +-\x{207a}\x{ff0d} + +/\p{bidiclass:et}+/utf + -->#\{24}%\x{a2}\x{A838}\x{1e2ff}<-- + 0: # + +/\p{bidiclass:FSI}+/utf + -->\x{2068}<-- + 0: \x{2068} + +/\p{bidi class:L}+/utf + -->ABC<-- + 0: ABC + +/\P{bidi class:L}+/utf + -->ABC<-- + 0: --> + +/\p{bidi class:LRE}+\p{bidiclass=lri}*\p{bidiclass:lro}/utf + -->\x{202a}\x{2066}\x{202d}<-- + 0: \x{202a}\x{2066}\x{202d} + +/\p{bidi class:NSM}+/utf + -->\x{9bc}\x{a71}\x{e31}<-- + 0: \x{9bc}\x{a71}\x{e31} + +/\p{bidi class:ON}+/utf + -->\x{21}'()*;@\x{384}\x{2039}<=- + 0: >!'()*;@\x{384}\x{2039}<= + 
+/\p{bidiclass:pdf}\p{bidiclass:pdi}/utf + -->\x{202c}\x{2069}<-- + 0: \x{202c}\x{2069} + +/\p{bidi class:R}+/utf + -->\x{590}\x{5c6}\x{200f}\x{10805}<-- + 0: \x{590}\x{5c6}\x{200f}\x{10805} + +/\p{bidi class:RLE}+\p{bidi class:RLI}*\p{bidi class:RLO}+/utf + -->\x{202b}\x{2067}\x{202e}<-- + 0: \x{202b}\x{2067}\x{202e} + +/\p{bidi class:S}+\p{bidiclass:WS}+/utf + -->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<-- + 0: \x{09}\x{0b}\x{1f} \x{0c} \x{2000} \x{3000} + +#subject -no_jit + +# ----------------------------------------------------------------------------- + # End of testinput4 diff --git a/testdata/testoutput5 b/testdata/testoutput5 index b1842df..3a45eb8 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -3298,7 +3298,7 @@ No match AllAny+ notprop Any AllAny+ - prop L& + prop Lc AllAny+ prop L AllAny+ @@ -3322,29 +3322,29 @@ No match /\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra - prop L& + + prop Lc + AllAny - prop L& + - prop L& - notprop L& ++ - prop L& - prop L& + + prop Lc + + prop Lc + notprop Lc ++ + prop Lc + prop Lc + prop L - prop L& + + prop Lc + prop Lu - prop L& + + prop Lc + prop Han - prop L& + + prop Lc + prop Xan - prop L& ++ + prop Lc ++ notprop Xan - prop L& ++ + prop Lc ++ prop Xsp - prop L& ++ + prop Lc ++ prop Xps prop Xwd + - prop L& - prop L& + + prop Lc + prop Lc + prop Xuc Ket End @@ -3356,7 +3356,7 @@ No match prop N + AllAny prop N + - prop L& + prop Lc prop N ++ prop L prop N + @@ -3387,7 +3387,7 @@ No match prop Lu + AllAny prop Lu + - prop L& + prop Lc prop Lu + prop L prop Lu + @@ -3420,7 +3420,7 @@ No match prop Han + prop Lu prop Han + - prop L& + prop Lc prop Han + prop L prop Han + @@ -3449,9 +3449,9 @@ No match prop Xan + AllAny prop Xan + - prop L& + prop Lc notprop Xan ++ - prop L& + prop Lc prop Xan + prop 
L prop Xan + @@ -3480,7 +3480,7 @@ No match prop Xsp + AllAny prop Xsp ++ - prop L& + prop Lc prop Xsp ++ prop L prop Xsp ++ @@ -3509,7 +3509,7 @@ No match prop Xwd + AllAny prop Xwd + - prop L& + prop Lc prop Xwd + prop L prop Xwd + @@ -3538,7 +3538,7 @@ No match prop Xuc + AllAny prop Xuc + - prop L& + prop Lc prop Xuc + prop L prop Xuc + @@ -4949,4 +4949,22 @@ Subject length lower bound = 3 0: \xc1\xe1 1: \xc1 +/\p{L&}+\p{bidi_control}/B +------------------------------------------------------------------ + Bra + prop Lc ++ + prop Bidicontrol + Ket + End +------------------------------------------------------------------ + +/\p{bidi_control}+\p{L&}/B +------------------------------------------------------------------ + Bra + prop Bidicontrol ++ + prop Lc + Ket + End +------------------------------------------------------------------ + # End of testinput5 diff --git a/testdata/testoutput7 b/testdata/testoutput7 index 165c3a0..329ff31 100644 --- a/testdata/testoutput7 +++ b/testdata/testoutput7 @@ -3539,4 +3539,179 @@ No match 0: \x{100}\x{200}\x{300} <<<<<<< >>>>>>> +# ----------------------------------------------------------------------------- +# Tests for bidi control and bidi class properties + +/\p{ bidi_control }/utf + -->\x{202c}<-- + 0: \x{202c} + +/\p{bidicontrol}+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/\p{bidicontrol}+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + 1: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c} + 2: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b} + 3: \x{61c}\x{200e}\x{200f}\x{202a} + 4: \x{61c}\x{200e}\x{200f} + 5: \x{61c}\x{200e} + 6: \x{61c} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + 1: \x{2066}\x{2067}\x{2068} + 2: \x{2066}\x{2067} + 3: \x{2066} 
+ +/\p{bidicontrol}++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/[\p{bidi_control}]/utf + -->\x{202c}<-- + 0: \x{202c} + +/[\p{bidicontrol}]+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/[\p{bidicontrol}]+?/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + 1: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c} + 2: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b} + 3: \x{61c}\x{200e}\x{200f}\x{202a} + 4: \x{61c}\x{200e}\x{200f} + 5: \x{61c}\x{200e} + 6: \x{61c} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + 1: \x{2066}\x{2067}\x{2068} + 2: \x{2066}\x{2067} + 3: \x{2066} + +/[\p{bidicontrol}]++/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: \x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d} + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: \x{2066}\x{2067}\x{2068}\x{2069} + +/[\p{bidicontrol}<>]+/utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: >\x{61c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}< + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: >\x{2066}\x{2067}\x{2068}\x{2069}< + +/\P{bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: --> + 0: <-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: --> + 0: <-- + +/\p{^bidicontrol}+/g,utf + -->\x{061c}\x{200e}\x{200f}\x{202a}\x{202b}\x{202c}\x{202d}<-- + 0: --> + 0: <-- + -->\x{2066}\x{2067}\x{2068}\x{2069}<-- + 0: --> + 0: <-- + +/\p{bidi class = al}/utf + -->\x{061D}<-- + 0: \x{61d} + +/\p{bidi class = al}+/utf + -->\x{061D}\x{061e}\x{061f}<-- + 0: \x{61d}\x{61e}\x{61f} + +/\p{bidi_class : 
AL}+?/utf + -->\x{061D}\x{061e}\x{061f}<-- + 0: \x{61d}\x{61e}\x{61f} + 1: \x{61d}\x{61e} + 2: \x{61d} + +/\p{Bidi_Class : AL}++/utf + -->\x{061D}\x{061e}\x{061f}<-- + 0: \x{61d}\x{61e}\x{61f} + +/\p{bidi class = aN}+/utf + -->\x{061D}\x{0602}\x{0604}\x{061f}<-- + 0: \x{602}\x{604} + +/\p{bidi class = B}+/utf + -->\x{0a}\x{0d}\x{01c}\x{01e}\x{085}\x{2029}<-- + 0: \x{0a}\x{0d}\x{1c}\x{1e}\x{85}\x{2029} + +/\p{bidi class:BN}+/utf + -->\x{0}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff}<-- + 0: \x{00}\x{08}\x{200c}\x{fffe}\x{dfffe}\x{10ffff} + +/\p{bidiclass:cs}+/utf + -->,.\x{060c}\x{ff1a}<-- + 0: ,.\x{60c}\x{ff1a} + +/\p{bidiclass:En}+/utf + -->09\x{b2}\x{2074}\x{1fbf9}<-- + 0: 09\x{b2}\x{2074}\x{1fbf9} + +/\p{bidiclass:es}+/utf + ==>+-\x{207a}\x{ff0d}<== + 0: +-\x{207a}\x{ff0d} + +/\p{bidiclass:et}+/utf + -->#\{24}%\x{a2}\x{A838}\x{1e2ff}<-- + 0: # + +/\p{bidiclass:FSI}+/utf + -->\x{2068}<-- + 0: \x{2068} + +/\p{bidi class:L}+/utf + -->ABC<-- + 0: ABC + +/\P{bidi class:L}+/utf + -->ABC<-- + 0: --> + +/\p{bidi class:LRE}+\p{bidiclass=lri}*\p{bidiclass:lro}/utf + -->\x{202a}\x{2066}\x{202d}<-- + 0: \x{202a}\x{2066}\x{202d} + +/\p{bidi class:NSM}+/utf + -->\x{9bc}\x{a71}\x{e31}<-- + 0: \x{9bc}\x{a71}\x{e31} + +/\p{bidi class:ON}+/utf + -->\x{21}'()*;@\x{384}\x{2039}<=- + 0: >!'()*;@\x{384}\x{2039}<= + +/\p{bidiclass:pdf}\p{bidiclass:pdi}/utf + -->\x{202c}\x{2069}<-- + 0: \x{202c}\x{2069} + +/\p{bidi class:R}+/utf + -->\x{590}\x{5c6}\x{200f}\x{10805}<-- + 0: \x{590}\x{5c6}\x{200f}\x{10805} + +/\p{bidi class:RLE}+\p{bidi class:RLI}*\p{bidi class:RLO}+/utf + -->\x{202b}\x{2067}\x{202e}<-- + 0: \x{202b}\x{2067}\x{202e} + +/\p{bidi class:S}+\p{bidiclass:WS}+/utf + -->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<-- + 0: \x{09}\x{0b}\x{1f} \x{0c} \x{2000} \x{3000} + +# ----------------------------------------------------------------------------- + # End of testinput7