Rework script extension handling (#64)

Co-authored-by: Zoltan Herczeg <hzmester@freemail.hu>
This commit is contained in:
Zoltan Herczeg 2021-12-29 10:35:22 +01:00 committed by GitHub
parent 7713f33e46
commit afa4756d19
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 3745 additions and 3736 deletions

View File

@ -184,6 +184,46 @@ break_properties = [
'Extended_Pictographic', '14' 'Extended_Pictographic', '14'
] ]
# ---------------------------------------------------------------------------
# REORDERING SCRIPT NAMES
# ---------------------------------------------------------------------------
import re
def reorder_scripts():
    """Move scripts that occur in ScriptExtensions.txt to the front.

    Reads "Unicode.tables/ScriptExtensions.txt", collects every script
    abbreviation listed in its data lines, and then rebinds the module-level
    lists ``script_names`` and ``script_abbrevs`` so that entries whose
    abbreviation was seen in that file come first, followed by all other
    entries. The relative order inside each of the two groups is unchanged,
    and the two lists stay index-aligned.
    """
    global script_names
    global script_abbrevs

    # A data line looks like "1CD0..1CD2    ; Beng Deva Gran Knda # Mn ...";
    # comment and blank lines do not match and are skipped.
    names_re = re.compile(r'^[0-9A-F]{4,6}(?:\.\.[0-9A-F]{4,6})? +; ([A-Za-z_ ]+) #')

    extended_script_abbrevs = set()
    # Unicode data files are UTF-8; do not depend on the locale default.
    with open("Unicode.tables/ScriptExtensions.txt", encoding="utf-8") as f:
        for line in f:
            match_obj = names_re.match(line)
            if match_obj is None:
                continue
            # split() without an argument ignores repeated/trailing blanks, so
            # no empty string ends up in the set.
            extended_script_abbrevs.update(match_obj.group(1).split())

    # Stable partition: (name, abbrev) pairs go to one of two buckets.
    extended = []
    others = []
    for idx, abbrev in enumerate(script_abbrevs):
        bucket = extended if abbrev in extended_script_abbrevs else others
        bucket.append((script_names[idx], abbrev))

    ordered = extended + others
    script_names = [name for name, _ in ordered]
    script_abbrevs = [abbrev for _, abbrev in ordered]
reorder_scripts()
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# DERIVED LISTS # DERIVED LISTS

View File

@ -252,30 +252,15 @@ def get_other_case(chardata):
# Parse a line of ScriptExtensions.txt # Parse a line of ScriptExtensions.txt
def get_script_extension(chardata): def get_script_extension(chardata):
this_script_list = list(chardata[1].split(' ')) global last_script_extension
if len(this_script_list) == 1:
return script_abbrevs.index(this_script_list[0])
script_numbers = [] offset = len(script_lists) * script_list_item_size
for d in this_script_list: if last_script_extension == chardata[1]:
script_numbers.append(script_abbrevs.index(d)) return offset - script_list_item_size
script_numbers.append(0)
script_numbers_length = len(script_numbers)
for i in range(1, len(script_lists) - script_numbers_length + 1): last_script_extension = chardata[1]
for j in range(0, script_numbers_length): script_lists.append(tuple(script_abbrevs.index(abbrev) for abbrev in last_script_extension.split(' ')))
found = True return offset
if script_lists[i+j] != script_numbers[j]:
found = False
break
if found:
return -i
# Not found in existing lists
return_value = len(script_lists)
script_lists.extend(script_numbers)
return -return_value
# Read a whole table in memory, setting/checking the Unicode version # Read a whole table in memory, setting/checking the Unicode version
@ -538,26 +523,10 @@ file.close()
# multiple scripts. Initialize this list with a single entry, as the zeroth # multiple scripts. Initialize this list with a single entry, as the zeroth
# element is never used. # element is never used.
script_lists = [0] script_lists = [[]]
script_abbrevs_default = script_abbrevs.index('Zzzz') script_list_item_size = (script_names.index('Unknown') + 31) // 32
scriptx = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, script_abbrevs_default) last_script_extension = ""
scriptx = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, 0)
# Scan all characters and set their default script extension to the main
# script. We also have to adjust negative scriptx values, following a change in
# the way these work. They are currently negated offsets into the script_lists
# list, but have to be changed into indices in the new ucd_script_sets vector,
# which has fixed-size entries. We can compute the new offset by counting the
# zeros that precede the current offset.
for i in range(0, MAX_UNICODE):
if scriptx[i] == script_abbrevs_default:
scriptx[i] = script[i]
elif scriptx[i] < 0:
count = 1
for j in range(-scriptx[i], 0, -1):
if script_lists[j] == 0:
count += 1
scriptx[i] = -count * (int(len(script_names)/32) + 1)
# With the addition of the Script Extensions field, we needed some padding to # With the addition of the Script Extensions field, we needed some padding to
# get the Unicode records up to 12 bytes (multiple of 4). Originally this was a # get the Unicode records up to 12 bytes (multiple of 4). Originally this was a
@ -565,7 +534,7 @@ for i in range(0, MAX_UNICODE):
# are now used for the bidi class, so zero will do. # are now used for the bidi class, so zero will do.
padding_dummy = [0] * MAX_UNICODE padding_dummy = [0] * MAX_UNICODE
padding_dummy[0] = 0 padding_dummy[0] = 256
# This block of code was added by PH in September 2012. It scans the other_case # This block of code was added by PH in September 2012. It scans the other_case
# table to find sets of more than two characters that must all match each other # table to find sets of more than two characters that must all match each other
@ -806,24 +775,19 @@ f.write("""\
const uint32_t PRIV(ucd_script_sets)[] = { const uint32_t PRIV(ucd_script_sets)[] = {
""") """)
bitword_count = len(script_names)/32 + 1
bitwords = [0] * int(bitword_count)
for d in script_lists: for d in script_lists:
if d == 0: bitwords = [0] * script_list_item_size
for idx in d:
    # Script number idx occupies bit (idx % 32) of 32-bit word (idx // 32);
    # this must mirror the MAPBIT() test used by the C code that reads the
    # generated ucd_script_sets table.
    bitwords[idx // 32] |= 1 << (idx % 32)
s = " " s = " "
f.write(" ")
for x in bitwords: for x in bitwords:
f.write("%s" % s) f.write("%s" % s)
s = ", " s = ", "
f.write("0x%08xu" % x) f.write("0x%08xu" % x)
f.write(",\n") f.write(",\n")
bitwords = [0] * int(bitword_count)
else:
x = int(d/32)
y = int(d%32)
bitwords[x] = bitwords[x] | (1 << y)
f.write("};\n\n") f.write("};\n\n")

View File

@ -64,8 +64,10 @@ for i in range(0, len(break_properties), 2):
f.write(" ucp_gb%s,%s /* %s */\n" % (break_properties[i], sp, break_properties[i+1])) f.write(" ucp_gb%s,%s /* %s */\n" % (break_properties[i], sp, break_properties[i+1]))
f.write("};\n\n") f.write("};\n\n")
f.write("/* These are the script identifications, additions happen at the end. */\n\nenum {\n") f.write("/* These are the script identifications. */\n\nenum {\n /* Scripts which has characters in other scripts. */\n")
for i in script_names: for i in script_names:
if i == "Unknown":
f.write("\n /* Scripts which has no characters in other scripts. */\n")
f.write(" ucp_%s,\n" % i) f.write(" ucp_%s,\n" % i)
f.write("\n") f.write("\n")

View File

@ -92,8 +92,12 @@ std_bidi_class_names = stdnames(bidi_class_names)
# latter is used for the ucp_xx names. NOTE: for the script abbreviations, we # latter is used for the ucp_xx names. NOTE: for the script abbreviations, we
# still use the full original names. # still use the full original names.
utt_table = list(zip(std_script_names, script_names, ['PT_SCX'] * len(script_names))) scx_end = script_names.index('Unknown')
utt_table += list(zip(std_script_abbrevs, script_names, ['PT_SCX'] * len(script_abbrevs)))
utt_table = list(zip(std_script_names[0:scx_end], script_names[0:scx_end], ['PT_SCX'] * scx_end))
utt_table += list(zip(std_script_names[scx_end:], script_names[scx_end:], ['PT_SC'] * (len(script_names) - scx_end)))
utt_table += list(zip(std_script_abbrevs[0:scx_end], script_names[0:scx_end], ['PT_SCX'] * scx_end))
utt_table += list(zip(std_script_abbrevs[scx_end:], script_names[scx_end:], ['PT_SC'] * (len(script_names) - scx_end)))
# At least one script abbreviation is the same as the full name of the script, # At least one script abbreviation is the same as the full name of the script,
# so we must remove duplicates. It doesn't matter if this operation changes the # so we must remove duplicates. It doesn't matter if this operation changes the

View File

@ -200,7 +200,6 @@ check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
BOOL negated) BOOL negated)
{ {
BOOL ok; BOOL ok;
int scriptx;
const uint32_t *p; const uint32_t *p;
const ucd_record *prop = GET_UCD(c); const ucd_record *prop = GET_UCD(c);
@ -221,10 +220,8 @@ switch(ptype)
return (pdata == prop->script) == negated; return (pdata == prop->script) == negated;
case PT_SCX: case PT_SCX:
scriptx = prop->scriptx; ok = (pdata == prop->script
ok = pdata == prop->script || pdata == (unsigned int)scriptx; || MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, pdata) != 0);
if (!ok && scriptx < 0)
ok = MAPBIT(PRIV(ucd_script_sets) - scriptx, pdata) != 0;
return ok == negated; return ok == negated;
/* These are specials */ /* These are specials */

View File

@ -2206,15 +2206,25 @@ while (bot < top)
{ {
*pdataptr = PRIV(utt)[i].value; *pdataptr = PRIV(utt)[i].value;
if (vptr == NULL || ptscript == PT_NOTSCRIPT) if (vptr == NULL || ptscript == PT_NOTSCRIPT)
*ptypeptr = PRIV(utt)[i].type;
else
{ {
if (PRIV(utt)[i].type != PT_SCX) break; /* Non-script found */ *ptypeptr = PRIV(utt)[i].type;
*ptypeptr = ptscript;
}
return TRUE; return TRUE;
} }
switch (PRIV(utt)[i].type)
{
case PT_SC:
*ptypeptr = PT_SC;
return TRUE;
case PT_SCX:
*ptypeptr = ptscript;
return TRUE;
}
break; /* Non-script found */
}
if (r > 0) bot = i + 1; else top = i; if (r > 0) bot = i + 1; else top = i;
} }

View File

@ -1194,9 +1194,8 @@ for (;;)
break; break;
case PT_SCX: case PT_SCX:
OK = prop->script == code[2] || prop->scriptx == (int)code[2]; OK = (prop->script == code[2] ||
if (!OK && prop->scriptx < 0) MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[2]) != 0);
OK = MAPBIT(PRIV(ucd_script_sets) - prop->scriptx, code[2]) != 0;
break; break;
/* These are specials for combination cases. */ /* These are specials for combination cases. */
@ -1466,9 +1465,8 @@ for (;;)
break; break;
case PT_SCX: case PT_SCX:
OK = prop->script == code[3] || prop->scriptx == (int)code[3]; OK = (prop->script == code[3] ||
if (!OK && prop->scriptx < 0) MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[3]) != 0);
OK = MAPBIT(PRIV(ucd_script_sets) - prop->scriptx, code[3]) != 0;
break; break;
/* These are specials for combination cases. */ /* These are specials for combination cases. */
@ -1721,9 +1719,8 @@ for (;;)
break; break;
case PT_SCX: case PT_SCX:
OK = prop->script == code[3] || prop->scriptx == (int)code[3]; OK = (prop->script == code[3] ||
if (!OK && prop->scriptx < 0) MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[3]) != 0);
OK = MAPBIT(PRIV(ucd_script_sets) - prop->scriptx, code[3]) != 0;
break; break;
/* These are specials for combination cases. */ /* These are specials for combination cases. */
@ -2001,11 +1998,9 @@ for (;;)
break; break;
case PT_SCX: case PT_SCX:
OK = prop->script == code[1 + IMM2_SIZE + 2] || OK = (prop->script == code[1 + IMM2_SIZE + 2] ||
prop->scriptx == (int)code[1 + IMM2_SIZE + 2]; MAPBIT(PRIV(ucd_script_sets) + prop->scriptx,
if (!OK && prop->scriptx < 0) code[1 + IMM2_SIZE + 2]) != 0);
OK = MAPBIT(PRIV(ucd_script_sets) - prop->scriptx,
code[1 + IMM2_SIZE + 2]) != 0;
break; break;
/* These are specials for combination cases. */ /* These are specials for combination cases. */

View File

@ -1822,9 +1822,9 @@ typedef struct {
uint8_t gbprop; /* ucp_gbControl, etc. (grapheme break property) */ uint8_t gbprop; /* ucp_gbControl, etc. (grapheme break property) */
uint8_t caseset; /* offset to multichar other cases or zero */ uint8_t caseset; /* offset to multichar other cases or zero */
int32_t other_case; /* offset to other case, or zero if none */ int32_t other_case; /* offset to other case, or zero if none */
int16_t scriptx; /* script extension value */ uint8_t scriptx; /* script extension value */
uint8_t bidi; /* bidi class and control flag */ uint8_t bidi; /* bidi class and control flag */
uint8_t dummy; /* spare - to round to multiple of 4 bytes */ uint16_t dummy; /* spare - to round to multiple of 4 bytes */
} ucd_record; } ucd_record;
/* UCD access macros */ /* UCD access macros */
@ -1849,8 +1849,8 @@ typedef struct {
#define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case))) #define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case)))
#define UCD_SCRIPTX(ch) GET_UCD(ch)->scriptx #define UCD_SCRIPTX(ch) GET_UCD(ch)->scriptx
/* The "scriptx" field, when negative, gives an offset into a vector of 32-bit /* The "scriptx" field gives an offset into a vector of 32-bit words that
words that form a bitmap representing a list of scripts. This macro tests for a form a bitmap representing a list of scripts. This macro tests for a
script in the map by number. */ script in the map by number. */
#define MAPBIT(map,script) ((map)[(script)/32]&(1u<<((script)%32))) #define MAPBIT(map,script) ((map)[(script)/32]&(1u<<((script)%32)))

View File

@ -2454,11 +2454,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
case PT_SCX: case PT_SCX:
{ {
int scriptx = prop->scriptx; BOOL ok = (Fecode[2] == prop->script ||
BOOL ok = Fecode[2] == prop->script || MAPBIT((PRIV(ucd_script_sets) + prop->scriptx), Fecode[2]) != 0);
Fecode[2] == (unsigned int)scriptx;
if (!ok && scriptx < 0)
ok = MAPBIT((PRIV(ucd_script_sets) - scriptx), Fecode[2]) != 0;
if (ok == notmatch) RRETURN(MATCH_NOMATCH); if (ok == notmatch) RRETURN(MATCH_NOMATCH);
} }
break; break;
@ -2728,7 +2725,6 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
for (i = 1; i <= Lmin; i++) for (i = 1; i <= Lmin; i++)
{ {
BOOL ok; BOOL ok;
int scriptx;
const ucd_record *prop; const ucd_record *prop;
if (Feptr >= mb->end_subject) if (Feptr >= mb->end_subject)
{ {
@ -2737,10 +2733,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
} }
GETCHARINCTEST(fc, Feptr); GETCHARINCTEST(fc, Feptr);
prop = GET_UCD(fc); prop = GET_UCD(fc);
scriptx = prop->scriptx; ok = (prop->script == Lpropvalue ||
ok = prop->script == Lpropvalue || scriptx == (int)Lpropvalue; MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0);
if (!ok && scriptx < 0)
ok = MAPBIT(PRIV(ucd_script_sets) - scriptx, Lpropvalue) != 0;
if (ok == notmatch) if (ok == notmatch)
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
} }
@ -3521,7 +3515,6 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
for (;;) for (;;)
{ {
BOOL ok; BOOL ok;
int scriptx;
const ucd_record *prop; const ucd_record *prop;
RMATCH(Fecode, RM225); RMATCH(Fecode, RM225);
if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (rrc != MATCH_NOMATCH) RRETURN(rrc);
@ -3533,10 +3526,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
} }
GETCHARINCTEST(fc, Feptr); GETCHARINCTEST(fc, Feptr);
prop = GET_UCD(fc); prop = GET_UCD(fc);
scriptx = prop->scriptx; ok = (prop->script == Lpropvalue
ok = prop->script == Lpropvalue || scriptx == (int)Lpropvalue; || MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0);
if (!ok && scriptx < 0)
ok = MAPBIT(PRIV(ucd_script_sets) - scriptx, Lpropvalue) != 0;
if (ok == (Lctype == OP_NOTPROP)) if (ok == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
} }
@ -4104,7 +4095,6 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
{ {
BOOL ok; BOOL ok;
const ucd_record *prop; const ucd_record *prop;
int scriptx;
int len = 1; int len = 1;
if (Feptr >= mb->end_subject) if (Feptr >= mb->end_subject)
{ {
@ -4113,10 +4103,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
} }
GETCHARLENTEST(fc, Feptr, len); GETCHARLENTEST(fc, Feptr, len);
prop = GET_UCD(fc); prop = GET_UCD(fc);
scriptx = prop->scriptx; ok = (prop->script == Lpropvalue ||
ok = prop->script == Lpropvalue || scriptx == (int)Lpropvalue; MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0);
if (!ok && scriptx < 0)
ok = MAPBIT(PRIV(ucd_script_sets) - scriptx, Lpropvalue) != 0;
if (ok == notmatch) break; if (ok == notmatch) break;
Feptr+= len; Feptr+= len;
} }

File diff suppressed because it is too large Load Diff

View File

@ -149,57 +149,37 @@ enum {
ucp_gbExtended_Pictographic, /* 14 */ ucp_gbExtended_Pictographic, /* 14 */
}; };
/* These are the script identifications, additions happen at the end. */ /* These are the script identifications. */
enum { enum {
ucp_Unknown, /* Scripts which has characters in other scripts. */
ucp_Arabic, ucp_Arabic,
ucp_Armenian,
ucp_Bengali, ucp_Bengali,
ucp_Bopomofo, ucp_Bopomofo,
ucp_Braille,
ucp_Buginese, ucp_Buginese,
ucp_Buhid, ucp_Buhid,
ucp_Canadian_Aboriginal,
ucp_Cherokee,
ucp_Common,
ucp_Coptic, ucp_Coptic,
ucp_Cypriot, ucp_Cypriot,
ucp_Cyrillic, ucp_Cyrillic,
ucp_Deseret,
ucp_Devanagari, ucp_Devanagari,
ucp_Ethiopic,
ucp_Georgian, ucp_Georgian,
ucp_Glagolitic, ucp_Glagolitic,
ucp_Gothic,
ucp_Greek, ucp_Greek,
ucp_Gujarati, ucp_Gujarati,
ucp_Gurmukhi, ucp_Gurmukhi,
ucp_Han, ucp_Han,
ucp_Hangul, ucp_Hangul,
ucp_Hanunoo, ucp_Hanunoo,
ucp_Hebrew,
ucp_Hiragana, ucp_Hiragana,
ucp_Inherited,
ucp_Kannada, ucp_Kannada,
ucp_Katakana, ucp_Katakana,
ucp_Kharoshthi,
ucp_Khmer,
ucp_Lao,
ucp_Latin, ucp_Latin,
ucp_Limbu, ucp_Limbu,
ucp_Linear_B, ucp_Linear_B,
ucp_Malayalam, ucp_Malayalam,
ucp_Mongolian, ucp_Mongolian,
ucp_Myanmar, ucp_Myanmar,
ucp_New_Tai_Lue,
ucp_Ogham,
ucp_Old_Italic,
ucp_Old_Persian,
ucp_Oriya, ucp_Oriya,
ucp_Osmanya,
ucp_Runic,
ucp_Shavian,
ucp_Sinhala, ucp_Sinhala,
ucp_Syloti_Nagri, ucp_Syloti_Nagri,
ucp_Syriac, ucp_Syriac,
@ -209,19 +189,70 @@ enum {
ucp_Tamil, ucp_Tamil,
ucp_Telugu, ucp_Telugu,
ucp_Thaana, ucp_Thaana,
ucp_Yi,
ucp_Nko,
ucp_Phags_Pa,
ucp_Kayah_Li,
ucp_Javanese,
ucp_Kaithi,
ucp_Mandaic,
ucp_Chakma,
ucp_Sharada,
ucp_Takri,
ucp_Duployan,
ucp_Grantha,
ucp_Khojki,
ucp_Khudawadi,
ucp_Linear_A,
ucp_Mahajani,
ucp_Manichaean,
ucp_Modi,
ucp_Old_Permic,
ucp_Psalter_Pahlavi,
ucp_Tirhuta,
ucp_Multani,
ucp_Adlam,
ucp_Masaram_Gondi,
ucp_Dogra,
ucp_Gunjala_Gondi,
ucp_Hanifi_Rohingya,
ucp_Sogdian,
ucp_Nandinagari,
ucp_Yezidi,
ucp_Cypro_Minoan,
ucp_Old_Uyghur,
/* Scripts which has no characters in other scripts. */
ucp_Unknown,
ucp_Armenian,
ucp_Braille,
ucp_Canadian_Aboriginal,
ucp_Cherokee,
ucp_Common,
ucp_Deseret,
ucp_Ethiopic,
ucp_Gothic,
ucp_Hebrew,
ucp_Inherited,
ucp_Kharoshthi,
ucp_Khmer,
ucp_Lao,
ucp_New_Tai_Lue,
ucp_Ogham,
ucp_Old_Italic,
ucp_Old_Persian,
ucp_Osmanya,
ucp_Runic,
ucp_Shavian,
ucp_Thai, ucp_Thai,
ucp_Tibetan, ucp_Tibetan,
ucp_Tifinagh, ucp_Tifinagh,
ucp_Ugaritic, ucp_Ugaritic,
ucp_Yi,
ucp_Balinese, ucp_Balinese,
ucp_Cuneiform, ucp_Cuneiform,
ucp_Nko,
ucp_Phags_Pa,
ucp_Phoenician, ucp_Phoenician,
ucp_Carian, ucp_Carian,
ucp_Cham, ucp_Cham,
ucp_Kayah_Li,
ucp_Lepcha, ucp_Lepcha,
ucp_Lycian, ucp_Lycian,
ucp_Lydian, ucp_Lydian,
@ -236,8 +267,6 @@ enum {
ucp_Imperial_Aramaic, ucp_Imperial_Aramaic,
ucp_Inscriptional_Pahlavi, ucp_Inscriptional_Pahlavi,
ucp_Inscriptional_Parthian, ucp_Inscriptional_Parthian,
ucp_Javanese,
ucp_Kaithi,
ucp_Lisu, ucp_Lisu,
ucp_Meetei_Mayek, ucp_Meetei_Mayek,
ucp_Old_South_Arabian, ucp_Old_South_Arabian,
@ -247,70 +276,44 @@ enum {
ucp_Tai_Viet, ucp_Tai_Viet,
ucp_Batak, ucp_Batak,
ucp_Brahmi, ucp_Brahmi,
ucp_Mandaic,
ucp_Chakma,
ucp_Meroitic_Cursive, ucp_Meroitic_Cursive,
ucp_Meroitic_Hieroglyphs, ucp_Meroitic_Hieroglyphs,
ucp_Miao, ucp_Miao,
ucp_Sharada,
ucp_Sora_Sompeng, ucp_Sora_Sompeng,
ucp_Takri,
ucp_Bassa_Vah, ucp_Bassa_Vah,
ucp_Caucasian_Albanian, ucp_Caucasian_Albanian,
ucp_Duployan,
ucp_Elbasan, ucp_Elbasan,
ucp_Grantha,
ucp_Khojki,
ucp_Khudawadi,
ucp_Linear_A,
ucp_Mahajani,
ucp_Manichaean,
ucp_Mende_Kikakui, ucp_Mende_Kikakui,
ucp_Modi,
ucp_Mro, ucp_Mro,
ucp_Nabataean, ucp_Nabataean,
ucp_Old_North_Arabian, ucp_Old_North_Arabian,
ucp_Old_Permic,
ucp_Pahawh_Hmong, ucp_Pahawh_Hmong,
ucp_Palmyrene, ucp_Palmyrene,
ucp_Psalter_Pahlavi,
ucp_Pau_Cin_Hau, ucp_Pau_Cin_Hau,
ucp_Siddham, ucp_Siddham,
ucp_Tirhuta,
ucp_Warang_Citi, ucp_Warang_Citi,
ucp_Ahom, ucp_Ahom,
ucp_Anatolian_Hieroglyphs, ucp_Anatolian_Hieroglyphs,
ucp_Hatran, ucp_Hatran,
ucp_Multani,
ucp_Old_Hungarian, ucp_Old_Hungarian,
ucp_SignWriting, ucp_SignWriting,
ucp_Adlam,
ucp_Bhaiksuki, ucp_Bhaiksuki,
ucp_Marchen, ucp_Marchen,
ucp_Newa, ucp_Newa,
ucp_Osage, ucp_Osage,
ucp_Tangut, ucp_Tangut,
ucp_Masaram_Gondi,
ucp_Nushu, ucp_Nushu,
ucp_Soyombo, ucp_Soyombo,
ucp_Zanabazar_Square, ucp_Zanabazar_Square,
ucp_Dogra,
ucp_Gunjala_Gondi,
ucp_Hanifi_Rohingya,
ucp_Makasar, ucp_Makasar,
ucp_Medefaidrin, ucp_Medefaidrin,
ucp_Old_Sogdian, ucp_Old_Sogdian,
ucp_Sogdian,
ucp_Elymaic, ucp_Elymaic,
ucp_Nandinagari,
ucp_Nyiakeng_Puachue_Hmong, ucp_Nyiakeng_Puachue_Hmong,
ucp_Wancho, ucp_Wancho,
ucp_Chorasmian, ucp_Chorasmian,
ucp_Dives_Akuru, ucp_Dives_Akuru,
ucp_Khitan_Small_Script, ucp_Khitan_Small_Script,
ucp_Yezidi,
ucp_Cypro_Minoan,
ucp_Old_Uyghur,
ucp_Tangsa, ucp_Tangsa,
ucp_Toto, ucp_Toto,
ucp_Vithkuqi, ucp_Vithkuqi,

View File

@ -833,29 +833,29 @@ const char PRIV(utt_names)[] =
const ucp_type_table PRIV(utt)[] = { const ucp_type_table PRIV(utt)[] = {
{ 0, PT_SCX, ucp_Adlam }, { 0, PT_SCX, ucp_Adlam },
{ 6, PT_SCX, ucp_Adlam }, { 6, PT_SCX, ucp_Adlam },
{ 11, PT_SCX, ucp_Caucasian_Albanian }, { 11, PT_SC, ucp_Caucasian_Albanian },
{ 16, PT_SCX, ucp_Ahom }, { 16, PT_SC, ucp_Ahom },
{ 21, PT_SCX, ucp_Anatolian_Hieroglyphs }, { 21, PT_SC, ucp_Anatolian_Hieroglyphs },
{ 42, PT_ANY, 0 }, { 42, PT_ANY, 0 },
{ 46, PT_SCX, ucp_Arabic }, { 46, PT_SCX, ucp_Arabic },
{ 51, PT_SCX, ucp_Arabic }, { 51, PT_SCX, ucp_Arabic },
{ 58, PT_SCX, ucp_Armenian }, { 58, PT_SC, ucp_Armenian },
{ 67, PT_SCX, ucp_Imperial_Aramaic }, { 67, PT_SC, ucp_Imperial_Aramaic },
{ 72, PT_SCX, ucp_Armenian }, { 72, PT_SC, ucp_Armenian },
{ 77, PT_SCX, ucp_Avestan }, { 77, PT_SC, ucp_Avestan },
{ 85, PT_SCX, ucp_Avestan }, { 85, PT_SC, ucp_Avestan },
{ 90, PT_SCX, ucp_Balinese }, { 90, PT_SC, ucp_Balinese },
{ 95, PT_SCX, ucp_Balinese }, { 95, PT_SC, ucp_Balinese },
{ 104, PT_SCX, ucp_Bamum }, { 104, PT_SC, ucp_Bamum },
{ 109, PT_SCX, ucp_Bamum }, { 109, PT_SC, ucp_Bamum },
{ 115, PT_SCX, ucp_Bassa_Vah }, { 115, PT_SC, ucp_Bassa_Vah },
{ 120, PT_SCX, ucp_Bassa_Vah }, { 120, PT_SC, ucp_Bassa_Vah },
{ 129, PT_SCX, ucp_Batak }, { 129, PT_SC, ucp_Batak },
{ 135, PT_SCX, ucp_Batak }, { 135, PT_SC, ucp_Batak },
{ 140, PT_SCX, ucp_Bengali }, { 140, PT_SCX, ucp_Bengali },
{ 145, PT_SCX, ucp_Bengali }, { 145, PT_SCX, ucp_Bengali },
{ 153, PT_SCX, ucp_Bhaiksuki }, { 153, PT_SC, ucp_Bhaiksuki },
{ 163, PT_SCX, ucp_Bhaiksuki }, { 163, PT_SC, ucp_Bhaiksuki },
{ 168, PT_BIDICL, ucp_bidiAL }, { 168, PT_BIDICL, ucp_bidiAL },
{ 175, PT_BIDICL, ucp_bidiAN }, { 175, PT_BIDICL, ucp_bidiAN },
{ 182, PT_BIDICL, ucp_bidiB }, { 182, PT_BIDICL, ucp_bidiB },
@ -883,68 +883,68 @@ const ucp_type_table PRIV(utt)[] = {
{ 346, PT_BIDICL, ucp_bidiWS }, { 346, PT_BIDICL, ucp_bidiWS },
{ 353, PT_SCX, ucp_Bopomofo }, { 353, PT_SCX, ucp_Bopomofo },
{ 358, PT_SCX, ucp_Bopomofo }, { 358, PT_SCX, ucp_Bopomofo },
{ 367, PT_SCX, ucp_Brahmi }, { 367, PT_SC, ucp_Brahmi },
{ 372, PT_SCX, ucp_Brahmi }, { 372, PT_SC, ucp_Brahmi },
{ 379, PT_SCX, ucp_Braille }, { 379, PT_SC, ucp_Braille },
{ 384, PT_SCX, ucp_Braille }, { 384, PT_SC, ucp_Braille },
{ 392, PT_SCX, ucp_Buginese }, { 392, PT_SCX, ucp_Buginese },
{ 397, PT_SCX, ucp_Buginese }, { 397, PT_SCX, ucp_Buginese },
{ 406, PT_SCX, ucp_Buhid }, { 406, PT_SCX, ucp_Buhid },
{ 411, PT_SCX, ucp_Buhid }, { 411, PT_SCX, ucp_Buhid },
{ 417, PT_GC, ucp_C }, { 417, PT_GC, ucp_C },
{ 419, PT_SCX, ucp_Chakma }, { 419, PT_SCX, ucp_Chakma },
{ 424, PT_SCX, ucp_Canadian_Aboriginal }, { 424, PT_SC, ucp_Canadian_Aboriginal },
{ 443, PT_SCX, ucp_Canadian_Aboriginal }, { 443, PT_SC, ucp_Canadian_Aboriginal },
{ 448, PT_SCX, ucp_Carian }, { 448, PT_SC, ucp_Carian },
{ 453, PT_SCX, ucp_Carian }, { 453, PT_SC, ucp_Carian },
{ 460, PT_SCX, ucp_Caucasian_Albanian }, { 460, PT_SC, ucp_Caucasian_Albanian },
{ 478, PT_PC, ucp_Cc }, { 478, PT_PC, ucp_Cc },
{ 481, PT_PC, ucp_Cf }, { 481, PT_PC, ucp_Cf },
{ 484, PT_SCX, ucp_Chakma }, { 484, PT_SCX, ucp_Chakma },
{ 491, PT_SCX, ucp_Cham }, { 491, PT_SC, ucp_Cham },
{ 496, PT_SCX, ucp_Cherokee }, { 496, PT_SC, ucp_Cherokee },
{ 501, PT_SCX, ucp_Cherokee }, { 501, PT_SC, ucp_Cherokee },
{ 510, PT_SCX, ucp_Chorasmian }, { 510, PT_SC, ucp_Chorasmian },
{ 521, PT_SCX, ucp_Chorasmian }, { 521, PT_SC, ucp_Chorasmian },
{ 526, PT_PC, ucp_Cn }, { 526, PT_PC, ucp_Cn },
{ 529, PT_PC, ucp_Co }, { 529, PT_PC, ucp_Co },
{ 532, PT_SCX, ucp_Common }, { 532, PT_SC, ucp_Common },
{ 539, PT_SCX, ucp_Coptic }, { 539, PT_SCX, ucp_Coptic },
{ 544, PT_SCX, ucp_Coptic }, { 544, PT_SCX, ucp_Coptic },
{ 551, PT_SCX, ucp_Cypro_Minoan }, { 551, PT_SCX, ucp_Cypro_Minoan },
{ 556, PT_SCX, ucp_Cypriot }, { 556, PT_SCX, ucp_Cypriot },
{ 561, PT_PC, ucp_Cs }, { 561, PT_PC, ucp_Cs },
{ 564, PT_SCX, ucp_Cuneiform }, { 564, PT_SC, ucp_Cuneiform },
{ 574, PT_SCX, ucp_Cypriot }, { 574, PT_SCX, ucp_Cypriot },
{ 582, PT_SCX, ucp_Cypro_Minoan }, { 582, PT_SCX, ucp_Cypro_Minoan },
{ 594, PT_SCX, ucp_Cyrillic }, { 594, PT_SCX, ucp_Cyrillic },
{ 603, PT_SCX, ucp_Cyrillic }, { 603, PT_SCX, ucp_Cyrillic },
{ 608, PT_SCX, ucp_Deseret }, { 608, PT_SC, ucp_Deseret },
{ 616, PT_SCX, ucp_Devanagari }, { 616, PT_SCX, ucp_Devanagari },
{ 621, PT_SCX, ucp_Devanagari }, { 621, PT_SCX, ucp_Devanagari },
{ 632, PT_SCX, ucp_Dives_Akuru }, { 632, PT_SC, ucp_Dives_Akuru },
{ 637, PT_SCX, ucp_Dives_Akuru }, { 637, PT_SC, ucp_Dives_Akuru },
{ 648, PT_SCX, ucp_Dogra }, { 648, PT_SCX, ucp_Dogra },
{ 653, PT_SCX, ucp_Dogra }, { 653, PT_SCX, ucp_Dogra },
{ 659, PT_SCX, ucp_Deseret }, { 659, PT_SC, ucp_Deseret },
{ 664, PT_SCX, ucp_Duployan }, { 664, PT_SCX, ucp_Duployan },
{ 669, PT_SCX, ucp_Duployan }, { 669, PT_SCX, ucp_Duployan },
{ 678, PT_SCX, ucp_Egyptian_Hieroglyphs }, { 678, PT_SC, ucp_Egyptian_Hieroglyphs },
{ 683, PT_SCX, ucp_Egyptian_Hieroglyphs }, { 683, PT_SC, ucp_Egyptian_Hieroglyphs },
{ 703, PT_SCX, ucp_Elbasan }, { 703, PT_SC, ucp_Elbasan },
{ 708, PT_SCX, ucp_Elbasan }, { 708, PT_SC, ucp_Elbasan },
{ 716, PT_SCX, ucp_Elymaic }, { 716, PT_SC, ucp_Elymaic },
{ 721, PT_SCX, ucp_Elymaic }, { 721, PT_SC, ucp_Elymaic },
{ 729, PT_SCX, ucp_Ethiopic }, { 729, PT_SC, ucp_Ethiopic },
{ 734, PT_SCX, ucp_Ethiopic }, { 734, PT_SC, ucp_Ethiopic },
{ 743, PT_SCX, ucp_Georgian }, { 743, PT_SCX, ucp_Georgian },
{ 748, PT_SCX, ucp_Georgian }, { 748, PT_SCX, ucp_Georgian },
{ 757, PT_SCX, ucp_Glagolitic }, { 757, PT_SCX, ucp_Glagolitic },
{ 762, PT_SCX, ucp_Glagolitic }, { 762, PT_SCX, ucp_Glagolitic },
{ 773, PT_SCX, ucp_Gunjala_Gondi }, { 773, PT_SCX, ucp_Gunjala_Gondi },
{ 778, PT_SCX, ucp_Masaram_Gondi }, { 778, PT_SCX, ucp_Masaram_Gondi },
{ 783, PT_SCX, ucp_Gothic }, { 783, PT_SC, ucp_Gothic },
{ 788, PT_SCX, ucp_Gothic }, { 788, PT_SC, ucp_Gothic },
{ 795, PT_SCX, ucp_Grantha }, { 795, PT_SCX, ucp_Grantha },
{ 800, PT_SCX, ucp_Grantha }, { 800, PT_SCX, ucp_Grantha },
{ 808, PT_SCX, ucp_Greek }, { 808, PT_SCX, ucp_Greek },
@ -961,21 +961,21 @@ const ucp_type_table PRIV(utt)[] = {
{ 881, PT_SCX, ucp_Hanifi_Rohingya }, { 881, PT_SCX, ucp_Hanifi_Rohingya },
{ 896, PT_SCX, ucp_Hanunoo }, { 896, PT_SCX, ucp_Hanunoo },
{ 901, PT_SCX, ucp_Hanunoo }, { 901, PT_SCX, ucp_Hanunoo },
{ 909, PT_SCX, ucp_Hatran }, { 909, PT_SC, ucp_Hatran },
{ 914, PT_SCX, ucp_Hatran }, { 914, PT_SC, ucp_Hatran },
{ 921, PT_SCX, ucp_Hebrew }, { 921, PT_SC, ucp_Hebrew },
{ 926, PT_SCX, ucp_Hebrew }, { 926, PT_SC, ucp_Hebrew },
{ 933, PT_SCX, ucp_Hiragana }, { 933, PT_SCX, ucp_Hiragana },
{ 938, PT_SCX, ucp_Hiragana }, { 938, PT_SCX, ucp_Hiragana },
{ 947, PT_SCX, ucp_Anatolian_Hieroglyphs }, { 947, PT_SC, ucp_Anatolian_Hieroglyphs },
{ 952, PT_SCX, ucp_Pahawh_Hmong }, { 952, PT_SC, ucp_Pahawh_Hmong },
{ 957, PT_SCX, ucp_Nyiakeng_Puachue_Hmong }, { 957, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
{ 962, PT_SCX, ucp_Old_Hungarian }, { 962, PT_SC, ucp_Old_Hungarian },
{ 967, PT_SCX, ucp_Imperial_Aramaic }, { 967, PT_SC, ucp_Imperial_Aramaic },
{ 983, PT_SCX, ucp_Inherited }, { 983, PT_SC, ucp_Inherited },
{ 993, PT_SCX, ucp_Inscriptional_Pahlavi }, { 993, PT_SC, ucp_Inscriptional_Pahlavi },
{ 1014, PT_SCX, ucp_Inscriptional_Parthian }, { 1014, PT_SC, ucp_Inscriptional_Parthian },
{ 1036, PT_SCX, ucp_Old_Italic }, { 1036, PT_SC, ucp_Old_Italic },
{ 1041, PT_SCX, ucp_Javanese }, { 1041, PT_SCX, ucp_Javanese },
{ 1046, PT_SCX, ucp_Javanese }, { 1046, PT_SCX, ucp_Javanese },
{ 1055, PT_SCX, ucp_Kaithi }, { 1055, PT_SCX, ucp_Kaithi },
@ -984,123 +984,123 @@ const ucp_type_table PRIV(utt)[] = {
{ 1072, PT_SCX, ucp_Kannada }, { 1072, PT_SCX, ucp_Kannada },
{ 1080, PT_SCX, ucp_Katakana }, { 1080, PT_SCX, ucp_Katakana },
{ 1089, PT_SCX, ucp_Kayah_Li }, { 1089, PT_SCX, ucp_Kayah_Li },
{ 1097, PT_SCX, ucp_Kharoshthi }, { 1097, PT_SC, ucp_Kharoshthi },
{ 1102, PT_SCX, ucp_Kharoshthi }, { 1102, PT_SC, ucp_Kharoshthi },
{ 1113, PT_SCX, ucp_Khitan_Small_Script }, { 1113, PT_SC, ucp_Khitan_Small_Script },
{ 1131, PT_SCX, ucp_Khmer }, { 1131, PT_SC, ucp_Khmer },
{ 1137, PT_SCX, ucp_Khmer }, { 1137, PT_SC, ucp_Khmer },
{ 1142, PT_SCX, ucp_Khojki }, { 1142, PT_SCX, ucp_Khojki },
{ 1147, PT_SCX, ucp_Khojki }, { 1147, PT_SCX, ucp_Khojki },
{ 1154, PT_SCX, ucp_Khudawadi }, { 1154, PT_SCX, ucp_Khudawadi },
{ 1164, PT_SCX, ucp_Khitan_Small_Script }, { 1164, PT_SC, ucp_Khitan_Small_Script },
{ 1169, PT_SCX, ucp_Kannada }, { 1169, PT_SCX, ucp_Kannada },
{ 1174, PT_SCX, ucp_Kaithi }, { 1174, PT_SCX, ucp_Kaithi },
{ 1179, PT_GC, ucp_L }, { 1179, PT_GC, ucp_L },
{ 1181, PT_LAMP, 0 }, { 1181, PT_LAMP, 0 },
{ 1184, PT_SCX, ucp_Tai_Tham }, { 1184, PT_SC, ucp_Tai_Tham },
{ 1189, PT_SCX, ucp_Lao }, { 1189, PT_SC, ucp_Lao },
{ 1193, PT_SCX, ucp_Lao }, { 1193, PT_SC, ucp_Lao },
{ 1198, PT_SCX, ucp_Latin }, { 1198, PT_SCX, ucp_Latin },
{ 1204, PT_SCX, ucp_Latin }, { 1204, PT_SCX, ucp_Latin },
{ 1209, PT_LAMP, 0 }, { 1209, PT_LAMP, 0 },
{ 1212, PT_SCX, ucp_Lepcha }, { 1212, PT_SC, ucp_Lepcha },
{ 1217, PT_SCX, ucp_Lepcha }, { 1217, PT_SC, ucp_Lepcha },
{ 1224, PT_SCX, ucp_Limbu }, { 1224, PT_SCX, ucp_Limbu },
{ 1229, PT_SCX, ucp_Limbu }, { 1229, PT_SCX, ucp_Limbu },
{ 1235, PT_SCX, ucp_Linear_A }, { 1235, PT_SCX, ucp_Linear_A },
{ 1240, PT_SCX, ucp_Linear_B }, { 1240, PT_SCX, ucp_Linear_B },
{ 1245, PT_SCX, ucp_Linear_A }, { 1245, PT_SCX, ucp_Linear_A },
{ 1253, PT_SCX, ucp_Linear_B }, { 1253, PT_SCX, ucp_Linear_B },
{ 1261, PT_SCX, ucp_Lisu }, { 1261, PT_SC, ucp_Lisu },
{ 1266, PT_PC, ucp_Ll }, { 1266, PT_PC, ucp_Ll },
{ 1269, PT_PC, ucp_Lm }, { 1269, PT_PC, ucp_Lm },
{ 1272, PT_PC, ucp_Lo }, { 1272, PT_PC, ucp_Lo },
{ 1275, PT_PC, ucp_Lt }, { 1275, PT_PC, ucp_Lt },
{ 1278, PT_PC, ucp_Lu }, { 1278, PT_PC, ucp_Lu },
{ 1281, PT_SCX, ucp_Lycian }, { 1281, PT_SC, ucp_Lycian },
{ 1286, PT_SCX, ucp_Lycian }, { 1286, PT_SC, ucp_Lycian },
{ 1293, PT_SCX, ucp_Lydian }, { 1293, PT_SC, ucp_Lydian },
{ 1298, PT_SCX, ucp_Lydian }, { 1298, PT_SC, ucp_Lydian },
{ 1305, PT_GC, ucp_M }, { 1305, PT_GC, ucp_M },
{ 1307, PT_SCX, ucp_Mahajani }, { 1307, PT_SCX, ucp_Mahajani },
{ 1316, PT_SCX, ucp_Mahajani }, { 1316, PT_SCX, ucp_Mahajani },
{ 1321, PT_SCX, ucp_Makasar }, { 1321, PT_SC, ucp_Makasar },
{ 1326, PT_SCX, ucp_Makasar }, { 1326, PT_SC, ucp_Makasar },
{ 1334, PT_SCX, ucp_Malayalam }, { 1334, PT_SCX, ucp_Malayalam },
{ 1344, PT_SCX, ucp_Mandaic }, { 1344, PT_SCX, ucp_Mandaic },
{ 1349, PT_SCX, ucp_Mandaic }, { 1349, PT_SCX, ucp_Mandaic },
{ 1357, PT_SCX, ucp_Manichaean }, { 1357, PT_SCX, ucp_Manichaean },
{ 1362, PT_SCX, ucp_Manichaean }, { 1362, PT_SCX, ucp_Manichaean },
{ 1373, PT_SCX, ucp_Marchen }, { 1373, PT_SC, ucp_Marchen },
{ 1378, PT_SCX, ucp_Marchen }, { 1378, PT_SC, ucp_Marchen },
{ 1386, PT_SCX, ucp_Masaram_Gondi }, { 1386, PT_SCX, ucp_Masaram_Gondi },
{ 1399, PT_PC, ucp_Mc }, { 1399, PT_PC, ucp_Mc },
{ 1402, PT_PC, ucp_Me }, { 1402, PT_PC, ucp_Me },
{ 1405, PT_SCX, ucp_Medefaidrin }, { 1405, PT_SC, ucp_Medefaidrin },
{ 1417, PT_SCX, ucp_Medefaidrin }, { 1417, PT_SC, ucp_Medefaidrin },
{ 1422, PT_SCX, ucp_Meetei_Mayek }, { 1422, PT_SC, ucp_Meetei_Mayek },
{ 1434, PT_SCX, ucp_Mende_Kikakui }, { 1434, PT_SC, ucp_Mende_Kikakui },
{ 1439, PT_SCX, ucp_Mende_Kikakui }, { 1439, PT_SC, ucp_Mende_Kikakui },
{ 1452, PT_SCX, ucp_Meroitic_Cursive }, { 1452, PT_SC, ucp_Meroitic_Cursive },
{ 1457, PT_SCX, ucp_Meroitic_Hieroglyphs }, { 1457, PT_SC, ucp_Meroitic_Hieroglyphs },
{ 1462, PT_SCX, ucp_Meroitic_Cursive }, { 1462, PT_SC, ucp_Meroitic_Cursive },
{ 1478, PT_SCX, ucp_Meroitic_Hieroglyphs }, { 1478, PT_SC, ucp_Meroitic_Hieroglyphs },
{ 1498, PT_SCX, ucp_Miao }, { 1498, PT_SC, ucp_Miao },
{ 1503, PT_SCX, ucp_Malayalam }, { 1503, PT_SCX, ucp_Malayalam },
{ 1508, PT_PC, ucp_Mn }, { 1508, PT_PC, ucp_Mn },
{ 1511, PT_SCX, ucp_Modi }, { 1511, PT_SCX, ucp_Modi },
{ 1516, PT_SCX, ucp_Mongolian }, { 1516, PT_SCX, ucp_Mongolian },
{ 1521, PT_SCX, ucp_Mongolian }, { 1521, PT_SCX, ucp_Mongolian },
{ 1531, PT_SCX, ucp_Mro }, { 1531, PT_SC, ucp_Mro },
{ 1535, PT_SCX, ucp_Mro }, { 1535, PT_SC, ucp_Mro },
{ 1540, PT_SCX, ucp_Meetei_Mayek }, { 1540, PT_SC, ucp_Meetei_Mayek },
{ 1545, PT_SCX, ucp_Multani }, { 1545, PT_SCX, ucp_Multani },
{ 1550, PT_SCX, ucp_Multani }, { 1550, PT_SCX, ucp_Multani },
{ 1558, PT_SCX, ucp_Myanmar }, { 1558, PT_SCX, ucp_Myanmar },
{ 1566, PT_SCX, ucp_Myanmar }, { 1566, PT_SCX, ucp_Myanmar },
{ 1571, PT_GC, ucp_N }, { 1571, PT_GC, ucp_N },
{ 1573, PT_SCX, ucp_Nabataean }, { 1573, PT_SC, ucp_Nabataean },
{ 1583, PT_SCX, ucp_Nandinagari }, { 1583, PT_SCX, ucp_Nandinagari },
{ 1588, PT_SCX, ucp_Nandinagari }, { 1588, PT_SCX, ucp_Nandinagari },
{ 1600, PT_SCX, ucp_Old_North_Arabian }, { 1600, PT_SC, ucp_Old_North_Arabian },
{ 1605, PT_SCX, ucp_Nabataean }, { 1605, PT_SC, ucp_Nabataean },
{ 1610, PT_PC, ucp_Nd }, { 1610, PT_PC, ucp_Nd },
{ 1613, PT_SCX, ucp_Newa }, { 1613, PT_SC, ucp_Newa },
{ 1618, PT_SCX, ucp_New_Tai_Lue }, { 1618, PT_SC, ucp_New_Tai_Lue },
{ 1628, PT_SCX, ucp_Nko }, { 1628, PT_SCX, ucp_Nko },
{ 1632, PT_SCX, ucp_Nko }, { 1632, PT_SCX, ucp_Nko },
{ 1637, PT_PC, ucp_Nl }, { 1637, PT_PC, ucp_Nl },
{ 1640, PT_PC, ucp_No }, { 1640, PT_PC, ucp_No },
{ 1643, PT_SCX, ucp_Nushu }, { 1643, PT_SC, ucp_Nushu },
{ 1648, PT_SCX, ucp_Nushu }, { 1648, PT_SC, ucp_Nushu },
{ 1654, PT_SCX, ucp_Nyiakeng_Puachue_Hmong }, { 1654, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
{ 1675, PT_SCX, ucp_Ogham }, { 1675, PT_SC, ucp_Ogham },
{ 1680, PT_SCX, ucp_Ogham }, { 1680, PT_SC, ucp_Ogham },
{ 1686, PT_SCX, ucp_Ol_Chiki }, { 1686, PT_SC, ucp_Ol_Chiki },
{ 1694, PT_SCX, ucp_Ol_Chiki }, { 1694, PT_SC, ucp_Ol_Chiki },
{ 1699, PT_SCX, ucp_Old_Hungarian }, { 1699, PT_SC, ucp_Old_Hungarian },
{ 1712, PT_SCX, ucp_Old_Italic }, { 1712, PT_SC, ucp_Old_Italic },
{ 1722, PT_SCX, ucp_Old_North_Arabian }, { 1722, PT_SC, ucp_Old_North_Arabian },
{ 1738, PT_SCX, ucp_Old_Permic }, { 1738, PT_SCX, ucp_Old_Permic },
{ 1748, PT_SCX, ucp_Old_Persian }, { 1748, PT_SC, ucp_Old_Persian },
{ 1759, PT_SCX, ucp_Old_Sogdian }, { 1759, PT_SC, ucp_Old_Sogdian },
{ 1770, PT_SCX, ucp_Old_South_Arabian }, { 1770, PT_SC, ucp_Old_South_Arabian },
{ 1786, PT_SCX, ucp_Old_Turkic }, { 1786, PT_SC, ucp_Old_Turkic },
{ 1796, PT_SCX, ucp_Old_Uyghur }, { 1796, PT_SCX, ucp_Old_Uyghur },
{ 1806, PT_SCX, ucp_Oriya }, { 1806, PT_SCX, ucp_Oriya },
{ 1812, PT_SCX, ucp_Old_Turkic }, { 1812, PT_SC, ucp_Old_Turkic },
{ 1817, PT_SCX, ucp_Oriya }, { 1817, PT_SCX, ucp_Oriya },
{ 1822, PT_SCX, ucp_Osage }, { 1822, PT_SC, ucp_Osage },
{ 1828, PT_SCX, ucp_Osage }, { 1828, PT_SC, ucp_Osage },
{ 1833, PT_SCX, ucp_Osmanya }, { 1833, PT_SC, ucp_Osmanya },
{ 1838, PT_SCX, ucp_Osmanya }, { 1838, PT_SC, ucp_Osmanya },
{ 1846, PT_SCX, ucp_Old_Uyghur }, { 1846, PT_SCX, ucp_Old_Uyghur },
{ 1851, PT_GC, ucp_P }, { 1851, PT_GC, ucp_P },
{ 1853, PT_SCX, ucp_Pahawh_Hmong }, { 1853, PT_SC, ucp_Pahawh_Hmong },
{ 1865, PT_SCX, ucp_Palmyrene }, { 1865, PT_SC, ucp_Palmyrene },
{ 1870, PT_SCX, ucp_Palmyrene }, { 1870, PT_SC, ucp_Palmyrene },
{ 1880, PT_SCX, ucp_Pau_Cin_Hau }, { 1880, PT_SC, ucp_Pau_Cin_Hau },
{ 1885, PT_SCX, ucp_Pau_Cin_Hau }, { 1885, PT_SC, ucp_Pau_Cin_Hau },
{ 1895, PT_PC, ucp_Pc }, { 1895, PT_PC, ucp_Pc },
{ 1898, PT_PC, ucp_Pd }, { 1898, PT_PC, ucp_Pd },
{ 1901, PT_PC, ucp_Pe }, { 1901, PT_PC, ucp_Pe },
@ -1108,36 +1108,36 @@ const ucp_type_table PRIV(utt)[] = {
{ 1909, PT_PC, ucp_Pf }, { 1909, PT_PC, ucp_Pf },
{ 1912, PT_SCX, ucp_Phags_Pa }, { 1912, PT_SCX, ucp_Phags_Pa },
{ 1917, PT_SCX, ucp_Phags_Pa }, { 1917, PT_SCX, ucp_Phags_Pa },
{ 1925, PT_SCX, ucp_Inscriptional_Pahlavi }, { 1925, PT_SC, ucp_Inscriptional_Pahlavi },
{ 1930, PT_SCX, ucp_Psalter_Pahlavi }, { 1930, PT_SCX, ucp_Psalter_Pahlavi },
{ 1935, PT_SCX, ucp_Phoenician }, { 1935, PT_SC, ucp_Phoenician },
{ 1940, PT_SCX, ucp_Phoenician }, { 1940, PT_SC, ucp_Phoenician },
{ 1951, PT_PC, ucp_Pi }, { 1951, PT_PC, ucp_Pi },
{ 1954, PT_SCX, ucp_Miao }, { 1954, PT_SC, ucp_Miao },
{ 1959, PT_PC, ucp_Po }, { 1959, PT_PC, ucp_Po },
{ 1962, PT_SCX, ucp_Inscriptional_Parthian }, { 1962, PT_SC, ucp_Inscriptional_Parthian },
{ 1967, PT_PC, ucp_Ps }, { 1967, PT_PC, ucp_Ps },
{ 1970, PT_SCX, ucp_Psalter_Pahlavi }, { 1970, PT_SCX, ucp_Psalter_Pahlavi },
{ 1985, PT_SCX, ucp_Rejang }, { 1985, PT_SC, ucp_Rejang },
{ 1992, PT_SCX, ucp_Rejang }, { 1992, PT_SC, ucp_Rejang },
{ 1997, PT_SCX, ucp_Hanifi_Rohingya }, { 1997, PT_SCX, ucp_Hanifi_Rohingya },
{ 2002, PT_SCX, ucp_Runic }, { 2002, PT_SC, ucp_Runic },
{ 2008, PT_SCX, ucp_Runic }, { 2008, PT_SC, ucp_Runic },
{ 2013, PT_GC, ucp_S }, { 2013, PT_GC, ucp_S },
{ 2015, PT_SCX, ucp_Samaritan }, { 2015, PT_SC, ucp_Samaritan },
{ 2025, PT_SCX, ucp_Samaritan }, { 2025, PT_SC, ucp_Samaritan },
{ 2030, PT_SCX, ucp_Old_South_Arabian }, { 2030, PT_SC, ucp_Old_South_Arabian },
{ 2035, PT_SCX, ucp_Saurashtra }, { 2035, PT_SC, ucp_Saurashtra },
{ 2040, PT_SCX, ucp_Saurashtra }, { 2040, PT_SC, ucp_Saurashtra },
{ 2051, PT_PC, ucp_Sc }, { 2051, PT_PC, ucp_Sc },
{ 2054, PT_SCX, ucp_SignWriting }, { 2054, PT_SC, ucp_SignWriting },
{ 2059, PT_SCX, ucp_Sharada }, { 2059, PT_SCX, ucp_Sharada },
{ 2067, PT_SCX, ucp_Shavian }, { 2067, PT_SC, ucp_Shavian },
{ 2075, PT_SCX, ucp_Shavian }, { 2075, PT_SC, ucp_Shavian },
{ 2080, PT_SCX, ucp_Sharada }, { 2080, PT_SCX, ucp_Sharada },
{ 2085, PT_SCX, ucp_Siddham }, { 2085, PT_SC, ucp_Siddham },
{ 2090, PT_SCX, ucp_Siddham }, { 2090, PT_SC, ucp_Siddham },
{ 2098, PT_SCX, ucp_SignWriting }, { 2098, PT_SC, ucp_SignWriting },
{ 2110, PT_SCX, ucp_Khudawadi }, { 2110, PT_SCX, ucp_Khudawadi },
{ 2115, PT_SCX, ucp_Sinhala }, { 2115, PT_SCX, ucp_Sinhala },
{ 2120, PT_SCX, ucp_Sinhala }, { 2120, PT_SCX, ucp_Sinhala },
@ -1146,13 +1146,13 @@ const ucp_type_table PRIV(utt)[] = {
{ 2134, PT_PC, ucp_So }, { 2134, PT_PC, ucp_So },
{ 2137, PT_SCX, ucp_Sogdian }, { 2137, PT_SCX, ucp_Sogdian },
{ 2142, PT_SCX, ucp_Sogdian }, { 2142, PT_SCX, ucp_Sogdian },
{ 2150, PT_SCX, ucp_Old_Sogdian }, { 2150, PT_SC, ucp_Old_Sogdian },
{ 2155, PT_SCX, ucp_Sora_Sompeng }, { 2155, PT_SC, ucp_Sora_Sompeng },
{ 2160, PT_SCX, ucp_Sora_Sompeng }, { 2160, PT_SC, ucp_Sora_Sompeng },
{ 2172, PT_SCX, ucp_Soyombo }, { 2172, PT_SC, ucp_Soyombo },
{ 2177, PT_SCX, ucp_Soyombo }, { 2177, PT_SC, ucp_Soyombo },
{ 2185, PT_SCX, ucp_Sundanese }, { 2185, PT_SC, ucp_Sundanese },
{ 2190, PT_SCX, ucp_Sundanese }, { 2190, PT_SC, ucp_Sundanese },
{ 2200, PT_SCX, ucp_Syloti_Nagri }, { 2200, PT_SCX, ucp_Syloti_Nagri },
{ 2205, PT_SCX, ucp_Syloti_Nagri }, { 2205, PT_SCX, ucp_Syloti_Nagri },
{ 2217, PT_SCX, ucp_Syriac }, { 2217, PT_SCX, ucp_Syriac },
@ -1161,48 +1161,48 @@ const ucp_type_table PRIV(utt)[] = {
{ 2237, PT_SCX, ucp_Tagbanwa }, { 2237, PT_SCX, ucp_Tagbanwa },
{ 2242, PT_SCX, ucp_Tagbanwa }, { 2242, PT_SCX, ucp_Tagbanwa },
{ 2251, PT_SCX, ucp_Tai_Le }, { 2251, PT_SCX, ucp_Tai_Le },
{ 2257, PT_SCX, ucp_Tai_Tham }, { 2257, PT_SC, ucp_Tai_Tham },
{ 2265, PT_SCX, ucp_Tai_Viet }, { 2265, PT_SC, ucp_Tai_Viet },
{ 2273, PT_SCX, ucp_Takri }, { 2273, PT_SCX, ucp_Takri },
{ 2278, PT_SCX, ucp_Takri }, { 2278, PT_SCX, ucp_Takri },
{ 2284, PT_SCX, ucp_Tai_Le }, { 2284, PT_SCX, ucp_Tai_Le },
{ 2289, PT_SCX, ucp_New_Tai_Lue }, { 2289, PT_SC, ucp_New_Tai_Lue },
{ 2294, PT_SCX, ucp_Tamil }, { 2294, PT_SCX, ucp_Tamil },
{ 2300, PT_SCX, ucp_Tamil }, { 2300, PT_SCX, ucp_Tamil },
{ 2305, PT_SCX, ucp_Tangut }, { 2305, PT_SC, ucp_Tangut },
{ 2310, PT_SCX, ucp_Tangsa }, { 2310, PT_SC, ucp_Tangsa },
{ 2317, PT_SCX, ucp_Tangut }, { 2317, PT_SC, ucp_Tangut },
{ 2324, PT_SCX, ucp_Tai_Viet }, { 2324, PT_SC, ucp_Tai_Viet },
{ 2329, PT_SCX, ucp_Telugu }, { 2329, PT_SCX, ucp_Telugu },
{ 2334, PT_SCX, ucp_Telugu }, { 2334, PT_SCX, ucp_Telugu },
{ 2341, PT_SCX, ucp_Tifinagh }, { 2341, PT_SC, ucp_Tifinagh },
{ 2346, PT_SCX, ucp_Tagalog }, { 2346, PT_SCX, ucp_Tagalog },
{ 2351, PT_SCX, ucp_Thaana }, { 2351, PT_SCX, ucp_Thaana },
{ 2356, PT_SCX, ucp_Thaana }, { 2356, PT_SCX, ucp_Thaana },
{ 2363, PT_SCX, ucp_Thai }, { 2363, PT_SC, ucp_Thai },
{ 2368, PT_SCX, ucp_Tibetan }, { 2368, PT_SC, ucp_Tibetan },
{ 2376, PT_SCX, ucp_Tibetan }, { 2376, PT_SC, ucp_Tibetan },
{ 2381, PT_SCX, ucp_Tifinagh }, { 2381, PT_SC, ucp_Tifinagh },
{ 2390, PT_SCX, ucp_Tirhuta }, { 2390, PT_SCX, ucp_Tirhuta },
{ 2395, PT_SCX, ucp_Tirhuta }, { 2395, PT_SCX, ucp_Tirhuta },
{ 2403, PT_SCX, ucp_Tangsa }, { 2403, PT_SC, ucp_Tangsa },
{ 2408, PT_SCX, ucp_Toto }, { 2408, PT_SC, ucp_Toto },
{ 2413, PT_SCX, ucp_Ugaritic }, { 2413, PT_SC, ucp_Ugaritic },
{ 2418, PT_SCX, ucp_Ugaritic }, { 2418, PT_SC, ucp_Ugaritic },
{ 2427, PT_SCX, ucp_Unknown }, { 2427, PT_SC, ucp_Unknown },
{ 2435, PT_SCX, ucp_Vai }, { 2435, PT_SC, ucp_Vai },
{ 2439, PT_SCX, ucp_Vai }, { 2439, PT_SC, ucp_Vai },
{ 2444, PT_SCX, ucp_Vithkuqi }, { 2444, PT_SC, ucp_Vithkuqi },
{ 2449, PT_SCX, ucp_Vithkuqi }, { 2449, PT_SC, ucp_Vithkuqi },
{ 2458, PT_SCX, ucp_Wancho }, { 2458, PT_SC, ucp_Wancho },
{ 2465, PT_SCX, ucp_Warang_Citi }, { 2465, PT_SC, ucp_Warang_Citi },
{ 2470, PT_SCX, ucp_Warang_Citi }, { 2470, PT_SC, ucp_Warang_Citi },
{ 2481, PT_SCX, ucp_Wancho }, { 2481, PT_SC, ucp_Wancho },
{ 2486, PT_ALNUM, 0 }, { 2486, PT_ALNUM, 0 },
{ 2490, PT_SCX, ucp_Old_Persian }, { 2490, PT_SC, ucp_Old_Persian },
{ 2495, PT_PXSPACE, 0 }, { 2495, PT_PXSPACE, 0 },
{ 2499, PT_SPACE, 0 }, { 2499, PT_SPACE, 0 },
{ 2503, PT_SCX, ucp_Cuneiform }, { 2503, PT_SC, ucp_Cuneiform },
{ 2508, PT_UCNC, 0 }, { 2508, PT_UCNC, 0 },
{ 2512, PT_WORD, 0 }, { 2512, PT_WORD, 0 },
{ 2516, PT_SCX, ucp_Yezidi }, { 2516, PT_SCX, ucp_Yezidi },
@ -1210,14 +1210,14 @@ const ucp_type_table PRIV(utt)[] = {
{ 2528, PT_SCX, ucp_Yi }, { 2528, PT_SCX, ucp_Yi },
{ 2531, PT_SCX, ucp_Yi }, { 2531, PT_SCX, ucp_Yi },
{ 2536, PT_GC, ucp_Z }, { 2536, PT_GC, ucp_Z },
{ 2538, PT_SCX, ucp_Zanabazar_Square }, { 2538, PT_SC, ucp_Zanabazar_Square },
{ 2554, PT_SCX, ucp_Zanabazar_Square }, { 2554, PT_SC, ucp_Zanabazar_Square },
{ 2559, PT_SCX, ucp_Inherited }, { 2559, PT_SC, ucp_Inherited },
{ 2564, PT_PC, ucp_Zl }, { 2564, PT_PC, ucp_Zl },
{ 2567, PT_PC, ucp_Zp }, { 2567, PT_PC, ucp_Zp },
{ 2570, PT_PC, ucp_Zs }, { 2570, PT_PC, ucp_Zs },
{ 2573, PT_SCX, ucp_Common }, { 2573, PT_SC, ucp_Common },
{ 2578, PT_SCX, ucp_Unknown } { 2578, PT_SC, ucp_Unknown }
}; };
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);

View File

@ -134,7 +134,6 @@ while ((t = *data++) != XCL_END)
else /* XCL_PROP & XCL_NOTPROP */ else /* XCL_PROP & XCL_NOTPROP */
{ {
const ucd_record *prop = GET_UCD(c); const ucd_record *prop = GET_UCD(c);
int scriptx;
BOOL isprop = t == XCL_PROP; BOOL isprop = t == XCL_PROP;
BOOL ok; BOOL ok;
@ -163,10 +162,8 @@ while ((t = *data++) != XCL_END)
break; break;
case PT_SCX: case PT_SCX:
scriptx = prop->scriptx; ok = (data[1] == prop->script ||
ok = data[1] == prop->script || data[1] == (PCRE2_UCHAR)scriptx; MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, data[1]) != 0);
if (!ok && scriptx < 0)
ok = MAPBIT(PRIV(ucd_script_sets) - scriptx, data[1]);
if (ok == isprop) return !negated; if (ok == isprop) return !negated;
break; break;