Rework script extension handling (#64)

Co-authored-by: Zoltan Herczeg <hzmester@freemail.hu>
This commit is contained in:
Zoltan Herczeg 2021-12-29 10:35:22 +01:00 committed by GitHub
parent 7713f33e46
commit afa4756d19
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 3745 additions and 3736 deletions

View File

@ -184,6 +184,46 @@ break_properties = [
'Extended_Pictographic', '14'
]
# ---------------------------------------------------------------------------
# REORDERING SCRIPT NAMES
# ---------------------------------------------------------------------------
import re
def reorder_scripts():
    """Move scripts that occur in ScriptExtensions.txt to the front.

    Reads ``Unicode.tables/ScriptExtensions.txt`` (relative to the current
    directory), collects every script abbreviation listed on a data line, and
    rebinds the module-level ``script_names`` and ``script_abbrevs`` lists so
    that those scripts come first, followed by all remaining scripts. The
    relative order inside each of the two groups is preserved and the two
    lists stay index-aligned.
    """
    global script_names
    global script_abbrevs

    # A data line looks like "1CD0..1CD2    ; Beng Deva # Mn ..."; group 1
    # captures the space-separated list of script abbreviations.
    names_re = re.compile(r'^[0-9A-F]{4,6}(?:\.\.[0-9A-F]{4,6})? +; ([A-Za-z_ ]+) #')

    extended_script_abbrevs = set()
    with open("Unicode.tables/ScriptExtensions.txt") as f:
        for line in f:
            match_obj = names_re.match(line)
            if match_obj is None:
                # Comment, blank, or otherwise non-data line.
                continue
            extended_script_abbrevs.update(match_obj.group(1).split(" "))

    # Stable partition of the indices: False sorts before True, so scripts
    # present in the extensions file come first and ties keep their order.
    order = sorted(
        range(len(script_abbrevs)),
        key=lambda idx: script_abbrevs[idx] not in extended_script_abbrevs,
    )
    script_names = [script_names[idx] for idx in order]
    script_abbrevs = [script_abbrevs[idx] for idx in order]
reorder_scripts()
# ---------------------------------------------------------------------------
# DERIVED LISTS

View File

@ -252,30 +252,15 @@ def get_other_case(chardata):
# Parse a line of ScriptExtensions.txt
def get_script_extension(chardata):
this_script_list = list(chardata[1].split(' '))
if len(this_script_list) == 1:
return script_abbrevs.index(this_script_list[0])
global last_script_extension
script_numbers = []
for d in this_script_list:
script_numbers.append(script_abbrevs.index(d))
script_numbers.append(0)
script_numbers_length = len(script_numbers)
offset = len(script_lists) * script_list_item_size
if last_script_extension == chardata[1]:
return offset - script_list_item_size
for i in range(1, len(script_lists) - script_numbers_length + 1):
for j in range(0, script_numbers_length):
found = True
if script_lists[i+j] != script_numbers[j]:
found = False
break
if found:
return -i
# Not found in existing lists
return_value = len(script_lists)
script_lists.extend(script_numbers)
return -return_value
last_script_extension = chardata[1]
script_lists.append(tuple(script_abbrevs.index(abbrev) for abbrev in last_script_extension.split(' ')))
return offset
# Read a whole table in memory, setting/checking the Unicode version
@ -538,26 +523,10 @@ file.close()
# multiple scripts. Initialize this list with a single entry, as the zeroth
# element is never used.
script_lists = [0]
script_abbrevs_default = script_abbrevs.index('Zzzz')
scriptx = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, script_abbrevs_default)
# Scan all characters and set their default script extension to the main
# script. We also have to adjust negative scriptx values, following a change in
# the way these work. They are currently negated offsets into the script_lists
# list, but have to be changed into indices in the new ucd_script_sets vector,
# which has fixed-size entries. We can compute the new offset by counting the
# zeros that precede the current offset.
for i in range(0, MAX_UNICODE):
if scriptx[i] == script_abbrevs_default:
scriptx[i] = script[i]
elif scriptx[i] < 0:
count = 1
for j in range(-scriptx[i], 0, -1):
if script_lists[j] == 0:
count += 1
scriptx[i] = -count * (int(len(script_names)/32) + 1)
script_lists = [[]]
script_list_item_size = (script_names.index('Unknown') + 31) // 32
last_script_extension = ""
scriptx = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, 0)
# With the addition of the Script Extensions field, we needed some padding to
# get the Unicode records up to 12 bytes (multiple of 4). Originally this was a
@ -565,7 +534,7 @@ for i in range(0, MAX_UNICODE):
# are now used for the bidi class, so zero will do.
padding_dummy = [0] * MAX_UNICODE
padding_dummy[0] = 0
padding_dummy[0] = 256
# This block of code was added by PH in September 2012. It scans the other_case
# table to find sets of more than two characters that must all match each other
@ -806,24 +775,19 @@ f.write("""\
const uint32_t PRIV(ucd_script_sets)[] = {
""")
bitword_count = len(script_names)/32 + 1
bitwords = [0] * int(bitword_count)
for d in script_lists:
if d == 0:
s = " "
f.write(" ")
for x in bitwords:
f.write("%s" % s)
s = ", "
f.write("0x%08xu" % x)
f.write(",\n")
bitwords = [0] * int(bitword_count)
bitwords = [0] * script_list_item_size
else:
x = int(d/32)
y = int(d%32)
bitwords[x] = bitwords[x] | (1 << y)
for idx in d:
    # Set the bit for script number idx: word idx // 32, bit idx % 32. This
    # must mirror the C MAPBIT macro, which tests
    # (map)[(script)/32] & (1u << ((script)%32)).
    bitwords[idx // 32] |= 1 << (idx % 32)
s = " "
for x in bitwords:
f.write("%s" % s)
s = ", "
f.write("0x%08xu" % x)
f.write(",\n")
f.write("};\n\n")

View File

@ -64,8 +64,10 @@ for i in range(0, len(break_properties), 2):
f.write(" ucp_gb%s,%s /* %s */\n" % (break_properties[i], sp, break_properties[i+1]))
f.write("};\n\n")
f.write("/* These are the script identifications, additions happen at the end. */\n\nenum {\n")
f.write("/* These are the script identifications. */\n\nenum {\n /* Scripts which has characters in other scripts. */\n")
for i in script_names:
if i == "Unknown":
f.write("\n /* Scripts which has no characters in other scripts. */\n")
f.write(" ucp_%s,\n" % i)
f.write("\n")

View File

@ -92,8 +92,12 @@ std_bidi_class_names = stdnames(bidi_class_names)
# latter is used for the ucp_xx names. NOTE: for the script abbreviations, we
# still use the full original names.
utt_table = list(zip(std_script_names, script_names, ['PT_SCX'] * len(script_names)))
utt_table += list(zip(std_script_abbrevs, script_names, ['PT_SCX'] * len(script_abbrevs)))
scx_end = script_names.index('Unknown')
utt_table = list(zip(std_script_names[0:scx_end], script_names[0:scx_end], ['PT_SCX'] * scx_end))
utt_table += list(zip(std_script_names[scx_end:], script_names[scx_end:], ['PT_SC'] * (len(script_names) - scx_end)))
utt_table += list(zip(std_script_abbrevs[0:scx_end], script_names[0:scx_end], ['PT_SCX'] * scx_end))
utt_table += list(zip(std_script_abbrevs[scx_end:], script_names[scx_end:], ['PT_SC'] * (len(script_names) - scx_end)))
# At least one script abbreviation is the same as the full name of the script,
# so we must remove duplicates. It doesn't matter if this operation changes the

View File

@ -200,7 +200,6 @@ check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
BOOL negated)
{
BOOL ok;
int scriptx;
const uint32_t *p;
const ucd_record *prop = GET_UCD(c);
@ -221,10 +220,8 @@ switch(ptype)
return (pdata == prop->script) == negated;
case PT_SCX:
scriptx = prop->scriptx;
ok = pdata == prop->script || pdata == (unsigned int)scriptx;
if (!ok && scriptx < 0)
ok = MAPBIT(PRIV(ucd_script_sets) - scriptx, pdata) != 0;
ok = (pdata == prop->script
|| MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, pdata) != 0);
return ok == negated;
/* These are specials */

View File

@ -2206,13 +2206,23 @@ while (bot < top)
{
*pdataptr = PRIV(utt)[i].value;
if (vptr == NULL || ptscript == PT_NOTSCRIPT)
*ptypeptr = PRIV(utt)[i].type;
else
{
if (PRIV(utt)[i].type != PT_SCX) break; /* Non-script found */
*ptypeptr = ptscript;
*ptypeptr = PRIV(utt)[i].type;
return TRUE;
}
return TRUE;
switch (PRIV(utt)[i].type)
{
case PT_SC:
*ptypeptr = PT_SC;
return TRUE;
case PT_SCX:
*ptypeptr = ptscript;
return TRUE;
}
break; /* Non-script found */
}
if (r > 0) bot = i + 1; else top = i;

View File

@ -1194,9 +1194,8 @@ for (;;)
break;
case PT_SCX:
OK = prop->script == code[2] || prop->scriptx == (int)code[2];
if (!OK && prop->scriptx < 0)
OK = MAPBIT(PRIV(ucd_script_sets) - prop->scriptx, code[2]) != 0;
OK = (prop->script == code[2] ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[2]) != 0);
break;
/* These are specials for combination cases. */
@ -1466,9 +1465,8 @@ for (;;)
break;
case PT_SCX:
OK = prop->script == code[3] || prop->scriptx == (int)code[3];
if (!OK && prop->scriptx < 0)
OK = MAPBIT(PRIV(ucd_script_sets) - prop->scriptx, code[3]) != 0;
OK = (prop->script == code[3] ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[3]) != 0);
break;
/* These are specials for combination cases. */
@ -1721,9 +1719,8 @@ for (;;)
break;
case PT_SCX:
OK = prop->script == code[3] || prop->scriptx == (int)code[3];
if (!OK && prop->scriptx < 0)
OK = MAPBIT(PRIV(ucd_script_sets) - prop->scriptx, code[3]) != 0;
OK = (prop->script == code[3] ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[3]) != 0);
break;
/* These are specials for combination cases. */
@ -2001,11 +1998,9 @@ for (;;)
break;
case PT_SCX:
OK = prop->script == code[1 + IMM2_SIZE + 2] ||
prop->scriptx == (int)code[1 + IMM2_SIZE + 2];
if (!OK && prop->scriptx < 0)
OK = MAPBIT(PRIV(ucd_script_sets) - prop->scriptx,
code[1 + IMM2_SIZE + 2]) != 0;
OK = (prop->script == code[1 + IMM2_SIZE + 2] ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx,
code[1 + IMM2_SIZE + 2]) != 0);
break;
/* These are specials for combination cases. */

View File

@ -1822,9 +1822,9 @@ typedef struct {
uint8_t gbprop; /* ucp_gbControl, etc. (grapheme break property) */
uint8_t caseset; /* offset to multichar other cases or zero */
int32_t other_case; /* offset to other case, or zero if none */
int16_t scriptx; /* script extension value */
uint8_t scriptx; /* script extension value */
uint8_t bidi; /* bidi class and control flag */
uint8_t dummy; /* spare - to round to multiple of 4 bytes */
uint16_t dummy; /* spare - to round to multiple of 4 bytes */
} ucd_record;
/* UCD access macros */
@ -1849,8 +1849,8 @@ typedef struct {
#define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case)))
#define UCD_SCRIPTX(ch) GET_UCD(ch)->scriptx
/* The "scriptx" field, when negative, gives an offset into a vector of 32-bit
words that form a bitmap representing a list of scripts. This macro tests for a
/* The "scriptx" field gives an offset into a vector of 32-bit words that
form a bitmap representing a list of scripts. This macro tests for a
script in the map by number. */
#define MAPBIT(map,script) ((map)[(script)/32]&(1u<<((script)%32)))

View File

@ -2454,11 +2454,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
case PT_SCX:
{
int scriptx = prop->scriptx;
BOOL ok = Fecode[2] == prop->script ||
Fecode[2] == (unsigned int)scriptx;
if (!ok && scriptx < 0)
ok = MAPBIT((PRIV(ucd_script_sets) - scriptx), Fecode[2]) != 0;
BOOL ok = (Fecode[2] == prop->script ||
MAPBIT((PRIV(ucd_script_sets) + prop->scriptx), Fecode[2]) != 0);
if (ok == notmatch) RRETURN(MATCH_NOMATCH);
}
break;
@ -2728,7 +2725,6 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
for (i = 1; i <= Lmin; i++)
{
BOOL ok;
int scriptx;
const ucd_record *prop;
if (Feptr >= mb->end_subject)
{
@ -2737,10 +2733,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
GETCHARINCTEST(fc, Feptr);
prop = GET_UCD(fc);
scriptx = prop->scriptx;
ok = prop->script == Lpropvalue || scriptx == (int)Lpropvalue;
if (!ok && scriptx < 0)
ok = MAPBIT(PRIV(ucd_script_sets) - scriptx, Lpropvalue) != 0;
ok = (prop->script == Lpropvalue ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0);
if (ok == notmatch)
RRETURN(MATCH_NOMATCH);
}
@ -3521,7 +3515,6 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
for (;;)
{
BOOL ok;
int scriptx;
const ucd_record *prop;
RMATCH(Fecode, RM225);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
@ -3533,10 +3526,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
GETCHARINCTEST(fc, Feptr);
prop = GET_UCD(fc);
scriptx = prop->scriptx;
ok = prop->script == Lpropvalue || scriptx == (int)Lpropvalue;
if (!ok && scriptx < 0)
ok = MAPBIT(PRIV(ucd_script_sets) - scriptx, Lpropvalue) != 0;
ok = (prop->script == Lpropvalue
|| MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0);
if (ok == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
@ -4104,7 +4095,6 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
{
BOOL ok;
const ucd_record *prop;
int scriptx;
int len = 1;
if (Feptr >= mb->end_subject)
{
@ -4113,10 +4103,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
}
GETCHARLENTEST(fc, Feptr, len);
prop = GET_UCD(fc);
scriptx = prop->scriptx;
ok = prop->script == Lpropvalue || scriptx == (int)Lpropvalue;
if (!ok && scriptx < 0)
ok = MAPBIT(PRIV(ucd_script_sets) - scriptx, Lpropvalue) != 0;
ok = (prop->script == Lpropvalue ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0);
if (ok == notmatch) break;
Feptr+= len;
}

File diff suppressed because it is too large Load Diff

View File

@ -149,57 +149,37 @@ enum {
ucp_gbExtended_Pictographic, /* 14 */
};
/* These are the script identifications, additions happen at the end. */
/* These are the script identifications. */
enum {
ucp_Unknown,
/* Scripts which has characters in other scripts. */
ucp_Arabic,
ucp_Armenian,
ucp_Bengali,
ucp_Bopomofo,
ucp_Braille,
ucp_Buginese,
ucp_Buhid,
ucp_Canadian_Aboriginal,
ucp_Cherokee,
ucp_Common,
ucp_Coptic,
ucp_Cypriot,
ucp_Cyrillic,
ucp_Deseret,
ucp_Devanagari,
ucp_Ethiopic,
ucp_Georgian,
ucp_Glagolitic,
ucp_Gothic,
ucp_Greek,
ucp_Gujarati,
ucp_Gurmukhi,
ucp_Han,
ucp_Hangul,
ucp_Hanunoo,
ucp_Hebrew,
ucp_Hiragana,
ucp_Inherited,
ucp_Kannada,
ucp_Katakana,
ucp_Kharoshthi,
ucp_Khmer,
ucp_Lao,
ucp_Latin,
ucp_Limbu,
ucp_Linear_B,
ucp_Malayalam,
ucp_Mongolian,
ucp_Myanmar,
ucp_New_Tai_Lue,
ucp_Ogham,
ucp_Old_Italic,
ucp_Old_Persian,
ucp_Oriya,
ucp_Osmanya,
ucp_Runic,
ucp_Shavian,
ucp_Sinhala,
ucp_Syloti_Nagri,
ucp_Syriac,
@ -209,19 +189,70 @@ enum {
ucp_Tamil,
ucp_Telugu,
ucp_Thaana,
ucp_Yi,
ucp_Nko,
ucp_Phags_Pa,
ucp_Kayah_Li,
ucp_Javanese,
ucp_Kaithi,
ucp_Mandaic,
ucp_Chakma,
ucp_Sharada,
ucp_Takri,
ucp_Duployan,
ucp_Grantha,
ucp_Khojki,
ucp_Khudawadi,
ucp_Linear_A,
ucp_Mahajani,
ucp_Manichaean,
ucp_Modi,
ucp_Old_Permic,
ucp_Psalter_Pahlavi,
ucp_Tirhuta,
ucp_Multani,
ucp_Adlam,
ucp_Masaram_Gondi,
ucp_Dogra,
ucp_Gunjala_Gondi,
ucp_Hanifi_Rohingya,
ucp_Sogdian,
ucp_Nandinagari,
ucp_Yezidi,
ucp_Cypro_Minoan,
ucp_Old_Uyghur,
/* Scripts which has no characters in other scripts. */
ucp_Unknown,
ucp_Armenian,
ucp_Braille,
ucp_Canadian_Aboriginal,
ucp_Cherokee,
ucp_Common,
ucp_Deseret,
ucp_Ethiopic,
ucp_Gothic,
ucp_Hebrew,
ucp_Inherited,
ucp_Kharoshthi,
ucp_Khmer,
ucp_Lao,
ucp_New_Tai_Lue,
ucp_Ogham,
ucp_Old_Italic,
ucp_Old_Persian,
ucp_Osmanya,
ucp_Runic,
ucp_Shavian,
ucp_Thai,
ucp_Tibetan,
ucp_Tifinagh,
ucp_Ugaritic,
ucp_Yi,
ucp_Balinese,
ucp_Cuneiform,
ucp_Nko,
ucp_Phags_Pa,
ucp_Phoenician,
ucp_Carian,
ucp_Cham,
ucp_Kayah_Li,
ucp_Lepcha,
ucp_Lycian,
ucp_Lydian,
@ -236,8 +267,6 @@ enum {
ucp_Imperial_Aramaic,
ucp_Inscriptional_Pahlavi,
ucp_Inscriptional_Parthian,
ucp_Javanese,
ucp_Kaithi,
ucp_Lisu,
ucp_Meetei_Mayek,
ucp_Old_South_Arabian,
@ -247,70 +276,44 @@ enum {
ucp_Tai_Viet,
ucp_Batak,
ucp_Brahmi,
ucp_Mandaic,
ucp_Chakma,
ucp_Meroitic_Cursive,
ucp_Meroitic_Hieroglyphs,
ucp_Miao,
ucp_Sharada,
ucp_Sora_Sompeng,
ucp_Takri,
ucp_Bassa_Vah,
ucp_Caucasian_Albanian,
ucp_Duployan,
ucp_Elbasan,
ucp_Grantha,
ucp_Khojki,
ucp_Khudawadi,
ucp_Linear_A,
ucp_Mahajani,
ucp_Manichaean,
ucp_Mende_Kikakui,
ucp_Modi,
ucp_Mro,
ucp_Nabataean,
ucp_Old_North_Arabian,
ucp_Old_Permic,
ucp_Pahawh_Hmong,
ucp_Palmyrene,
ucp_Psalter_Pahlavi,
ucp_Pau_Cin_Hau,
ucp_Siddham,
ucp_Tirhuta,
ucp_Warang_Citi,
ucp_Ahom,
ucp_Anatolian_Hieroglyphs,
ucp_Hatran,
ucp_Multani,
ucp_Old_Hungarian,
ucp_SignWriting,
ucp_Adlam,
ucp_Bhaiksuki,
ucp_Marchen,
ucp_Newa,
ucp_Osage,
ucp_Tangut,
ucp_Masaram_Gondi,
ucp_Nushu,
ucp_Soyombo,
ucp_Zanabazar_Square,
ucp_Dogra,
ucp_Gunjala_Gondi,
ucp_Hanifi_Rohingya,
ucp_Makasar,
ucp_Medefaidrin,
ucp_Old_Sogdian,
ucp_Sogdian,
ucp_Elymaic,
ucp_Nandinagari,
ucp_Nyiakeng_Puachue_Hmong,
ucp_Wancho,
ucp_Chorasmian,
ucp_Dives_Akuru,
ucp_Khitan_Small_Script,
ucp_Yezidi,
ucp_Cypro_Minoan,
ucp_Old_Uyghur,
ucp_Tangsa,
ucp_Toto,
ucp_Vithkuqi,

View File

@ -833,29 +833,29 @@ const char PRIV(utt_names)[] =
const ucp_type_table PRIV(utt)[] = {
{ 0, PT_SCX, ucp_Adlam },
{ 6, PT_SCX, ucp_Adlam },
{ 11, PT_SCX, ucp_Caucasian_Albanian },
{ 16, PT_SCX, ucp_Ahom },
{ 21, PT_SCX, ucp_Anatolian_Hieroglyphs },
{ 11, PT_SC, ucp_Caucasian_Albanian },
{ 16, PT_SC, ucp_Ahom },
{ 21, PT_SC, ucp_Anatolian_Hieroglyphs },
{ 42, PT_ANY, 0 },
{ 46, PT_SCX, ucp_Arabic },
{ 51, PT_SCX, ucp_Arabic },
{ 58, PT_SCX, ucp_Armenian },
{ 67, PT_SCX, ucp_Imperial_Aramaic },
{ 72, PT_SCX, ucp_Armenian },
{ 77, PT_SCX, ucp_Avestan },
{ 85, PT_SCX, ucp_Avestan },
{ 90, PT_SCX, ucp_Balinese },
{ 95, PT_SCX, ucp_Balinese },
{ 104, PT_SCX, ucp_Bamum },
{ 109, PT_SCX, ucp_Bamum },
{ 115, PT_SCX, ucp_Bassa_Vah },
{ 120, PT_SCX, ucp_Bassa_Vah },
{ 129, PT_SCX, ucp_Batak },
{ 135, PT_SCX, ucp_Batak },
{ 58, PT_SC, ucp_Armenian },
{ 67, PT_SC, ucp_Imperial_Aramaic },
{ 72, PT_SC, ucp_Armenian },
{ 77, PT_SC, ucp_Avestan },
{ 85, PT_SC, ucp_Avestan },
{ 90, PT_SC, ucp_Balinese },
{ 95, PT_SC, ucp_Balinese },
{ 104, PT_SC, ucp_Bamum },
{ 109, PT_SC, ucp_Bamum },
{ 115, PT_SC, ucp_Bassa_Vah },
{ 120, PT_SC, ucp_Bassa_Vah },
{ 129, PT_SC, ucp_Batak },
{ 135, PT_SC, ucp_Batak },
{ 140, PT_SCX, ucp_Bengali },
{ 145, PT_SCX, ucp_Bengali },
{ 153, PT_SCX, ucp_Bhaiksuki },
{ 163, PT_SCX, ucp_Bhaiksuki },
{ 153, PT_SC, ucp_Bhaiksuki },
{ 163, PT_SC, ucp_Bhaiksuki },
{ 168, PT_BIDICL, ucp_bidiAL },
{ 175, PT_BIDICL, ucp_bidiAN },
{ 182, PT_BIDICL, ucp_bidiB },
@ -883,68 +883,68 @@ const ucp_type_table PRIV(utt)[] = {
{ 346, PT_BIDICL, ucp_bidiWS },
{ 353, PT_SCX, ucp_Bopomofo },
{ 358, PT_SCX, ucp_Bopomofo },
{ 367, PT_SCX, ucp_Brahmi },
{ 372, PT_SCX, ucp_Brahmi },
{ 379, PT_SCX, ucp_Braille },
{ 384, PT_SCX, ucp_Braille },
{ 367, PT_SC, ucp_Brahmi },
{ 372, PT_SC, ucp_Brahmi },
{ 379, PT_SC, ucp_Braille },
{ 384, PT_SC, ucp_Braille },
{ 392, PT_SCX, ucp_Buginese },
{ 397, PT_SCX, ucp_Buginese },
{ 406, PT_SCX, ucp_Buhid },
{ 411, PT_SCX, ucp_Buhid },
{ 417, PT_GC, ucp_C },
{ 419, PT_SCX, ucp_Chakma },
{ 424, PT_SCX, ucp_Canadian_Aboriginal },
{ 443, PT_SCX, ucp_Canadian_Aboriginal },
{ 448, PT_SCX, ucp_Carian },
{ 453, PT_SCX, ucp_Carian },
{ 460, PT_SCX, ucp_Caucasian_Albanian },
{ 424, PT_SC, ucp_Canadian_Aboriginal },
{ 443, PT_SC, ucp_Canadian_Aboriginal },
{ 448, PT_SC, ucp_Carian },
{ 453, PT_SC, ucp_Carian },
{ 460, PT_SC, ucp_Caucasian_Albanian },
{ 478, PT_PC, ucp_Cc },
{ 481, PT_PC, ucp_Cf },
{ 484, PT_SCX, ucp_Chakma },
{ 491, PT_SCX, ucp_Cham },
{ 496, PT_SCX, ucp_Cherokee },
{ 501, PT_SCX, ucp_Cherokee },
{ 510, PT_SCX, ucp_Chorasmian },
{ 521, PT_SCX, ucp_Chorasmian },
{ 491, PT_SC, ucp_Cham },
{ 496, PT_SC, ucp_Cherokee },
{ 501, PT_SC, ucp_Cherokee },
{ 510, PT_SC, ucp_Chorasmian },
{ 521, PT_SC, ucp_Chorasmian },
{ 526, PT_PC, ucp_Cn },
{ 529, PT_PC, ucp_Co },
{ 532, PT_SCX, ucp_Common },
{ 532, PT_SC, ucp_Common },
{ 539, PT_SCX, ucp_Coptic },
{ 544, PT_SCX, ucp_Coptic },
{ 551, PT_SCX, ucp_Cypro_Minoan },
{ 556, PT_SCX, ucp_Cypriot },
{ 561, PT_PC, ucp_Cs },
{ 564, PT_SCX, ucp_Cuneiform },
{ 564, PT_SC, ucp_Cuneiform },
{ 574, PT_SCX, ucp_Cypriot },
{ 582, PT_SCX, ucp_Cypro_Minoan },
{ 594, PT_SCX, ucp_Cyrillic },
{ 603, PT_SCX, ucp_Cyrillic },
{ 608, PT_SCX, ucp_Deseret },
{ 608, PT_SC, ucp_Deseret },
{ 616, PT_SCX, ucp_Devanagari },
{ 621, PT_SCX, ucp_Devanagari },
{ 632, PT_SCX, ucp_Dives_Akuru },
{ 637, PT_SCX, ucp_Dives_Akuru },
{ 632, PT_SC, ucp_Dives_Akuru },
{ 637, PT_SC, ucp_Dives_Akuru },
{ 648, PT_SCX, ucp_Dogra },
{ 653, PT_SCX, ucp_Dogra },
{ 659, PT_SCX, ucp_Deseret },
{ 659, PT_SC, ucp_Deseret },
{ 664, PT_SCX, ucp_Duployan },
{ 669, PT_SCX, ucp_Duployan },
{ 678, PT_SCX, ucp_Egyptian_Hieroglyphs },
{ 683, PT_SCX, ucp_Egyptian_Hieroglyphs },
{ 703, PT_SCX, ucp_Elbasan },
{ 708, PT_SCX, ucp_Elbasan },
{ 716, PT_SCX, ucp_Elymaic },
{ 721, PT_SCX, ucp_Elymaic },
{ 729, PT_SCX, ucp_Ethiopic },
{ 734, PT_SCX, ucp_Ethiopic },
{ 678, PT_SC, ucp_Egyptian_Hieroglyphs },
{ 683, PT_SC, ucp_Egyptian_Hieroglyphs },
{ 703, PT_SC, ucp_Elbasan },
{ 708, PT_SC, ucp_Elbasan },
{ 716, PT_SC, ucp_Elymaic },
{ 721, PT_SC, ucp_Elymaic },
{ 729, PT_SC, ucp_Ethiopic },
{ 734, PT_SC, ucp_Ethiopic },
{ 743, PT_SCX, ucp_Georgian },
{ 748, PT_SCX, ucp_Georgian },
{ 757, PT_SCX, ucp_Glagolitic },
{ 762, PT_SCX, ucp_Glagolitic },
{ 773, PT_SCX, ucp_Gunjala_Gondi },
{ 778, PT_SCX, ucp_Masaram_Gondi },
{ 783, PT_SCX, ucp_Gothic },
{ 788, PT_SCX, ucp_Gothic },
{ 783, PT_SC, ucp_Gothic },
{ 788, PT_SC, ucp_Gothic },
{ 795, PT_SCX, ucp_Grantha },
{ 800, PT_SCX, ucp_Grantha },
{ 808, PT_SCX, ucp_Greek },
@ -961,21 +961,21 @@ const ucp_type_table PRIV(utt)[] = {
{ 881, PT_SCX, ucp_Hanifi_Rohingya },
{ 896, PT_SCX, ucp_Hanunoo },
{ 901, PT_SCX, ucp_Hanunoo },
{ 909, PT_SCX, ucp_Hatran },
{ 914, PT_SCX, ucp_Hatran },
{ 921, PT_SCX, ucp_Hebrew },
{ 926, PT_SCX, ucp_Hebrew },
{ 909, PT_SC, ucp_Hatran },
{ 914, PT_SC, ucp_Hatran },
{ 921, PT_SC, ucp_Hebrew },
{ 926, PT_SC, ucp_Hebrew },
{ 933, PT_SCX, ucp_Hiragana },
{ 938, PT_SCX, ucp_Hiragana },
{ 947, PT_SCX, ucp_Anatolian_Hieroglyphs },
{ 952, PT_SCX, ucp_Pahawh_Hmong },
{ 957, PT_SCX, ucp_Nyiakeng_Puachue_Hmong },
{ 962, PT_SCX, ucp_Old_Hungarian },
{ 967, PT_SCX, ucp_Imperial_Aramaic },
{ 983, PT_SCX, ucp_Inherited },
{ 993, PT_SCX, ucp_Inscriptional_Pahlavi },
{ 1014, PT_SCX, ucp_Inscriptional_Parthian },
{ 1036, PT_SCX, ucp_Old_Italic },
{ 947, PT_SC, ucp_Anatolian_Hieroglyphs },
{ 952, PT_SC, ucp_Pahawh_Hmong },
{ 957, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
{ 962, PT_SC, ucp_Old_Hungarian },
{ 967, PT_SC, ucp_Imperial_Aramaic },
{ 983, PT_SC, ucp_Inherited },
{ 993, PT_SC, ucp_Inscriptional_Pahlavi },
{ 1014, PT_SC, ucp_Inscriptional_Parthian },
{ 1036, PT_SC, ucp_Old_Italic },
{ 1041, PT_SCX, ucp_Javanese },
{ 1046, PT_SCX, ucp_Javanese },
{ 1055, PT_SCX, ucp_Kaithi },
@ -984,123 +984,123 @@ const ucp_type_table PRIV(utt)[] = {
{ 1072, PT_SCX, ucp_Kannada },
{ 1080, PT_SCX, ucp_Katakana },
{ 1089, PT_SCX, ucp_Kayah_Li },
{ 1097, PT_SCX, ucp_Kharoshthi },
{ 1102, PT_SCX, ucp_Kharoshthi },
{ 1113, PT_SCX, ucp_Khitan_Small_Script },
{ 1131, PT_SCX, ucp_Khmer },
{ 1137, PT_SCX, ucp_Khmer },
{ 1097, PT_SC, ucp_Kharoshthi },
{ 1102, PT_SC, ucp_Kharoshthi },
{ 1113, PT_SC, ucp_Khitan_Small_Script },
{ 1131, PT_SC, ucp_Khmer },
{ 1137, PT_SC, ucp_Khmer },
{ 1142, PT_SCX, ucp_Khojki },
{ 1147, PT_SCX, ucp_Khojki },
{ 1154, PT_SCX, ucp_Khudawadi },
{ 1164, PT_SCX, ucp_Khitan_Small_Script },
{ 1164, PT_SC, ucp_Khitan_Small_Script },
{ 1169, PT_SCX, ucp_Kannada },
{ 1174, PT_SCX, ucp_Kaithi },
{ 1179, PT_GC, ucp_L },
{ 1181, PT_LAMP, 0 },
{ 1184, PT_SCX, ucp_Tai_Tham },
{ 1189, PT_SCX, ucp_Lao },
{ 1193, PT_SCX, ucp_Lao },
{ 1184, PT_SC, ucp_Tai_Tham },
{ 1189, PT_SC, ucp_Lao },
{ 1193, PT_SC, ucp_Lao },
{ 1198, PT_SCX, ucp_Latin },
{ 1204, PT_SCX, ucp_Latin },
{ 1209, PT_LAMP, 0 },
{ 1212, PT_SCX, ucp_Lepcha },
{ 1217, PT_SCX, ucp_Lepcha },
{ 1212, PT_SC, ucp_Lepcha },
{ 1217, PT_SC, ucp_Lepcha },
{ 1224, PT_SCX, ucp_Limbu },
{ 1229, PT_SCX, ucp_Limbu },
{ 1235, PT_SCX, ucp_Linear_A },
{ 1240, PT_SCX, ucp_Linear_B },
{ 1245, PT_SCX, ucp_Linear_A },
{ 1253, PT_SCX, ucp_Linear_B },
{ 1261, PT_SCX, ucp_Lisu },
{ 1261, PT_SC, ucp_Lisu },
{ 1266, PT_PC, ucp_Ll },
{ 1269, PT_PC, ucp_Lm },
{ 1272, PT_PC, ucp_Lo },
{ 1275, PT_PC, ucp_Lt },
{ 1278, PT_PC, ucp_Lu },
{ 1281, PT_SCX, ucp_Lycian },
{ 1286, PT_SCX, ucp_Lycian },
{ 1293, PT_SCX, ucp_Lydian },
{ 1298, PT_SCX, ucp_Lydian },
{ 1281, PT_SC, ucp_Lycian },
{ 1286, PT_SC, ucp_Lycian },
{ 1293, PT_SC, ucp_Lydian },
{ 1298, PT_SC, ucp_Lydian },
{ 1305, PT_GC, ucp_M },
{ 1307, PT_SCX, ucp_Mahajani },
{ 1316, PT_SCX, ucp_Mahajani },
{ 1321, PT_SCX, ucp_Makasar },
{ 1326, PT_SCX, ucp_Makasar },
{ 1321, PT_SC, ucp_Makasar },
{ 1326, PT_SC, ucp_Makasar },
{ 1334, PT_SCX, ucp_Malayalam },
{ 1344, PT_SCX, ucp_Mandaic },
{ 1349, PT_SCX, ucp_Mandaic },
{ 1357, PT_SCX, ucp_Manichaean },
{ 1362, PT_SCX, ucp_Manichaean },
{ 1373, PT_SCX, ucp_Marchen },
{ 1378, PT_SCX, ucp_Marchen },
{ 1373, PT_SC, ucp_Marchen },
{ 1378, PT_SC, ucp_Marchen },
{ 1386, PT_SCX, ucp_Masaram_Gondi },
{ 1399, PT_PC, ucp_Mc },
{ 1402, PT_PC, ucp_Me },
{ 1405, PT_SCX, ucp_Medefaidrin },
{ 1417, PT_SCX, ucp_Medefaidrin },
{ 1422, PT_SCX, ucp_Meetei_Mayek },
{ 1434, PT_SCX, ucp_Mende_Kikakui },
{ 1439, PT_SCX, ucp_Mende_Kikakui },
{ 1452, PT_SCX, ucp_Meroitic_Cursive },
{ 1457, PT_SCX, ucp_Meroitic_Hieroglyphs },
{ 1462, PT_SCX, ucp_Meroitic_Cursive },
{ 1478, PT_SCX, ucp_Meroitic_Hieroglyphs },
{ 1498, PT_SCX, ucp_Miao },
{ 1405, PT_SC, ucp_Medefaidrin },
{ 1417, PT_SC, ucp_Medefaidrin },
{ 1422, PT_SC, ucp_Meetei_Mayek },
{ 1434, PT_SC, ucp_Mende_Kikakui },
{ 1439, PT_SC, ucp_Mende_Kikakui },
{ 1452, PT_SC, ucp_Meroitic_Cursive },
{ 1457, PT_SC, ucp_Meroitic_Hieroglyphs },
{ 1462, PT_SC, ucp_Meroitic_Cursive },
{ 1478, PT_SC, ucp_Meroitic_Hieroglyphs },
{ 1498, PT_SC, ucp_Miao },
{ 1503, PT_SCX, ucp_Malayalam },
{ 1508, PT_PC, ucp_Mn },
{ 1511, PT_SCX, ucp_Modi },
{ 1516, PT_SCX, ucp_Mongolian },
{ 1521, PT_SCX, ucp_Mongolian },
{ 1531, PT_SCX, ucp_Mro },
{ 1535, PT_SCX, ucp_Mro },
{ 1540, PT_SCX, ucp_Meetei_Mayek },
{ 1531, PT_SC, ucp_Mro },
{ 1535, PT_SC, ucp_Mro },
{ 1540, PT_SC, ucp_Meetei_Mayek },
{ 1545, PT_SCX, ucp_Multani },
{ 1550, PT_SCX, ucp_Multani },
{ 1558, PT_SCX, ucp_Myanmar },
{ 1566, PT_SCX, ucp_Myanmar },
{ 1571, PT_GC, ucp_N },
{ 1573, PT_SCX, ucp_Nabataean },
{ 1573, PT_SC, ucp_Nabataean },
{ 1583, PT_SCX, ucp_Nandinagari },
{ 1588, PT_SCX, ucp_Nandinagari },
{ 1600, PT_SCX, ucp_Old_North_Arabian },
{ 1605, PT_SCX, ucp_Nabataean },
{ 1600, PT_SC, ucp_Old_North_Arabian },
{ 1605, PT_SC, ucp_Nabataean },
{ 1610, PT_PC, ucp_Nd },
{ 1613, PT_SCX, ucp_Newa },
{ 1618, PT_SCX, ucp_New_Tai_Lue },
{ 1613, PT_SC, ucp_Newa },
{ 1618, PT_SC, ucp_New_Tai_Lue },
{ 1628, PT_SCX, ucp_Nko },
{ 1632, PT_SCX, ucp_Nko },
{ 1637, PT_PC, ucp_Nl },
{ 1640, PT_PC, ucp_No },
{ 1643, PT_SCX, ucp_Nushu },
{ 1648, PT_SCX, ucp_Nushu },
{ 1654, PT_SCX, ucp_Nyiakeng_Puachue_Hmong },
{ 1675, PT_SCX, ucp_Ogham },
{ 1680, PT_SCX, ucp_Ogham },
{ 1686, PT_SCX, ucp_Ol_Chiki },
{ 1694, PT_SCX, ucp_Ol_Chiki },
{ 1699, PT_SCX, ucp_Old_Hungarian },
{ 1712, PT_SCX, ucp_Old_Italic },
{ 1722, PT_SCX, ucp_Old_North_Arabian },
{ 1643, PT_SC, ucp_Nushu },
{ 1648, PT_SC, ucp_Nushu },
{ 1654, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
{ 1675, PT_SC, ucp_Ogham },
{ 1680, PT_SC, ucp_Ogham },
{ 1686, PT_SC, ucp_Ol_Chiki },
{ 1694, PT_SC, ucp_Ol_Chiki },
{ 1699, PT_SC, ucp_Old_Hungarian },
{ 1712, PT_SC, ucp_Old_Italic },
{ 1722, PT_SC, ucp_Old_North_Arabian },
{ 1738, PT_SCX, ucp_Old_Permic },
{ 1748, PT_SCX, ucp_Old_Persian },
{ 1759, PT_SCX, ucp_Old_Sogdian },
{ 1770, PT_SCX, ucp_Old_South_Arabian },
{ 1786, PT_SCX, ucp_Old_Turkic },
{ 1748, PT_SC, ucp_Old_Persian },
{ 1759, PT_SC, ucp_Old_Sogdian },
{ 1770, PT_SC, ucp_Old_South_Arabian },
{ 1786, PT_SC, ucp_Old_Turkic },
{ 1796, PT_SCX, ucp_Old_Uyghur },
{ 1806, PT_SCX, ucp_Oriya },
{ 1812, PT_SCX, ucp_Old_Turkic },
{ 1812, PT_SC, ucp_Old_Turkic },
{ 1817, PT_SCX, ucp_Oriya },
{ 1822, PT_SCX, ucp_Osage },
{ 1828, PT_SCX, ucp_Osage },
{ 1833, PT_SCX, ucp_Osmanya },
{ 1838, PT_SCX, ucp_Osmanya },
{ 1822, PT_SC, ucp_Osage },
{ 1828, PT_SC, ucp_Osage },
{ 1833, PT_SC, ucp_Osmanya },
{ 1838, PT_SC, ucp_Osmanya },
{ 1846, PT_SCX, ucp_Old_Uyghur },
{ 1851, PT_GC, ucp_P },
{ 1853, PT_SCX, ucp_Pahawh_Hmong },
{ 1865, PT_SCX, ucp_Palmyrene },
{ 1870, PT_SCX, ucp_Palmyrene },
{ 1880, PT_SCX, ucp_Pau_Cin_Hau },
{ 1885, PT_SCX, ucp_Pau_Cin_Hau },
{ 1853, PT_SC, ucp_Pahawh_Hmong },
{ 1865, PT_SC, ucp_Palmyrene },
{ 1870, PT_SC, ucp_Palmyrene },
{ 1880, PT_SC, ucp_Pau_Cin_Hau },
{ 1885, PT_SC, ucp_Pau_Cin_Hau },
{ 1895, PT_PC, ucp_Pc },
{ 1898, PT_PC, ucp_Pd },
{ 1901, PT_PC, ucp_Pe },
@ -1108,36 +1108,36 @@ const ucp_type_table PRIV(utt)[] = {
{ 1909, PT_PC, ucp_Pf },
{ 1912, PT_SCX, ucp_Phags_Pa },
{ 1917, PT_SCX, ucp_Phags_Pa },
{ 1925, PT_SCX, ucp_Inscriptional_Pahlavi },
{ 1925, PT_SC, ucp_Inscriptional_Pahlavi },
{ 1930, PT_SCX, ucp_Psalter_Pahlavi },
{ 1935, PT_SCX, ucp_Phoenician },
{ 1940, PT_SCX, ucp_Phoenician },
{ 1935, PT_SC, ucp_Phoenician },
{ 1940, PT_SC, ucp_Phoenician },
{ 1951, PT_PC, ucp_Pi },
{ 1954, PT_SCX, ucp_Miao },
{ 1954, PT_SC, ucp_Miao },
{ 1959, PT_PC, ucp_Po },
{ 1962, PT_SCX, ucp_Inscriptional_Parthian },
{ 1962, PT_SC, ucp_Inscriptional_Parthian },
{ 1967, PT_PC, ucp_Ps },
{ 1970, PT_SCX, ucp_Psalter_Pahlavi },
{ 1985, PT_SCX, ucp_Rejang },
{ 1992, PT_SCX, ucp_Rejang },
{ 1985, PT_SC, ucp_Rejang },
{ 1992, PT_SC, ucp_Rejang },
{ 1997, PT_SCX, ucp_Hanifi_Rohingya },
{ 2002, PT_SCX, ucp_Runic },
{ 2008, PT_SCX, ucp_Runic },
{ 2002, PT_SC, ucp_Runic },
{ 2008, PT_SC, ucp_Runic },
{ 2013, PT_GC, ucp_S },
{ 2015, PT_SCX, ucp_Samaritan },
{ 2025, PT_SCX, ucp_Samaritan },
{ 2030, PT_SCX, ucp_Old_South_Arabian },
{ 2035, PT_SCX, ucp_Saurashtra },
{ 2040, PT_SCX, ucp_Saurashtra },
{ 2015, PT_SC, ucp_Samaritan },
{ 2025, PT_SC, ucp_Samaritan },
{ 2030, PT_SC, ucp_Old_South_Arabian },
{ 2035, PT_SC, ucp_Saurashtra },
{ 2040, PT_SC, ucp_Saurashtra },
{ 2051, PT_PC, ucp_Sc },
{ 2054, PT_SCX, ucp_SignWriting },
{ 2054, PT_SC, ucp_SignWriting },
{ 2059, PT_SCX, ucp_Sharada },
{ 2067, PT_SCX, ucp_Shavian },
{ 2075, PT_SCX, ucp_Shavian },
{ 2067, PT_SC, ucp_Shavian },
{ 2075, PT_SC, ucp_Shavian },
{ 2080, PT_SCX, ucp_Sharada },
{ 2085, PT_SCX, ucp_Siddham },
{ 2090, PT_SCX, ucp_Siddham },
{ 2098, PT_SCX, ucp_SignWriting },
{ 2085, PT_SC, ucp_Siddham },
{ 2090, PT_SC, ucp_Siddham },
{ 2098, PT_SC, ucp_SignWriting },
{ 2110, PT_SCX, ucp_Khudawadi },
{ 2115, PT_SCX, ucp_Sinhala },
{ 2120, PT_SCX, ucp_Sinhala },
@ -1146,13 +1146,13 @@ const ucp_type_table PRIV(utt)[] = {
{ 2134, PT_PC, ucp_So },
{ 2137, PT_SCX, ucp_Sogdian },
{ 2142, PT_SCX, ucp_Sogdian },
{ 2150, PT_SCX, ucp_Old_Sogdian },
{ 2155, PT_SCX, ucp_Sora_Sompeng },
{ 2160, PT_SCX, ucp_Sora_Sompeng },
{ 2172, PT_SCX, ucp_Soyombo },
{ 2177, PT_SCX, ucp_Soyombo },
{ 2185, PT_SCX, ucp_Sundanese },
{ 2190, PT_SCX, ucp_Sundanese },
{ 2150, PT_SC, ucp_Old_Sogdian },
{ 2155, PT_SC, ucp_Sora_Sompeng },
{ 2160, PT_SC, ucp_Sora_Sompeng },
{ 2172, PT_SC, ucp_Soyombo },
{ 2177, PT_SC, ucp_Soyombo },
{ 2185, PT_SC, ucp_Sundanese },
{ 2190, PT_SC, ucp_Sundanese },
{ 2200, PT_SCX, ucp_Syloti_Nagri },
{ 2205, PT_SCX, ucp_Syloti_Nagri },
{ 2217, PT_SCX, ucp_Syriac },
@ -1161,48 +1161,48 @@ const ucp_type_table PRIV(utt)[] = {
{ 2237, PT_SCX, ucp_Tagbanwa },
{ 2242, PT_SCX, ucp_Tagbanwa },
{ 2251, PT_SCX, ucp_Tai_Le },
{ 2257, PT_SCX, ucp_Tai_Tham },
{ 2265, PT_SCX, ucp_Tai_Viet },
{ 2257, PT_SC, ucp_Tai_Tham },
{ 2265, PT_SC, ucp_Tai_Viet },
{ 2273, PT_SCX, ucp_Takri },
{ 2278, PT_SCX, ucp_Takri },
{ 2284, PT_SCX, ucp_Tai_Le },
{ 2289, PT_SCX, ucp_New_Tai_Lue },
{ 2289, PT_SC, ucp_New_Tai_Lue },
{ 2294, PT_SCX, ucp_Tamil },
{ 2300, PT_SCX, ucp_Tamil },
{ 2305, PT_SCX, ucp_Tangut },
{ 2310, PT_SCX, ucp_Tangsa },
{ 2317, PT_SCX, ucp_Tangut },
{ 2324, PT_SCX, ucp_Tai_Viet },
{ 2305, PT_SC, ucp_Tangut },
{ 2310, PT_SC, ucp_Tangsa },
{ 2317, PT_SC, ucp_Tangut },
{ 2324, PT_SC, ucp_Tai_Viet },
{ 2329, PT_SCX, ucp_Telugu },
{ 2334, PT_SCX, ucp_Telugu },
{ 2341, PT_SCX, ucp_Tifinagh },
{ 2341, PT_SC, ucp_Tifinagh },
{ 2346, PT_SCX, ucp_Tagalog },
{ 2351, PT_SCX, ucp_Thaana },
{ 2356, PT_SCX, ucp_Thaana },
{ 2363, PT_SCX, ucp_Thai },
{ 2368, PT_SCX, ucp_Tibetan },
{ 2376, PT_SCX, ucp_Tibetan },
{ 2381, PT_SCX, ucp_Tifinagh },
{ 2363, PT_SC, ucp_Thai },
{ 2368, PT_SC, ucp_Tibetan },
{ 2376, PT_SC, ucp_Tibetan },
{ 2381, PT_SC, ucp_Tifinagh },
{ 2390, PT_SCX, ucp_Tirhuta },
{ 2395, PT_SCX, ucp_Tirhuta },
{ 2403, PT_SCX, ucp_Tangsa },
{ 2408, PT_SCX, ucp_Toto },
{ 2413, PT_SCX, ucp_Ugaritic },
{ 2418, PT_SCX, ucp_Ugaritic },
{ 2427, PT_SCX, ucp_Unknown },
{ 2435, PT_SCX, ucp_Vai },
{ 2439, PT_SCX, ucp_Vai },
{ 2444, PT_SCX, ucp_Vithkuqi },
{ 2449, PT_SCX, ucp_Vithkuqi },
{ 2458, PT_SCX, ucp_Wancho },
{ 2465, PT_SCX, ucp_Warang_Citi },
{ 2470, PT_SCX, ucp_Warang_Citi },
{ 2481, PT_SCX, ucp_Wancho },
{ 2403, PT_SC, ucp_Tangsa },
{ 2408, PT_SC, ucp_Toto },
{ 2413, PT_SC, ucp_Ugaritic },
{ 2418, PT_SC, ucp_Ugaritic },
{ 2427, PT_SC, ucp_Unknown },
{ 2435, PT_SC, ucp_Vai },
{ 2439, PT_SC, ucp_Vai },
{ 2444, PT_SC, ucp_Vithkuqi },
{ 2449, PT_SC, ucp_Vithkuqi },
{ 2458, PT_SC, ucp_Wancho },
{ 2465, PT_SC, ucp_Warang_Citi },
{ 2470, PT_SC, ucp_Warang_Citi },
{ 2481, PT_SC, ucp_Wancho },
{ 2486, PT_ALNUM, 0 },
{ 2490, PT_SCX, ucp_Old_Persian },
{ 2490, PT_SC, ucp_Old_Persian },
{ 2495, PT_PXSPACE, 0 },
{ 2499, PT_SPACE, 0 },
{ 2503, PT_SCX, ucp_Cuneiform },
{ 2503, PT_SC, ucp_Cuneiform },
{ 2508, PT_UCNC, 0 },
{ 2512, PT_WORD, 0 },
{ 2516, PT_SCX, ucp_Yezidi },
@ -1210,14 +1210,14 @@ const ucp_type_table PRIV(utt)[] = {
{ 2528, PT_SCX, ucp_Yi },
{ 2531, PT_SCX, ucp_Yi },
{ 2536, PT_GC, ucp_Z },
{ 2538, PT_SCX, ucp_Zanabazar_Square },
{ 2554, PT_SCX, ucp_Zanabazar_Square },
{ 2559, PT_SCX, ucp_Inherited },
{ 2538, PT_SC, ucp_Zanabazar_Square },
{ 2554, PT_SC, ucp_Zanabazar_Square },
{ 2559, PT_SC, ucp_Inherited },
{ 2564, PT_PC, ucp_Zl },
{ 2567, PT_PC, ucp_Zp },
{ 2570, PT_PC, ucp_Zs },
{ 2573, PT_SCX, ucp_Common },
{ 2578, PT_SCX, ucp_Unknown }
{ 2573, PT_SC, ucp_Common },
{ 2578, PT_SC, ucp_Unknown }
};
const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);

View File

@ -134,7 +134,6 @@ while ((t = *data++) != XCL_END)
else /* XCL_PROP & XCL_NOTPROP */
{
const ucd_record *prop = GET_UCD(c);
int scriptx;
BOOL isprop = t == XCL_PROP;
BOOL ok;
@ -163,10 +162,8 @@ while ((t = *data++) != XCL_END)
break;
case PT_SCX:
scriptx = prop->scriptx;
ok = data[1] == prop->script || data[1] == (PCRE2_UCHAR)scriptx;
if (!ok && scriptx < 0)
ok = MAPBIT(PRIV(ucd_script_sets) - scriptx, data[1]);
ok = (data[1] == prop->script ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, data[1]) != 0);
if (ok == isprop) return !negated;
break;