Rework script extension handling (#64)

Co-authored-by: Zoltan Herczeg <hzmester@freemail.hu>
2021-12-29 10:35:22 +01:00 · 2021-12-29 10:35:22 +01:00 · afa4756d19
parent 7713f33e46
commit afa4756d19
13 changed files with 3745 additions and 3736 deletions
--- a/maint/GenerateCommon.py
+++ b/maint/GenerateCommon.py
@ -184,6 +184,46 @@ break_properties = [
  'Extended_Pictographic', '14'
  ]

+# ---------------------------------------------------------------------------
+#                      REORDERING SCRIPT NAMES
+# ---------------------------------------------------------------------------
+
+import re
+
+def reorder_scripts():
+  global script_names
+  global script_abbrevs
+
+  extended_script_abbrevs = set()
+  with open("Unicode.tables/ScriptExtensions.txt") as f:
+    names_re = re.compile(r'^[0-9A-F]{4,6}(?:\.\.[0-9A-F]{4,6})? +; ([A-Za-z_ ]+) #')
+
+    for line in f:
+      match_obj = names_re.match(line)
+
+      if match_obj == None:
+        continue
+
+      for name in match_obj.group(1).split(" "):
+        extended_script_abbrevs.add(name)
+
+  new_script_names = []
+  new_script_abbrevs = []
+
+  for idx, abbrev in enumerate(script_abbrevs):
+    if abbrev in extended_script_abbrevs:
+      new_script_names.append(script_names[idx])
+      new_script_abbrevs.append(abbrev)
+
+  for idx, abbrev in enumerate(script_abbrevs):
+    if abbrev not in extended_script_abbrevs:
+      new_script_names.append(script_names[idx])
+      new_script_abbrevs.append(abbrev)
+
+  script_names = new_script_names
+  script_abbrevs = new_script_abbrevs
+
+reorder_scripts()

 # ---------------------------------------------------------------------------
 #                         DERIVED LISTS
--- a/maint/GenerateUcd.py
+++ b/maint/GenerateUcd.py
@ -252,30 +252,15 @@ def get_other_case(chardata):
 # Parse a line of ScriptExtensions.txt

 def get_script_extension(chardata):
-  this_script_list = list(chardata[1].split(' '))
-  if len(this_script_list) == 1:
-    return script_abbrevs.index(this_script_list[0])
+  global last_script_extension

-  script_numbers = []
-  for d in this_script_list:
-    script_numbers.append(script_abbrevs.index(d))
-  script_numbers.append(0)
-  script_numbers_length = len(script_numbers)
+  offset = len(script_lists) * script_list_item_size
+  if last_script_extension == chardata[1]:
+    return offset - script_list_item_size

-  for i in range(1, len(script_lists) - script_numbers_length + 1):
-    for j in range(0, script_numbers_length):
-      found = True
-      if script_lists[i+j] != script_numbers[j]:
-        found = False
-        break
-    if found:
-      return -i
-
-  # Not found in existing lists
-
-  return_value = len(script_lists)
-  script_lists.extend(script_numbers)
-  return -return_value
+  last_script_extension = chardata[1]
+  script_lists.append(tuple(script_abbrevs.index(abbrev) for abbrev in last_script_extension.split(' ')))
+  return offset


 # Read a whole table in memory, setting/checking the Unicode version
@ -538,26 +523,10 @@ file.close()
 # multiple scripts. Initialize this list with a single empty entry: offset zero
 # is the default for characters that have no script extensions, so it must map
 # to an all-zero bitmap.

-script_lists = [0]
-script_abbrevs_default = script_abbrevs.index('Zzzz')
-scriptx = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, script_abbrevs_default)
-
-# Scan all characters and set their default script extension to the main
-# script. We also have to adjust negative scriptx values, following a change in
-# the way these work. They are currently negated offsets into the script_lists
-# list, but have to be changed into indices in the new ucd_script_sets vector,
-# which has fixed-size entries. We can compute the new offset by counting the
-# zeros that precede the current offset.
-
-for i in range(0, MAX_UNICODE):
-  if scriptx[i] == script_abbrevs_default:
-    scriptx[i] = script[i]
-  elif scriptx[i] < 0:
-    count = 1
-    for j in range(-scriptx[i], 0, -1):
-      if script_lists[j] == 0:
-        count += 1
-    scriptx[i] = -count * (int(len(script_names)/32) + 1)
+script_lists = [[]]
+script_list_item_size = (script_names.index('Unknown') + 31) // 32
+last_script_extension = ""
+scriptx = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, 0)

 # With the addition of the Script Extensions field, we needed some padding to
 # get the Unicode records up to 12 bytes (multiple of 4). Originally this was a
@ -565,7 +534,7 @@ for i in range(0, MAX_UNICODE):
 # are now used for the bidi class, so zero will do.

 padding_dummy = [0] * MAX_UNICODE
-padding_dummy[0] = 0
+padding_dummy[0] = 256

 # This block of code was added by PH in September 2012. It scans the other_case
 # table to find sets of more than two characters that must all match each other
@ -806,24 +775,19 @@ f.write("""\
 const uint32_t PRIV(ucd_script_sets)[] = {
 """)

-bitword_count = len(script_names)/32 + 1
-bitwords = [0] * int(bitword_count)

 for d in script_lists:
-  if d == 0:
-    s = " "
-    f.write("  ")
-    for x in bitwords:
-      f.write("%s" % s)
-      s = ", "
-      f.write("0x%08xu" % x)
-    f.write(",\n")
-    bitwords = [0] * int(bitword_count)
+  bitwords = [0] * script_list_item_size

-  else:
-    x = int(d/32)
-    y = int(d%32)
-    bitwords[x] = bitwords[x] | (1 << y)
+  for idx in d:
+    bitwords[idx // 32] |= 1 << (idx % 31)
+
+  s = " "
+  for x in bitwords:
+    f.write("%s" % s)
+    s = ", "
+    f.write("0x%08xu" % x)
+  f.write(",\n")

 f.write("};\n\n")

--- a/maint/GenerateUcpHeader.py
+++ b/maint/GenerateUcpHeader.py
@ -64,8 +64,10 @@ for i in range(0, len(break_properties), 2):
    f.write("  ucp_gb%s,%s /* %s */\n" % (break_properties[i], sp, break_properties[i+1]))
 f.write("};\n\n")

-f.write("/* These are the script identifications, additions happen at the end. */\n\nenum {\n")
+f.write("/* These are the script identifications. */\n\nenum {\n  /* Scripts which has characters in other scripts. */\n")
 for i in script_names:
+    if i == "Unknown":
+      f.write("\n  /* Scripts which has no characters in other scripts. */\n")
    f.write("  ucp_%s,\n" % i)
 f.write("\n")

--- a/maint/GenerateUcpTables.py
+++ b/maint/GenerateUcpTables.py
@ -92,8 +92,12 @@ std_bidi_class_names = stdnames(bidi_class_names)
 # latter is used for the ucp_xx names. NOTE: for the script abbreviations, we
 # still use the full original names.

-utt_table  = list(zip(std_script_names, script_names, ['PT_SCX'] * len(script_names)))
-utt_table += list(zip(std_script_abbrevs, script_names, ['PT_SCX'] * len(script_abbrevs)))
+scx_end = script_names.index('Unknown')
+
+utt_table  = list(zip(std_script_names[0:scx_end], script_names[0:scx_end], ['PT_SCX'] * scx_end))
+utt_table += list(zip(std_script_names[scx_end:], script_names[scx_end:], ['PT_SC'] * (len(script_names) - scx_end)))
+utt_table += list(zip(std_script_abbrevs[0:scx_end], script_names[0:scx_end], ['PT_SCX'] * scx_end))
+utt_table += list(zip(std_script_abbrevs[scx_end:], script_names[scx_end:], ['PT_SC'] * (len(script_names) - scx_end)))

 # At least one script abbreviation is the same as the full name of the script,
 # so we must remove duplicates. It doesn't matter if this operation changes the
--- a/src/pcre2_auto_possess.c
+++ b/src/pcre2_auto_possess.c
@ -200,7 +200,6 @@ check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
  BOOL negated)
 {
 BOOL ok;
-int scriptx;
 const uint32_t *p;
 const ucd_record *prop = GET_UCD(c);

@ -221,10 +220,8 @@ switch(ptype)
  return (pdata == prop->script) == negated;

  case PT_SCX:
-  scriptx = prop->scriptx;
-  ok = pdata == prop->script || pdata == (unsigned int)scriptx;
-  if (!ok && scriptx < 0)
-    ok = MAPBIT(PRIV(ucd_script_sets) - scriptx, pdata) != 0;
+  ok = (pdata == prop->script
+        || MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, pdata) != 0);
  return ok == negated;

  /* These are specials */
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@ -2206,13 +2206,23 @@ while (bot < top)
    {
    *pdataptr = PRIV(utt)[i].value;
    if (vptr == NULL || ptscript == PT_NOTSCRIPT)
-      *ptypeptr = PRIV(utt)[i].type;
-    else
      {
-      if (PRIV(utt)[i].type != PT_SCX) break;  /* Non-script found */
-      *ptypeptr = ptscript;
+      *ptypeptr = PRIV(utt)[i].type;
+      return TRUE;
      }
-    return TRUE;
+
+    switch (PRIV(utt)[i].type)
+      {
+      case PT_SC:
+      *ptypeptr = PT_SC;
+      return TRUE;
+
+      case PT_SCX:
+      *ptypeptr = ptscript;
+      return TRUE;
+      }
+
+    break;  /* Non-script found */
    }

  if (r > 0) bot = i + 1; else top = i;
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@ -1194,9 +1194,8 @@ for (;;)
          break;

          case PT_SCX:
-          OK = prop->script == code[2] || prop->scriptx == (int)code[2];
-          if (!OK && prop->scriptx < 0)
-            OK = MAPBIT(PRIV(ucd_script_sets) - prop->scriptx, code[2]) != 0;
+          OK = (prop->script == code[2] ||
+                MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[2]) != 0);
          break;

          /* These are specials for combination cases. */
@ -1466,9 +1465,8 @@ for (;;)
          break;

          case PT_SCX:
-          OK = prop->script == code[3] || prop->scriptx == (int)code[3];
-          if (!OK && prop->scriptx < 0)
-            OK = MAPBIT(PRIV(ucd_script_sets) - prop->scriptx, code[3]) != 0;
+          OK = (prop->script == code[3] ||
+                MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[3]) != 0);
          break;

          /* These are specials for combination cases. */
@ -1721,9 +1719,8 @@ for (;;)
          break;

          case PT_SCX:
-          OK = prop->script == code[3] || prop->scriptx == (int)code[3];
-          if (!OK && prop->scriptx < 0)
-            OK = MAPBIT(PRIV(ucd_script_sets) - prop->scriptx, code[3]) != 0;
+          OK = (prop->script == code[3] ||
+                MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[3]) != 0);
          break;

          /* These are specials for combination cases. */
@ -2001,11 +1998,9 @@ for (;;)
          break;

          case PT_SCX:
-          OK = prop->script == code[1 + IMM2_SIZE + 2] ||
-               prop->scriptx == (int)code[1 + IMM2_SIZE + 2];
-          if (!OK && prop->scriptx < 0)
-            OK = MAPBIT(PRIV(ucd_script_sets) - prop->scriptx,
-              code[1 + IMM2_SIZE + 2]) != 0;
+          OK = (prop->script == code[1 + IMM2_SIZE + 2] ||
+                MAPBIT(PRIV(ucd_script_sets) + prop->scriptx,
+                  code[1 + IMM2_SIZE + 2]) != 0);
          break;

          /* These are specials for combination cases. */
--- a/src/pcre2_internal.h
+++ b/src/pcre2_internal.h
@ -1822,9 +1822,9 @@ typedef struct {
  uint8_t gbprop;     /* ucp_gbControl, etc. (grapheme break property) */
  uint8_t caseset;    /* offset to multichar other cases or zero */
  int32_t other_case; /* offset to other case, or zero if none */
-  int16_t scriptx;    /* script extension value */
+  uint8_t scriptx;    /* script extension value */
  uint8_t bidi;       /* bidi class and control flag */
-  uint8_t dummy;      /* spare - to round to multiple of 4 bytes */
+  uint16_t dummy;     /* spare - to round to multiple of 4 bytes */
 } ucd_record;

 /* UCD access macros */
@ -1849,8 +1849,8 @@ typedef struct {
 #define UCD_OTHERCASE(ch)   ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case)))
 #define UCD_SCRIPTX(ch)     GET_UCD(ch)->scriptx

-/* The "scriptx" field, when negative, gives an offset into a vector of 32-bit
-words that form a bitmap representing a list of scripts. This macro tests for a
+/* The "scriptx" field gives an offset into a vector of 32-bit words that
+form a bitmap representing a list of scripts. This macro tests for a
 script in the map by number. */

 #define MAPBIT(map,script) ((map)[(script)/32]&(1u<<((script)%32)))
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@ -2454,11 +2454,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);

        case PT_SCX:
          {
-          int scriptx = prop->scriptx;
-          BOOL ok = Fecode[2] == prop->script ||
-                    Fecode[2] == (unsigned int)scriptx;
-          if (!ok && scriptx < 0)
-            ok = MAPBIT((PRIV(ucd_script_sets) - scriptx), Fecode[2]) != 0;
+          BOOL ok = (Fecode[2] == prop->script ||
+                     MAPBIT((PRIV(ucd_script_sets) + prop->scriptx), Fecode[2]) != 0);
          if (ok == notmatch) RRETURN(MATCH_NOMATCH);
          }
        break;
@ -2728,7 +2725,6 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
          for (i = 1; i <= Lmin; i++)
            {
            BOOL ok;
-            int scriptx;
            const ucd_record *prop;
            if (Feptr >= mb->end_subject)
              {
@ -2737,10 +2733,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
              }
            GETCHARINCTEST(fc, Feptr);
            prop = GET_UCD(fc);
-            scriptx = prop->scriptx;
-            ok = prop->script == Lpropvalue || scriptx == (int)Lpropvalue;
-            if (!ok && scriptx < 0)
-              ok = MAPBIT(PRIV(ucd_script_sets) - scriptx, Lpropvalue) != 0;
+            ok = (prop->script == Lpropvalue ||
+                  MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0);
            if (ok == notmatch)
              RRETURN(MATCH_NOMATCH);
            }
@ -3521,7 +3515,6 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
          for (;;)
            {
            BOOL ok;
-            int scriptx;
            const ucd_record *prop;
            RMATCH(Fecode, RM225);
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
@ -3533,10 +3526,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
              }
            GETCHARINCTEST(fc, Feptr);
            prop = GET_UCD(fc);
-            scriptx = prop->scriptx;
-            ok = prop->script == Lpropvalue || scriptx == (int)Lpropvalue;
-            if (!ok && scriptx < 0)
-              ok = MAPBIT(PRIV(ucd_script_sets) - scriptx, Lpropvalue) != 0;
+            ok = (prop->script == Lpropvalue
+                  || MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0);
            if (ok == (Lctype == OP_NOTPROP))
              RRETURN(MATCH_NOMATCH);
            }
@ -4104,7 +4095,6 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
            {
            BOOL ok;
            const ucd_record *prop;
-            int scriptx;
            int len = 1;
            if (Feptr >= mb->end_subject)
              {
@ -4113,10 +4103,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
              }
            GETCHARLENTEST(fc, Feptr, len);
            prop = GET_UCD(fc);
-            scriptx = prop->scriptx;
-            ok = prop->script == Lpropvalue || scriptx == (int)Lpropvalue;
-            if (!ok && scriptx < 0)
-              ok = MAPBIT(PRIV(ucd_script_sets) - scriptx, Lpropvalue) != 0;
+            ok = (prop->script == Lpropvalue ||
+                  MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0);
            if (ok == notmatch) break;
            Feptr+= len;
            }
--- a/src/pcre2_ucd.c
+++ b/src/pcre2_ucd.c
--- a/src/pcre2_ucp.h
+++ b/src/pcre2_ucp.h
@ -149,57 +149,37 @@ enum {
  ucp_gbExtended_Pictographic, /* 14 */
 };

-/* These are the script identifications, additions happen at the end. */
+/* These are the script identifications. */

 enum {
-  ucp_Unknown,
+  /* Scripts which has characters in other scripts. */
  ucp_Arabic,
-  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
-  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
-  ucp_Canadian_Aboriginal,
-  ucp_Cherokee,
-  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
-  ucp_Deseret,
  ucp_Devanagari,
-  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
-  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
-  ucp_Hebrew,
  ucp_Hiragana,
-  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
-  ucp_Kharoshthi,
-  ucp_Khmer,
-  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
-  ucp_New_Tai_Lue,
-  ucp_Ogham,
-  ucp_Old_Italic,
-  ucp_Old_Persian,
  ucp_Oriya,
-  ucp_Osmanya,
-  ucp_Runic,
-  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
@ -209,19 +189,70 @@ enum {
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
+  ucp_Yi,
+  ucp_Nko,
+  ucp_Phags_Pa,
+  ucp_Kayah_Li,
+  ucp_Javanese,
+  ucp_Kaithi,
+  ucp_Mandaic,
+  ucp_Chakma,
+  ucp_Sharada,
+  ucp_Takri,
+  ucp_Duployan,
+  ucp_Grantha,
+  ucp_Khojki,
+  ucp_Khudawadi,
+  ucp_Linear_A,
+  ucp_Mahajani,
+  ucp_Manichaean,
+  ucp_Modi,
+  ucp_Old_Permic,
+  ucp_Psalter_Pahlavi,
+  ucp_Tirhuta,
+  ucp_Multani,
+  ucp_Adlam,
+  ucp_Masaram_Gondi,
+  ucp_Dogra,
+  ucp_Gunjala_Gondi,
+  ucp_Hanifi_Rohingya,
+  ucp_Sogdian,
+  ucp_Nandinagari,
+  ucp_Yezidi,
+  ucp_Cypro_Minoan,
+  ucp_Old_Uyghur,
+
+  /* Scripts which has no characters in other scripts. */
+  ucp_Unknown,
+  ucp_Armenian,
+  ucp_Braille,
+  ucp_Canadian_Aboriginal,
+  ucp_Cherokee,
+  ucp_Common,
+  ucp_Deseret,
+  ucp_Ethiopic,
+  ucp_Gothic,
+  ucp_Hebrew,
+  ucp_Inherited,
+  ucp_Kharoshthi,
+  ucp_Khmer,
+  ucp_Lao,
+  ucp_New_Tai_Lue,
+  ucp_Ogham,
+  ucp_Old_Italic,
+  ucp_Old_Persian,
+  ucp_Osmanya,
+  ucp_Runic,
+  ucp_Shavian,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
-  ucp_Yi,
  ucp_Balinese,
  ucp_Cuneiform,
-  ucp_Nko,
-  ucp_Phags_Pa,
  ucp_Phoenician,
  ucp_Carian,
  ucp_Cham,
-  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
@ -236,8 +267,6 @@ enum {
  ucp_Imperial_Aramaic,
  ucp_Inscriptional_Pahlavi,
  ucp_Inscriptional_Parthian,
-  ucp_Javanese,
-  ucp_Kaithi,
  ucp_Lisu,
  ucp_Meetei_Mayek,
  ucp_Old_South_Arabian,
@ -247,70 +276,44 @@ enum {
  ucp_Tai_Viet,
  ucp_Batak,
  ucp_Brahmi,
-  ucp_Mandaic,
-  ucp_Chakma,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
-  ucp_Sharada,
  ucp_Sora_Sompeng,
-  ucp_Takri,
  ucp_Bassa_Vah,
  ucp_Caucasian_Albanian,
-  ucp_Duployan,
  ucp_Elbasan,
-  ucp_Grantha,
-  ucp_Khojki,
-  ucp_Khudawadi,
-  ucp_Linear_A,
-  ucp_Mahajani,
-  ucp_Manichaean,
  ucp_Mende_Kikakui,
-  ucp_Modi,
  ucp_Mro,
  ucp_Nabataean,
  ucp_Old_North_Arabian,
-  ucp_Old_Permic,
  ucp_Pahawh_Hmong,
  ucp_Palmyrene,
-  ucp_Psalter_Pahlavi,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
-  ucp_Tirhuta,
  ucp_Warang_Citi,
  ucp_Ahom,
  ucp_Anatolian_Hieroglyphs,
  ucp_Hatran,
-  ucp_Multani,
  ucp_Old_Hungarian,
  ucp_SignWriting,
-  ucp_Adlam,
  ucp_Bhaiksuki,
  ucp_Marchen,
  ucp_Newa,
  ucp_Osage,
  ucp_Tangut,
-  ucp_Masaram_Gondi,
  ucp_Nushu,
  ucp_Soyombo,
  ucp_Zanabazar_Square,
-  ucp_Dogra,
-  ucp_Gunjala_Gondi,
-  ucp_Hanifi_Rohingya,
  ucp_Makasar,
  ucp_Medefaidrin,
  ucp_Old_Sogdian,
-  ucp_Sogdian,
  ucp_Elymaic,
-  ucp_Nandinagari,
  ucp_Nyiakeng_Puachue_Hmong,
  ucp_Wancho,
  ucp_Chorasmian,
  ucp_Dives_Akuru,
  ucp_Khitan_Small_Script,
-  ucp_Yezidi,
-  ucp_Cypro_Minoan,
-  ucp_Old_Uyghur,
  ucp_Tangsa,
  ucp_Toto,
  ucp_Vithkuqi,
--- a/src/pcre2_ucptables.c
+++ b/src/pcre2_ucptables.c
@ -833,29 +833,29 @@ const char PRIV(utt_names)[] =
 const ucp_type_table PRIV(utt)[] = {
  {   0, PT_SCX, ucp_Adlam },
  {   6, PT_SCX, ucp_Adlam },
-  {  11, PT_SCX, ucp_Caucasian_Albanian },
-  {  16, PT_SCX, ucp_Ahom },
-  {  21, PT_SCX, ucp_Anatolian_Hieroglyphs },
+  {  11, PT_SC, ucp_Caucasian_Albanian },
+  {  16, PT_SC, ucp_Ahom },
+  {  21, PT_SC, ucp_Anatolian_Hieroglyphs },
  {  42, PT_ANY, 0 },
  {  46, PT_SCX, ucp_Arabic },
  {  51, PT_SCX, ucp_Arabic },
-  {  58, PT_SCX, ucp_Armenian },
-  {  67, PT_SCX, ucp_Imperial_Aramaic },
-  {  72, PT_SCX, ucp_Armenian },
-  {  77, PT_SCX, ucp_Avestan },
-  {  85, PT_SCX, ucp_Avestan },
-  {  90, PT_SCX, ucp_Balinese },
-  {  95, PT_SCX, ucp_Balinese },
-  { 104, PT_SCX, ucp_Bamum },
-  { 109, PT_SCX, ucp_Bamum },
-  { 115, PT_SCX, ucp_Bassa_Vah },
-  { 120, PT_SCX, ucp_Bassa_Vah },
-  { 129, PT_SCX, ucp_Batak },
-  { 135, PT_SCX, ucp_Batak },
+  {  58, PT_SC, ucp_Armenian },
+  {  67, PT_SC, ucp_Imperial_Aramaic },
+  {  72, PT_SC, ucp_Armenian },
+  {  77, PT_SC, ucp_Avestan },
+  {  85, PT_SC, ucp_Avestan },
+  {  90, PT_SC, ucp_Balinese },
+  {  95, PT_SC, ucp_Balinese },
+  { 104, PT_SC, ucp_Bamum },
+  { 109, PT_SC, ucp_Bamum },
+  { 115, PT_SC, ucp_Bassa_Vah },
+  { 120, PT_SC, ucp_Bassa_Vah },
+  { 129, PT_SC, ucp_Batak },
+  { 135, PT_SC, ucp_Batak },
  { 140, PT_SCX, ucp_Bengali },
  { 145, PT_SCX, ucp_Bengali },
-  { 153, PT_SCX, ucp_Bhaiksuki },
-  { 163, PT_SCX, ucp_Bhaiksuki },
+  { 153, PT_SC, ucp_Bhaiksuki },
+  { 163, PT_SC, ucp_Bhaiksuki },
  { 168, PT_BIDICL, ucp_bidiAL },
  { 175, PT_BIDICL, ucp_bidiAN },
  { 182, PT_BIDICL, ucp_bidiB },
@ -883,68 +883,68 @@ const ucp_type_table PRIV(utt)[] = {
  { 346, PT_BIDICL, ucp_bidiWS },
  { 353, PT_SCX, ucp_Bopomofo },
  { 358, PT_SCX, ucp_Bopomofo },
-  { 367, PT_SCX, ucp_Brahmi },
-  { 372, PT_SCX, ucp_Brahmi },
-  { 379, PT_SCX, ucp_Braille },
-  { 384, PT_SCX, ucp_Braille },
+  { 367, PT_SC, ucp_Brahmi },
+  { 372, PT_SC, ucp_Brahmi },
+  { 379, PT_SC, ucp_Braille },
+  { 384, PT_SC, ucp_Braille },
  { 392, PT_SCX, ucp_Buginese },
  { 397, PT_SCX, ucp_Buginese },
  { 406, PT_SCX, ucp_Buhid },
  { 411, PT_SCX, ucp_Buhid },
  { 417, PT_GC, ucp_C },
  { 419, PT_SCX, ucp_Chakma },
-  { 424, PT_SCX, ucp_Canadian_Aboriginal },
-  { 443, PT_SCX, ucp_Canadian_Aboriginal },
-  { 448, PT_SCX, ucp_Carian },
-  { 453, PT_SCX, ucp_Carian },
-  { 460, PT_SCX, ucp_Caucasian_Albanian },
+  { 424, PT_SC, ucp_Canadian_Aboriginal },
+  { 443, PT_SC, ucp_Canadian_Aboriginal },
+  { 448, PT_SC, ucp_Carian },
+  { 453, PT_SC, ucp_Carian },
+  { 460, PT_SC, ucp_Caucasian_Albanian },
  { 478, PT_PC, ucp_Cc },
  { 481, PT_PC, ucp_Cf },
  { 484, PT_SCX, ucp_Chakma },
-  { 491, PT_SCX, ucp_Cham },
-  { 496, PT_SCX, ucp_Cherokee },
-  { 501, PT_SCX, ucp_Cherokee },
-  { 510, PT_SCX, ucp_Chorasmian },
-  { 521, PT_SCX, ucp_Chorasmian },
+  { 491, PT_SC, ucp_Cham },
+  { 496, PT_SC, ucp_Cherokee },
+  { 501, PT_SC, ucp_Cherokee },
+  { 510, PT_SC, ucp_Chorasmian },
+  { 521, PT_SC, ucp_Chorasmian },
  { 526, PT_PC, ucp_Cn },
  { 529, PT_PC, ucp_Co },
-  { 532, PT_SCX, ucp_Common },
+  { 532, PT_SC, ucp_Common },
  { 539, PT_SCX, ucp_Coptic },
  { 544, PT_SCX, ucp_Coptic },
  { 551, PT_SCX, ucp_Cypro_Minoan },
  { 556, PT_SCX, ucp_Cypriot },
  { 561, PT_PC, ucp_Cs },
-  { 564, PT_SCX, ucp_Cuneiform },
+  { 564, PT_SC, ucp_Cuneiform },
  { 574, PT_SCX, ucp_Cypriot },
  { 582, PT_SCX, ucp_Cypro_Minoan },
  { 594, PT_SCX, ucp_Cyrillic },
  { 603, PT_SCX, ucp_Cyrillic },
-  { 608, PT_SCX, ucp_Deseret },
+  { 608, PT_SC, ucp_Deseret },
  { 616, PT_SCX, ucp_Devanagari },
  { 621, PT_SCX, ucp_Devanagari },
-  { 632, PT_SCX, ucp_Dives_Akuru },
-  { 637, PT_SCX, ucp_Dives_Akuru },
+  { 632, PT_SC, ucp_Dives_Akuru },
+  { 637, PT_SC, ucp_Dives_Akuru },
  { 648, PT_SCX, ucp_Dogra },
  { 653, PT_SCX, ucp_Dogra },
-  { 659, PT_SCX, ucp_Deseret },
+  { 659, PT_SC, ucp_Deseret },
  { 664, PT_SCX, ucp_Duployan },
  { 669, PT_SCX, ucp_Duployan },
-  { 678, PT_SCX, ucp_Egyptian_Hieroglyphs },
-  { 683, PT_SCX, ucp_Egyptian_Hieroglyphs },
-  { 703, PT_SCX, ucp_Elbasan },
-  { 708, PT_SCX, ucp_Elbasan },
-  { 716, PT_SCX, ucp_Elymaic },
-  { 721, PT_SCX, ucp_Elymaic },
-  { 729, PT_SCX, ucp_Ethiopic },
-  { 734, PT_SCX, ucp_Ethiopic },
+  { 678, PT_SC, ucp_Egyptian_Hieroglyphs },
+  { 683, PT_SC, ucp_Egyptian_Hieroglyphs },
+  { 703, PT_SC, ucp_Elbasan },
+  { 708, PT_SC, ucp_Elbasan },
+  { 716, PT_SC, ucp_Elymaic },
+  { 721, PT_SC, ucp_Elymaic },
+  { 729, PT_SC, ucp_Ethiopic },
+  { 734, PT_SC, ucp_Ethiopic },
  { 743, PT_SCX, ucp_Georgian },
  { 748, PT_SCX, ucp_Georgian },
  { 757, PT_SCX, ucp_Glagolitic },
  { 762, PT_SCX, ucp_Glagolitic },
  { 773, PT_SCX, ucp_Gunjala_Gondi },
  { 778, PT_SCX, ucp_Masaram_Gondi },
-  { 783, PT_SCX, ucp_Gothic },
-  { 788, PT_SCX, ucp_Gothic },
+  { 783, PT_SC, ucp_Gothic },
+  { 788, PT_SC, ucp_Gothic },
  { 795, PT_SCX, ucp_Grantha },
  { 800, PT_SCX, ucp_Grantha },
  { 808, PT_SCX, ucp_Greek },
@ -961,21 +961,21 @@ const ucp_type_table PRIV(utt)[] = {
  { 881, PT_SCX, ucp_Hanifi_Rohingya },
  { 896, PT_SCX, ucp_Hanunoo },
  { 901, PT_SCX, ucp_Hanunoo },
-  { 909, PT_SCX, ucp_Hatran },
-  { 914, PT_SCX, ucp_Hatran },
-  { 921, PT_SCX, ucp_Hebrew },
-  { 926, PT_SCX, ucp_Hebrew },
+  { 909, PT_SC, ucp_Hatran },
+  { 914, PT_SC, ucp_Hatran },
+  { 921, PT_SC, ucp_Hebrew },
+  { 926, PT_SC, ucp_Hebrew },
  { 933, PT_SCX, ucp_Hiragana },
  { 938, PT_SCX, ucp_Hiragana },
-  { 947, PT_SCX, ucp_Anatolian_Hieroglyphs },
-  { 952, PT_SCX, ucp_Pahawh_Hmong },
-  { 957, PT_SCX, ucp_Nyiakeng_Puachue_Hmong },
-  { 962, PT_SCX, ucp_Old_Hungarian },
-  { 967, PT_SCX, ucp_Imperial_Aramaic },
-  { 983, PT_SCX, ucp_Inherited },
-  { 993, PT_SCX, ucp_Inscriptional_Pahlavi },
-  { 1014, PT_SCX, ucp_Inscriptional_Parthian },
-  { 1036, PT_SCX, ucp_Old_Italic },
+  { 947, PT_SC, ucp_Anatolian_Hieroglyphs },
+  { 952, PT_SC, ucp_Pahawh_Hmong },
+  { 957, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
+  { 962, PT_SC, ucp_Old_Hungarian },
+  { 967, PT_SC, ucp_Imperial_Aramaic },
+  { 983, PT_SC, ucp_Inherited },
+  { 993, PT_SC, ucp_Inscriptional_Pahlavi },
+  { 1014, PT_SC, ucp_Inscriptional_Parthian },
+  { 1036, PT_SC, ucp_Old_Italic },
  { 1041, PT_SCX, ucp_Javanese },
  { 1046, PT_SCX, ucp_Javanese },
  { 1055, PT_SCX, ucp_Kaithi },
@ -984,123 +984,123 @@ const ucp_type_table PRIV(utt)[] = {
  { 1072, PT_SCX, ucp_Kannada },
  { 1080, PT_SCX, ucp_Katakana },
  { 1089, PT_SCX, ucp_Kayah_Li },
-  { 1097, PT_SCX, ucp_Kharoshthi },
-  { 1102, PT_SCX, ucp_Kharoshthi },
-  { 1113, PT_SCX, ucp_Khitan_Small_Script },
-  { 1131, PT_SCX, ucp_Khmer },
-  { 1137, PT_SCX, ucp_Khmer },
+  { 1097, PT_SC, ucp_Kharoshthi },
+  { 1102, PT_SC, ucp_Kharoshthi },
+  { 1113, PT_SC, ucp_Khitan_Small_Script },
+  { 1131, PT_SC, ucp_Khmer },
+  { 1137, PT_SC, ucp_Khmer },
  { 1142, PT_SCX, ucp_Khojki },
  { 1147, PT_SCX, ucp_Khojki },
  { 1154, PT_SCX, ucp_Khudawadi },
-  { 1164, PT_SCX, ucp_Khitan_Small_Script },
+  { 1164, PT_SC, ucp_Khitan_Small_Script },
  { 1169, PT_SCX, ucp_Kannada },
  { 1174, PT_SCX, ucp_Kaithi },
  { 1179, PT_GC, ucp_L },
  { 1181, PT_LAMP, 0 },
-  { 1184, PT_SCX, ucp_Tai_Tham },
-  { 1189, PT_SCX, ucp_Lao },
-  { 1193, PT_SCX, ucp_Lao },
+  { 1184, PT_SC, ucp_Tai_Tham },
+  { 1189, PT_SC, ucp_Lao },
+  { 1193, PT_SC, ucp_Lao },
  { 1198, PT_SCX, ucp_Latin },
  { 1204, PT_SCX, ucp_Latin },
  { 1209, PT_LAMP, 0 },
-  { 1212, PT_SCX, ucp_Lepcha },
-  { 1217, PT_SCX, ucp_Lepcha },
+  { 1212, PT_SC, ucp_Lepcha },
+  { 1217, PT_SC, ucp_Lepcha },
  { 1224, PT_SCX, ucp_Limbu },
  { 1229, PT_SCX, ucp_Limbu },
  { 1235, PT_SCX, ucp_Linear_A },
  { 1240, PT_SCX, ucp_Linear_B },
  { 1245, PT_SCX, ucp_Linear_A },
  { 1253, PT_SCX, ucp_Linear_B },
-  { 1261, PT_SCX, ucp_Lisu },
+  { 1261, PT_SC, ucp_Lisu },
  { 1266, PT_PC, ucp_Ll },
  { 1269, PT_PC, ucp_Lm },
  { 1272, PT_PC, ucp_Lo },
  { 1275, PT_PC, ucp_Lt },
  { 1278, PT_PC, ucp_Lu },
-  { 1281, PT_SCX, ucp_Lycian },
-  { 1286, PT_SCX, ucp_Lycian },
-  { 1293, PT_SCX, ucp_Lydian },
-  { 1298, PT_SCX, ucp_Lydian },
+  { 1281, PT_SC, ucp_Lycian },
+  { 1286, PT_SC, ucp_Lycian },
+  { 1293, PT_SC, ucp_Lydian },
+  { 1298, PT_SC, ucp_Lydian },
  { 1305, PT_GC, ucp_M },
  { 1307, PT_SCX, ucp_Mahajani },
  { 1316, PT_SCX, ucp_Mahajani },
-  { 1321, PT_SCX, ucp_Makasar },
-  { 1326, PT_SCX, ucp_Makasar },
+  { 1321, PT_SC, ucp_Makasar },
+  { 1326, PT_SC, ucp_Makasar },
  { 1334, PT_SCX, ucp_Malayalam },
  { 1344, PT_SCX, ucp_Mandaic },
  { 1349, PT_SCX, ucp_Mandaic },
  { 1357, PT_SCX, ucp_Manichaean },
  { 1362, PT_SCX, ucp_Manichaean },
-  { 1373, PT_SCX, ucp_Marchen },
-  { 1378, PT_SCX, ucp_Marchen },
+  { 1373, PT_SC, ucp_Marchen },
+  { 1378, PT_SC, ucp_Marchen },
  { 1386, PT_SCX, ucp_Masaram_Gondi },
  { 1399, PT_PC, ucp_Mc },
  { 1402, PT_PC, ucp_Me },
-  { 1405, PT_SCX, ucp_Medefaidrin },
-  { 1417, PT_SCX, ucp_Medefaidrin },
-  { 1422, PT_SCX, ucp_Meetei_Mayek },
-  { 1434, PT_SCX, ucp_Mende_Kikakui },
-  { 1439, PT_SCX, ucp_Mende_Kikakui },
-  { 1452, PT_SCX, ucp_Meroitic_Cursive },
-  { 1457, PT_SCX, ucp_Meroitic_Hieroglyphs },
-  { 1462, PT_SCX, ucp_Meroitic_Cursive },
-  { 1478, PT_SCX, ucp_Meroitic_Hieroglyphs },
-  { 1498, PT_SCX, ucp_Miao },
+  { 1405, PT_SC, ucp_Medefaidrin },
+  { 1417, PT_SC, ucp_Medefaidrin },
+  { 1422, PT_SC, ucp_Meetei_Mayek },
+  { 1434, PT_SC, ucp_Mende_Kikakui },
+  { 1439, PT_SC, ucp_Mende_Kikakui },
+  { 1452, PT_SC, ucp_Meroitic_Cursive },
+  { 1457, PT_SC, ucp_Meroitic_Hieroglyphs },
+  { 1462, PT_SC, ucp_Meroitic_Cursive },
+  { 1478, PT_SC, ucp_Meroitic_Hieroglyphs },
+  { 1498, PT_SC, ucp_Miao },
  { 1503, PT_SCX, ucp_Malayalam },
  { 1508, PT_PC, ucp_Mn },
  { 1511, PT_SCX, ucp_Modi },
  { 1516, PT_SCX, ucp_Mongolian },
  { 1521, PT_SCX, ucp_Mongolian },
-  { 1531, PT_SCX, ucp_Mro },
-  { 1535, PT_SCX, ucp_Mro },
-  { 1540, PT_SCX, ucp_Meetei_Mayek },
+  { 1531, PT_SC, ucp_Mro },
+  { 1535, PT_SC, ucp_Mro },
+  { 1540, PT_SC, ucp_Meetei_Mayek },
  { 1545, PT_SCX, ucp_Multani },
  { 1550, PT_SCX, ucp_Multani },
  { 1558, PT_SCX, ucp_Myanmar },
  { 1566, PT_SCX, ucp_Myanmar },
  { 1571, PT_GC, ucp_N },
-  { 1573, PT_SCX, ucp_Nabataean },
+  { 1573, PT_SC, ucp_Nabataean },
  { 1583, PT_SCX, ucp_Nandinagari },
  { 1588, PT_SCX, ucp_Nandinagari },
-  { 1600, PT_SCX, ucp_Old_North_Arabian },
-  { 1605, PT_SCX, ucp_Nabataean },
+  { 1600, PT_SC, ucp_Old_North_Arabian },
+  { 1605, PT_SC, ucp_Nabataean },
  { 1610, PT_PC, ucp_Nd },
-  { 1613, PT_SCX, ucp_Newa },
-  { 1618, PT_SCX, ucp_New_Tai_Lue },
+  { 1613, PT_SC, ucp_Newa },
+  { 1618, PT_SC, ucp_New_Tai_Lue },
  { 1628, PT_SCX, ucp_Nko },
  { 1632, PT_SCX, ucp_Nko },
  { 1637, PT_PC, ucp_Nl },
  { 1640, PT_PC, ucp_No },
-  { 1643, PT_SCX, ucp_Nushu },
-  { 1648, PT_SCX, ucp_Nushu },
-  { 1654, PT_SCX, ucp_Nyiakeng_Puachue_Hmong },
-  { 1675, PT_SCX, ucp_Ogham },
-  { 1680, PT_SCX, ucp_Ogham },
-  { 1686, PT_SCX, ucp_Ol_Chiki },
-  { 1694, PT_SCX, ucp_Ol_Chiki },
-  { 1699, PT_SCX, ucp_Old_Hungarian },
-  { 1712, PT_SCX, ucp_Old_Italic },
-  { 1722, PT_SCX, ucp_Old_North_Arabian },
+  { 1643, PT_SC, ucp_Nushu },
+  { 1648, PT_SC, ucp_Nushu },
+  { 1654, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
+  { 1675, PT_SC, ucp_Ogham },
+  { 1680, PT_SC, ucp_Ogham },
+  { 1686, PT_SC, ucp_Ol_Chiki },
+  { 1694, PT_SC, ucp_Ol_Chiki },
+  { 1699, PT_SC, ucp_Old_Hungarian },
+  { 1712, PT_SC, ucp_Old_Italic },
+  { 1722, PT_SC, ucp_Old_North_Arabian },
  { 1738, PT_SCX, ucp_Old_Permic },
-  { 1748, PT_SCX, ucp_Old_Persian },
-  { 1759, PT_SCX, ucp_Old_Sogdian },
-  { 1770, PT_SCX, ucp_Old_South_Arabian },
-  { 1786, PT_SCX, ucp_Old_Turkic },
+  { 1748, PT_SC, ucp_Old_Persian },
+  { 1759, PT_SC, ucp_Old_Sogdian },
+  { 1770, PT_SC, ucp_Old_South_Arabian },
+  { 1786, PT_SC, ucp_Old_Turkic },
  { 1796, PT_SCX, ucp_Old_Uyghur },
  { 1806, PT_SCX, ucp_Oriya },
-  { 1812, PT_SCX, ucp_Old_Turkic },
+  { 1812, PT_SC, ucp_Old_Turkic },
  { 1817, PT_SCX, ucp_Oriya },
-  { 1822, PT_SCX, ucp_Osage },
-  { 1828, PT_SCX, ucp_Osage },
-  { 1833, PT_SCX, ucp_Osmanya },
-  { 1838, PT_SCX, ucp_Osmanya },
+  { 1822, PT_SC, ucp_Osage },
+  { 1828, PT_SC, ucp_Osage },
+  { 1833, PT_SC, ucp_Osmanya },
+  { 1838, PT_SC, ucp_Osmanya },
  { 1846, PT_SCX, ucp_Old_Uyghur },
  { 1851, PT_GC, ucp_P },
-  { 1853, PT_SCX, ucp_Pahawh_Hmong },
-  { 1865, PT_SCX, ucp_Palmyrene },
-  { 1870, PT_SCX, ucp_Palmyrene },
-  { 1880, PT_SCX, ucp_Pau_Cin_Hau },
-  { 1885, PT_SCX, ucp_Pau_Cin_Hau },
+  { 1853, PT_SC, ucp_Pahawh_Hmong },
+  { 1865, PT_SC, ucp_Palmyrene },
+  { 1870, PT_SC, ucp_Palmyrene },
+  { 1880, PT_SC, ucp_Pau_Cin_Hau },
+  { 1885, PT_SC, ucp_Pau_Cin_Hau },
  { 1895, PT_PC, ucp_Pc },
  { 1898, PT_PC, ucp_Pd },
  { 1901, PT_PC, ucp_Pe },
@ -1108,36 +1108,36 @@ const ucp_type_table PRIV(utt)[] = {
  { 1909, PT_PC, ucp_Pf },
  { 1912, PT_SCX, ucp_Phags_Pa },
  { 1917, PT_SCX, ucp_Phags_Pa },
-  { 1925, PT_SCX, ucp_Inscriptional_Pahlavi },
+  { 1925, PT_SC, ucp_Inscriptional_Pahlavi },
  { 1930, PT_SCX, ucp_Psalter_Pahlavi },
-  { 1935, PT_SCX, ucp_Phoenician },
-  { 1940, PT_SCX, ucp_Phoenician },
+  { 1935, PT_SC, ucp_Phoenician },
+  { 1940, PT_SC, ucp_Phoenician },
  { 1951, PT_PC, ucp_Pi },
-  { 1954, PT_SCX, ucp_Miao },
+  { 1954, PT_SC, ucp_Miao },
  { 1959, PT_PC, ucp_Po },
-  { 1962, PT_SCX, ucp_Inscriptional_Parthian },
+  { 1962, PT_SC, ucp_Inscriptional_Parthian },
  { 1967, PT_PC, ucp_Ps },
  { 1970, PT_SCX, ucp_Psalter_Pahlavi },
-  { 1985, PT_SCX, ucp_Rejang },
-  { 1992, PT_SCX, ucp_Rejang },
+  { 1985, PT_SC, ucp_Rejang },
+  { 1992, PT_SC, ucp_Rejang },
  { 1997, PT_SCX, ucp_Hanifi_Rohingya },
-  { 2002, PT_SCX, ucp_Runic },
-  { 2008, PT_SCX, ucp_Runic },
+  { 2002, PT_SC, ucp_Runic },
+  { 2008, PT_SC, ucp_Runic },
  { 2013, PT_GC, ucp_S },
-  { 2015, PT_SCX, ucp_Samaritan },
-  { 2025, PT_SCX, ucp_Samaritan },
-  { 2030, PT_SCX, ucp_Old_South_Arabian },
-  { 2035, PT_SCX, ucp_Saurashtra },
-  { 2040, PT_SCX, ucp_Saurashtra },
+  { 2015, PT_SC, ucp_Samaritan },
+  { 2025, PT_SC, ucp_Samaritan },
+  { 2030, PT_SC, ucp_Old_South_Arabian },
+  { 2035, PT_SC, ucp_Saurashtra },
+  { 2040, PT_SC, ucp_Saurashtra },
  { 2051, PT_PC, ucp_Sc },
-  { 2054, PT_SCX, ucp_SignWriting },
+  { 2054, PT_SC, ucp_SignWriting },
  { 2059, PT_SCX, ucp_Sharada },
-  { 2067, PT_SCX, ucp_Shavian },
-  { 2075, PT_SCX, ucp_Shavian },
+  { 2067, PT_SC, ucp_Shavian },
+  { 2075, PT_SC, ucp_Shavian },
  { 2080, PT_SCX, ucp_Sharada },
-  { 2085, PT_SCX, ucp_Siddham },
-  { 2090, PT_SCX, ucp_Siddham },
-  { 2098, PT_SCX, ucp_SignWriting },
+  { 2085, PT_SC, ucp_Siddham },
+  { 2090, PT_SC, ucp_Siddham },
+  { 2098, PT_SC, ucp_SignWriting },
  { 2110, PT_SCX, ucp_Khudawadi },
  { 2115, PT_SCX, ucp_Sinhala },
  { 2120, PT_SCX, ucp_Sinhala },
@ -1146,13 +1146,13 @@ const ucp_type_table PRIV(utt)[] = {
  { 2134, PT_PC, ucp_So },
  { 2137, PT_SCX, ucp_Sogdian },
  { 2142, PT_SCX, ucp_Sogdian },
-  { 2150, PT_SCX, ucp_Old_Sogdian },
-  { 2155, PT_SCX, ucp_Sora_Sompeng },
-  { 2160, PT_SCX, ucp_Sora_Sompeng },
-  { 2172, PT_SCX, ucp_Soyombo },
-  { 2177, PT_SCX, ucp_Soyombo },
-  { 2185, PT_SCX, ucp_Sundanese },
-  { 2190, PT_SCX, ucp_Sundanese },
+  { 2150, PT_SC, ucp_Old_Sogdian },
+  { 2155, PT_SC, ucp_Sora_Sompeng },
+  { 2160, PT_SC, ucp_Sora_Sompeng },
+  { 2172, PT_SC, ucp_Soyombo },
+  { 2177, PT_SC, ucp_Soyombo },
+  { 2185, PT_SC, ucp_Sundanese },
+  { 2190, PT_SC, ucp_Sundanese },
  { 2200, PT_SCX, ucp_Syloti_Nagri },
  { 2205, PT_SCX, ucp_Syloti_Nagri },
  { 2217, PT_SCX, ucp_Syriac },
@ -1161,48 +1161,48 @@ const ucp_type_table PRIV(utt)[] = {
  { 2237, PT_SCX, ucp_Tagbanwa },
  { 2242, PT_SCX, ucp_Tagbanwa },
  { 2251, PT_SCX, ucp_Tai_Le },
-  { 2257, PT_SCX, ucp_Tai_Tham },
-  { 2265, PT_SCX, ucp_Tai_Viet },
+  { 2257, PT_SC, ucp_Tai_Tham },
+  { 2265, PT_SC, ucp_Tai_Viet },
  { 2273, PT_SCX, ucp_Takri },
  { 2278, PT_SCX, ucp_Takri },
  { 2284, PT_SCX, ucp_Tai_Le },
-  { 2289, PT_SCX, ucp_New_Tai_Lue },
+  { 2289, PT_SC, ucp_New_Tai_Lue },
  { 2294, PT_SCX, ucp_Tamil },
  { 2300, PT_SCX, ucp_Tamil },
-  { 2305, PT_SCX, ucp_Tangut },
-  { 2310, PT_SCX, ucp_Tangsa },
-  { 2317, PT_SCX, ucp_Tangut },
-  { 2324, PT_SCX, ucp_Tai_Viet },
+  { 2305, PT_SC, ucp_Tangut },
+  { 2310, PT_SC, ucp_Tangsa },
+  { 2317, PT_SC, ucp_Tangut },
+  { 2324, PT_SC, ucp_Tai_Viet },
  { 2329, PT_SCX, ucp_Telugu },
  { 2334, PT_SCX, ucp_Telugu },
-  { 2341, PT_SCX, ucp_Tifinagh },
+  { 2341, PT_SC, ucp_Tifinagh },
  { 2346, PT_SCX, ucp_Tagalog },
  { 2351, PT_SCX, ucp_Thaana },
  { 2356, PT_SCX, ucp_Thaana },
-  { 2363, PT_SCX, ucp_Thai },
-  { 2368, PT_SCX, ucp_Tibetan },
-  { 2376, PT_SCX, ucp_Tibetan },
-  { 2381, PT_SCX, ucp_Tifinagh },
+  { 2363, PT_SC, ucp_Thai },
+  { 2368, PT_SC, ucp_Tibetan },
+  { 2376, PT_SC, ucp_Tibetan },
+  { 2381, PT_SC, ucp_Tifinagh },
  { 2390, PT_SCX, ucp_Tirhuta },
  { 2395, PT_SCX, ucp_Tirhuta },
-  { 2403, PT_SCX, ucp_Tangsa },
-  { 2408, PT_SCX, ucp_Toto },
-  { 2413, PT_SCX, ucp_Ugaritic },
-  { 2418, PT_SCX, ucp_Ugaritic },
-  { 2427, PT_SCX, ucp_Unknown },
-  { 2435, PT_SCX, ucp_Vai },
-  { 2439, PT_SCX, ucp_Vai },
-  { 2444, PT_SCX, ucp_Vithkuqi },
-  { 2449, PT_SCX, ucp_Vithkuqi },
-  { 2458, PT_SCX, ucp_Wancho },
-  { 2465, PT_SCX, ucp_Warang_Citi },
-  { 2470, PT_SCX, ucp_Warang_Citi },
-  { 2481, PT_SCX, ucp_Wancho },
+  { 2403, PT_SC, ucp_Tangsa },
+  { 2408, PT_SC, ucp_Toto },
+  { 2413, PT_SC, ucp_Ugaritic },
+  { 2418, PT_SC, ucp_Ugaritic },
+  { 2427, PT_SC, ucp_Unknown },
+  { 2435, PT_SC, ucp_Vai },
+  { 2439, PT_SC, ucp_Vai },
+  { 2444, PT_SC, ucp_Vithkuqi },
+  { 2449, PT_SC, ucp_Vithkuqi },
+  { 2458, PT_SC, ucp_Wancho },
+  { 2465, PT_SC, ucp_Warang_Citi },
+  { 2470, PT_SC, ucp_Warang_Citi },
+  { 2481, PT_SC, ucp_Wancho },
  { 2486, PT_ALNUM, 0 },
-  { 2490, PT_SCX, ucp_Old_Persian },
+  { 2490, PT_SC, ucp_Old_Persian },
  { 2495, PT_PXSPACE, 0 },
  { 2499, PT_SPACE, 0 },
-  { 2503, PT_SCX, ucp_Cuneiform },
+  { 2503, PT_SC, ucp_Cuneiform },
  { 2508, PT_UCNC, 0 },
  { 2512, PT_WORD, 0 },
  { 2516, PT_SCX, ucp_Yezidi },
@ -1210,14 +1210,14 @@ const ucp_type_table PRIV(utt)[] = {
  { 2528, PT_SCX, ucp_Yi },
  { 2531, PT_SCX, ucp_Yi },
  { 2536, PT_GC, ucp_Z },
-  { 2538, PT_SCX, ucp_Zanabazar_Square },
-  { 2554, PT_SCX, ucp_Zanabazar_Square },
-  { 2559, PT_SCX, ucp_Inherited },
+  { 2538, PT_SC, ucp_Zanabazar_Square },
+  { 2554, PT_SC, ucp_Zanabazar_Square },
+  { 2559, PT_SC, ucp_Inherited },
  { 2564, PT_PC, ucp_Zl },
  { 2567, PT_PC, ucp_Zp },
  { 2570, PT_PC, ucp_Zs },
-  { 2573, PT_SCX, ucp_Common },
-  { 2578, PT_SCX, ucp_Unknown }
+  { 2573, PT_SC, ucp_Common },
+  { 2578, PT_SC, ucp_Unknown }
 };

 const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
--- a/src/pcre2_xclass.c
+++ b/src/pcre2_xclass.c
@ -134,7 +134,6 @@ while ((t = *data++) != XCL_END)
  else  /* XCL_PROP & XCL_NOTPROP */
    {
    const ucd_record *prop = GET_UCD(c);
-    int scriptx; 
    BOOL isprop = t == XCL_PROP;
    BOOL ok; 

@ -163,10 +162,8 @@ while ((t = *data++) != XCL_END)
      break;

      case PT_SCX:
-      scriptx = prop->scriptx; 
-      ok = data[1] == prop->script || data[1] == (PCRE2_UCHAR)scriptx;
-      if (!ok && scriptx < 0)
-        ok = MAPBIT(PRIV(ucd_script_sets) - scriptx, data[1]);  
+      ok = (data[1] == prop->script ||
+            MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, data[1]) != 0);
      if (ok == isprop) return !negated;
      break;