Extend unicode boolean property bitset index to 12 bit (#81)

Co-authored-by: Zoltan Herczeg <hzmester@freemail.hu>
This commit is contained in:
Zoltan Herczeg 2022-01-14 16:51:03 +01:00 committed by GitHub
parent e85a81ebac
commit e21345de97
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 1428 additions and 1440 deletions

View File

@ -181,7 +181,6 @@
# -32 (-0x20) => Other case is U+0041 # -32 (-0x20) => Other case is U+0041
# 18432 = 0x4800 => Combined Bidi class + script extension values # 18432 = 0x4800 => Combined Bidi class + script extension values
# 22 => Offset to Boolean properties # 22 => Offset to Boolean properties
# 0 => Dummy value, unused at present
# #
# The top 5 bits of the sixth field are the Bidi class, with the rest being the # The top 5 bits of the sixth field are the Bidi class, with the rest being the
# script extension value, giving: # script extension value, giving:
@ -223,7 +222,6 @@
# 0 => No other case # 0 => No other case
# 26762 = 0x688A => Combined Bidi class + script extension values # 26762 = 0x688A => Combined Bidi class + script extension values
# 48 => Offset to Boolean properties # 48 => Offset to Boolean properties
# 0 => Dummy value, unused at present
# #
# The top 5 bits of the sixth field are the Bidi class, with the rest being the # The top 5 bits of the sixth field are the Bidi class, with the rest being the
# script extension value, giving: # script extension value, giving:
@ -642,15 +640,7 @@ for c in range(MAX_UNICODE):
bool_props_lists.append(bprops[c]) bool_props_lists.append(bprops[c])
i += 1 i += 1
bool_props[c] = i bool_props[c] = i * bool_props_list_item_size
# With the addition of the Script Extensions field, we needed some padding to
# get the Unicode records up to 12 bytes (multiple of 4). Originally this was a
# 16-bit field and padding_dummy[0] was set to 256 to ensure this, but 8 bits
# are now used, so zero will do.
padding_dummy = [0] * MAX_UNICODE
padding_dummy[0] = 0
# This block of code was added by PH in September 2012. It scans the other_case # This block of code was added by PH in September 2012. It scans the other_case
# table to find sets of more than two characters that must all match each other # table to find sets of more than two characters that must all match each other
@ -724,7 +714,7 @@ for s in caseless_sets:
# Combine all the tables # Combine all the tables
table, records = combine_tables(script, category, break_props, table, records = combine_tables(script, category, break_props,
caseless_offsets, other_case, scriptx_bidi_class, bool_props, padding_dummy) caseless_offsets, other_case, scriptx_bidi_class, bool_props)
# Find the record size and create a string definition of the structure for # Find the record size and create a string definition of the structure for
# outputting as a comment. # outputting as a comment.
@ -816,7 +806,6 @@ const ucd_record PRIV(dummy_ucd_record)[] = {{
0, /* other case */ 0, /* other case */
0 | (ucp_bidiL << UCD_BIDICLASS_SHIFT), /* script extension and bidi class */ 0 | (ucp_bidiL << UCD_BIDICLASS_SHIFT), /* script extension and bidi class */
0, /* bool properties offset */ 0, /* bool properties offset */
0 /* dummy filler */
}}; }};
#endif #endif
\n""") \n""")

View File

@ -1251,7 +1251,7 @@ for (;;)
case PT_BOOL: case PT_BOOL:
OK = MAPBIT(PRIV(ucd_boolprop_sets) + OK = MAPBIT(PRIV(ucd_boolprop_sets) +
prop->bprops * ucd_boolprop_sets_item_size, code[2]) != 0; UCD_BPROPS_PROP(prop), code[2]) != 0;
break; break;
/* Should never occur, but keep compilers from grumbling. */ /* Should never occur, but keep compilers from grumbling. */
@ -1523,7 +1523,7 @@ for (;;)
case PT_BOOL: case PT_BOOL:
OK = MAPBIT(PRIV(ucd_boolprop_sets) + OK = MAPBIT(PRIV(ucd_boolprop_sets) +
prop->bprops * ucd_boolprop_sets_item_size, code[3]) != 0; UCD_BPROPS_PROP(prop), code[3]) != 0;
break; break;
/* Should never occur, but keep compilers from grumbling. */ /* Should never occur, but keep compilers from grumbling. */
@ -1778,7 +1778,7 @@ for (;;)
case PT_BOOL: case PT_BOOL:
OK = MAPBIT(PRIV(ucd_boolprop_sets) + OK = MAPBIT(PRIV(ucd_boolprop_sets) +
prop->bprops * ucd_boolprop_sets_item_size, code[3]) != 0; UCD_BPROPS_PROP(prop), code[3]) != 0;
break; break;
/* Should never occur, but keep compilers from grumbling. */ /* Should never occur, but keep compilers from grumbling. */
@ -2059,8 +2059,7 @@ for (;;)
case PT_BOOL: case PT_BOOL:
OK = MAPBIT(PRIV(ucd_boolprop_sets) + OK = MAPBIT(PRIV(ucd_boolprop_sets) +
prop->bprops * ucd_boolprop_sets_item_size, UCD_BPROPS_PROP(prop), code[1 + IMM2_SIZE + 2]) != 0;
code[1 + IMM2_SIZE + 2]) != 0;
break; break;
/* Should never occur, but keep compilers from grumbling. */ /* Should never occur, but keep compilers from grumbling. */

View File

@ -1823,8 +1823,7 @@ typedef struct {
uint8_t caseset; /* offset to multichar other cases or zero */ uint8_t caseset; /* offset to multichar other cases or zero */
int32_t other_case; /* offset to other case, or zero if none */ int32_t other_case; /* offset to other case, or zero if none */
uint16_t scriptx_bidiclass; /* script extension (11 bit) and bidi class (5 bit) values */ uint16_t scriptx_bidiclass; /* script extension (11 bit) and bidi class (5 bit) values */
uint8_t bprops; /* binary properties offset */ uint16_t bprops; /* binary properties offset */
uint8_t dummy; /* spare - to round to multiple of 4 bytes */
} ucd_record; } ucd_record;
/* UCD access macros */ /* UCD access macros */
@ -1843,9 +1842,11 @@ typedef struct {
#define UCD_SCRIPTX_MASK 0x3ff #define UCD_SCRIPTX_MASK 0x3ff
#define UCD_BIDICLASS_SHIFT 11 #define UCD_BIDICLASS_SHIFT 11
#define UCD_BPROPS_MASK 0xfff
#define UCD_SCRIPTX_PROP(prop) ((prop)->scriptx_bidiclass & UCD_SCRIPTX_MASK) #define UCD_SCRIPTX_PROP(prop) ((prop)->scriptx_bidiclass & UCD_SCRIPTX_MASK)
#define UCD_BIDICLASS_PROP(prop) ((prop)->scriptx_bidiclass >> UCD_BIDICLASS_SHIFT) #define UCD_BIDICLASS_PROP(prop) ((prop)->scriptx_bidiclass >> UCD_BIDICLASS_SHIFT)
#define UCD_BPROPS_PROP(prop) ((prop)->bprops & UCD_BPROPS_MASK)
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype #define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
#define UCD_SCRIPT(ch) GET_UCD(ch)->script #define UCD_SCRIPT(ch) GET_UCD(ch)->script
@ -1854,7 +1855,7 @@ typedef struct {
#define UCD_CASESET(ch) GET_UCD(ch)->caseset #define UCD_CASESET(ch) GET_UCD(ch)->caseset
#define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case))) #define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case)))
#define UCD_SCRIPTX(ch) UCD_SCRIPTX_PROP(GET_UCD(ch)) #define UCD_SCRIPTX(ch) UCD_SCRIPTX_PROP(GET_UCD(ch))
#define UCD_BPROPS(ch) GET_UCD(ch)->bprops #define UCD_BPROPS(ch) UCD_BPROPS_PROP(GET_UCD(ch))
#define UCD_BIDICLASS(ch) UCD_BIDICLASS_PROP(GET_UCD(ch)) #define UCD_BIDICLASS(ch) UCD_BIDICLASS_PROP(GET_UCD(ch))
/* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words /* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words

View File

@ -7708,8 +7708,9 @@ if (unicode_status & XCLASS_NEEDS_UCD)
if (unicode_status & XCLASS_HAS_BOOL) if (unicode_status & XCLASS_HAS_BOOL)
{ {
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops)); OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
while (*cc != XCL_END) while (*cc != XCL_END)
{ {

View File

@ -2521,7 +2521,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
case PT_BOOL: case PT_BOOL:
{ {
BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) + BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) +
prop->bprops * ucd_boolprop_sets_item_size, Fecode[2]) != 0; UCD_BPROPS_PROP(prop), Fecode[2]) != 0;
if (ok == notmatch) RRETURN(MATCH_NOMATCH); if (ok == notmatch) RRETURN(MATCH_NOMATCH);
} }
break; break;
@ -2875,7 +2875,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
GETCHARINCTEST(fc, Feptr); GETCHARINCTEST(fc, Feptr);
prop = GET_UCD(fc); prop = GET_UCD(fc);
ok = MAPBIT(PRIV(ucd_boolprop_sets) + ok = MAPBIT(PRIV(ucd_boolprop_sets) +
prop->bprops * ucd_boolprop_sets_item_size, Lpropvalue) != 0; UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
if (ok == notmatch) if (ok == notmatch)
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
} }
@ -3695,7 +3695,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
GETCHARINCTEST(fc, Feptr); GETCHARINCTEST(fc, Feptr);
prop = GET_UCD(fc); prop = GET_UCD(fc);
ok = MAPBIT(PRIV(ucd_boolprop_sets) + ok = MAPBIT(PRIV(ucd_boolprop_sets) +
prop->bprops * ucd_boolprop_sets_item_size, Lpropvalue) != 0; UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
if (ok == (Lctype == OP_NOTPROP)) if (ok == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
} }
@ -4263,7 +4263,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
GETCHARLENTEST(fc, Feptr, len); GETCHARLENTEST(fc, Feptr, len);
prop = GET_UCD(fc); prop = GET_UCD(fc);
ok = MAPBIT(PRIV(ucd_boolprop_sets) + ok = MAPBIT(PRIV(ucd_boolprop_sets) +
prop->bprops * ucd_boolprop_sets_item_size, Lpropvalue) != 0; UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
if (ok == notmatch) break; if (ok == notmatch) break;
Feptr+= len; Feptr+= len;
} }

File diff suppressed because it is too large Load Diff

View File

@ -221,7 +221,7 @@ while ((t = *data++) != XCL_END)
case PT_BOOL: case PT_BOOL:
ok = MAPBIT(PRIV(ucd_boolprop_sets) + ok = MAPBIT(PRIV(ucd_boolprop_sets) +
prop->bprops * ucd_boolprop_sets_item_size, data[1]) != 0; UCD_BPROPS_PROP(prop), data[1]) != 0;
if (ok == isprop) return !negated; if (ok == isprop) return !negated;
break; break;