Merge scriptx and bidi fields (#78)

Co-authored-by: Zoltan Herczeg <hzmester@freemail.hu>
This commit is contained in:
Zoltan Herczeg 2022-01-12 18:00:12 +01:00 committed by GitHub
parent 7f7d3e8521
commit 061e57695a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 1446 additions and 1438 deletions

View File

@ -538,7 +538,11 @@ file.close()
script_lists = [[]] script_lists = [[]]
last_script_extension = "" last_script_extension = ""
scriptx = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, 0) scriptx_bidi_class = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, 0)
for idx in range(len(scriptx_bidi_class)):
scriptx_bidi_class[idx] = scriptx_bidi_class[idx] | (bidi_class[idx] << 11)
bidi_class = None
# Find the Boolean properties of each character. This next bit of magic creates # Find the Boolean properties of each character. This next bit of magic creates
# a list of empty lists. Using [[]] * MAX_UNICODE gives a list of references to # a list of empty lists. Using [[]] * MAX_UNICODE gives a list of references to
@ -704,7 +708,7 @@ for s in caseless_sets:
# Combine all the tables # Combine all the tables
table, records = combine_tables(script, category, break_props, table, records = combine_tables(script, category, break_props,
caseless_offsets, other_case, scriptx, bidi_class, bool_props, padding_dummy) caseless_offsets, other_case, scriptx_bidi_class, bool_props, padding_dummy)
# Find the record size and create a string definition of the structure for # Find the record size and create a string definition of the structure for
# outputting as a comment. # outputting as a comment.
@ -794,8 +798,7 @@ const ucd_record PRIV(dummy_ucd_record)[] = {{
ucp_gbOther, /* grapheme break property */ ucp_gbOther, /* grapheme break property */
0, /* case set */ 0, /* case set */
0, /* other case */ 0, /* other case */
ucp_Unknown, /* script extension */ 0 | (ucp_bidiL << UCD_BIDICLASS_SHIFT), /* script extension and bidi class */
ucp_bidiL, /* bidi class */
0, /* bool properties offset */ 0, /* bool properties offset */
0 /* dummy filler */ 0 /* dummy filler */
}}; }};

View File

@ -221,7 +221,7 @@ switch(ptype)
case PT_SCX: case PT_SCX:
ok = (pdata == prop->script ok = (pdata == prop->script
|| MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, pdata) != 0); || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
return ok == negated; return ok == negated;
/* These are specials */ /* These are specials */

View File

@ -1195,7 +1195,7 @@ for (;;)
case PT_SCX: case PT_SCX:
OK = (prop->script == code[2] || OK = (prop->script == code[2] ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[2]) != 0); MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[2]) != 0);
break; break;
/* These are specials for combination cases. */ /* These are specials for combination cases. */
@ -1467,7 +1467,7 @@ for (;;)
case PT_SCX: case PT_SCX:
OK = (prop->script == code[3] || OK = (prop->script == code[3] ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[3]) != 0); MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0);
break; break;
/* These are specials for combination cases. */ /* These are specials for combination cases. */
@ -1722,7 +1722,7 @@ for (;;)
case PT_SCX: case PT_SCX:
OK = (prop->script == code[3] || OK = (prop->script == code[3] ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[3]) != 0); MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0);
break; break;
/* These are specials for combination cases. */ /* These are specials for combination cases. */
@ -2002,7 +2002,7 @@ for (;;)
case PT_SCX: case PT_SCX:
OK = (prop->script == code[1 + IMM2_SIZE + 2] || OK = (prop->script == code[1 + IMM2_SIZE + 2] ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop),
code[1 + IMM2_SIZE + 2]) != 0); code[1 + IMM2_SIZE + 2]) != 0);
break; break;

View File

@ -1822,8 +1822,7 @@ typedef struct {
uint8_t gbprop; /* ucp_gbControl, etc. (grapheme break property) */ uint8_t gbprop; /* ucp_gbControl, etc. (grapheme break property) */
uint8_t caseset; /* offset to multichar other cases or zero */ uint8_t caseset; /* offset to multichar other cases or zero */
int32_t other_case; /* offset to other case, or zero if none */ int32_t other_case; /* offset to other case, or zero if none */
uint8_t scriptx; /* script extension value */ uint16_t scriptx_bidiclass; /* script extension (11 bit) and bidi class (5 bit) values */
uint8_t bidi; /* bidi class */
uint8_t bprops; /* binary properties offset */ uint8_t bprops; /* binary properties offset */
uint8_t dummy; /* spare - to round to multiple of 4 bytes */ uint8_t dummy; /* spare - to round to multiple of 4 bytes */
} ucd_record; } ucd_record;
@ -1842,15 +1841,21 @@ typedef struct {
#define GET_UCD(ch) REAL_GET_UCD(ch) #define GET_UCD(ch) REAL_GET_UCD(ch)
#endif #endif
/* The scriptx_bidiclass field of a ucd_record packs two values into 16 bits:
the script extension offset occupies the low UCD_BIDICLASS_SHIFT (11) bits and
the bidi class occupies the remaining high 5 bits. The mask must cover every
bit below the shift so that the two agree; a 10-bit mask (0x3ff) would silently
drop bit 10 of the script extension offset. */

#define UCD_SCRIPTX_MASK 0x7ff
#define UCD_BIDICLASS_SHIFT 11

/* Extract the script extension offset / bidi class from a ucd_record pointer. */

#define UCD_SCRIPTX_PROP(prop) ((prop)->scriptx_bidiclass & UCD_SCRIPTX_MASK)
#define UCD_BIDICLASS_PROP(prop) ((prop)->scriptx_bidiclass >> UCD_BIDICLASS_SHIFT)
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype #define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
#define UCD_SCRIPT(ch) GET_UCD(ch)->script #define UCD_SCRIPT(ch) GET_UCD(ch)->script
#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)] #define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
#define UCD_GRAPHBREAK(ch) GET_UCD(ch)->gbprop #define UCD_GRAPHBREAK(ch) GET_UCD(ch)->gbprop
#define UCD_CASESET(ch) GET_UCD(ch)->caseset #define UCD_CASESET(ch) GET_UCD(ch)->caseset
#define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case))) #define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case)))
#define UCD_SCRIPTX(ch) GET_UCD(ch)->scriptx #define UCD_SCRIPTX(ch) UCD_SCRIPTX_PROP(GET_UCD(ch))
#define UCD_BPROPS(ch) GET_UCD(ch)->bprops #define UCD_BPROPS(ch) GET_UCD(ch)->bprops
#define UCD_BIDICLASS(ch) GET_UCD(ch)->bidi #define UCD_BIDICLASS(ch) UCD_BIDICLASS_PROP(GET_UCD(ch))
/* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words /* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words
that form a bitmap representing a list of scripts or boolean properties. These that form a bitmap representing a list of scripts or boolean properties. These

View File

@ -7670,7 +7670,8 @@ if (unicode_status & XCLASS_NEEDS_UCD)
if (unicode_status & XCLASS_HAS_BIDICL) if (unicode_status & XCLASS_HAS_BIDICL)
{ {
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bidi)); OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);
while (*cc != XCL_END) while (*cc != XCL_END)
{ {
@ -7789,7 +7790,8 @@ if (unicode_status & XCLASS_NEEDS_UCD)
if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION) if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION)
{ {
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx)); OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP) if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP)

View File

@ -2455,7 +2455,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
case PT_SCX: case PT_SCX:
{ {
BOOL ok = (Fecode[2] == prop->script || BOOL ok = (Fecode[2] == prop->script ||
MAPBIT((PRIV(ucd_script_sets) + prop->scriptx), Fecode[2]) != 0); MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0);
if (ok == notmatch) RRETURN(MATCH_NOMATCH); if (ok == notmatch) RRETURN(MATCH_NOMATCH);
} }
break; break;
@ -2514,7 +2514,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
break; break;
case PT_BIDICL: case PT_BIDICL:
if ((prop->bidi == Fecode[2]) == notmatch) if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch)
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
break; break;
@ -2737,7 +2737,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
GETCHARINCTEST(fc, Feptr); GETCHARINCTEST(fc, Feptr);
prop = GET_UCD(fc); prop = GET_UCD(fc);
ok = (prop->script == Lpropvalue || ok = (prop->script == Lpropvalue ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0); MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
if (ok == notmatch) if (ok == notmatch)
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
} }
@ -3535,7 +3535,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
GETCHARINCTEST(fc, Feptr); GETCHARINCTEST(fc, Feptr);
prop = GET_UCD(fc); prop = GET_UCD(fc);
ok = (prop->script == Lpropvalue ok = (prop->script == Lpropvalue
|| MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0); || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
if (ok == (Lctype == OP_NOTPROP)) if (ok == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
} }
@ -4117,7 +4117,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
GETCHARLENTEST(fc, Feptr, len); GETCHARLENTEST(fc, Feptr, len);
prop = GET_UCD(fc); prop = GET_UCD(fc);
ok = (prop->script == Lpropvalue || ok = (prop->script == Lpropvalue ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0); MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
if (ok == notmatch) break; if (ok == notmatch) break;
Feptr+= len; Feptr+= len;
} }

View File

@ -136,7 +136,7 @@ for (;;)
Common is always accepted with any script. If there are extensions, the Common is always accepted with any script. If there are extensions, the
following processing happens for all scripts. */ following processing happens for all scripts. */
if (ucd->scriptx != 0 || (script != ucp_Inherited && script != ucp_Common)) if (UCD_SCRIPTX_PROP(ucd) != 0 || (script != ucp_Inherited && script != ucp_Common))
{ {
BOOL OK; BOOL OK;
@ -146,7 +146,7 @@ for (;;)
zero, and then, except for Common or Inherited, add this script's bit to zero, and then, except for Common or Inherited, add this script's bit to
the map. */ the map. */
memcpy(map, PRIV(ucd_script_sets) + ucd->scriptx, UCD_MAPSIZE * sizeof(uint32_t)); memcpy(map, PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(ucd), UCD_MAPSIZE * sizeof(uint32_t));
memset(map + UCD_MAPSIZE, 0, (FULL_MAPSIZE - UCD_MAPSIZE) * sizeof(uint32_t)); memset(map + UCD_MAPSIZE, 0, (FULL_MAPSIZE - UCD_MAPSIZE) * sizeof(uint32_t));
if (script != ucp_Common && script != ucp_Inherited) MAPSET(map, script); if (script != ucp_Common && script != ucp_Inherited) MAPSET(map, script);

File diff suppressed because it is too large Load Diff

View File

@ -163,7 +163,7 @@ while ((t = *data++) != XCL_END)
case PT_SCX: case PT_SCX:
ok = (data[1] == prop->script || ok = (data[1] == prop->script ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, data[1]) != 0); MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), data[1]) != 0);
if (ok == isprop) return !negated; if (ok == isprop) return !negated;
break; break;
@ -215,7 +215,7 @@ while ((t = *data++) != XCL_END)
break; break;
case PT_BIDICL: case PT_BIDICL:
if ((prop->bidi == data[1]) == isprop) if ((UCD_BIDICLASS_PROP(prop) == data[1]) == isprop)
return !negated; return !negated;
break; break;