Merge scriptx and bidi fields (#78)

Co-authored-by: Zoltan Herczeg <hzmester@freemail.hu>
This commit is contained in:
Zoltan Herczeg 2022-01-12 18:00:12 +01:00 committed by GitHub
parent 7f7d3e8521
commit 061e57695a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 1446 additions and 1438 deletions

View File

@ -538,7 +538,11 @@ file.close()
script_lists = [[]]
last_script_extension = ""
scriptx = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, 0)
# Merged script extension / bidi class table: each entry starts as the script
# extension value read from ScriptExtensions.txt and then has the bidi class
# of the same index OR-ed in from bit 11 upwards.
scriptx_bidi_class = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, 0)
for idx in range(len(scriptx_bidi_class)):
# The shift of 11 must stay in step with UCD_BIDICLASS_SHIFT on the C side.
# NOTE(review): nothing here checks that the script extension value fits
# below bit 11 - confirm against the size of the generated script sets.
scriptx_bidi_class[idx] = scriptx_bidi_class[idx] | (bidi_class[idx] << 11)
# The bidi classes now live inside scriptx_bidi_class; drop the separate list.
bidi_class = None
# Find the Boolean properties of each character. This next bit of magic creates
# a list of empty lists. Using [[]] * MAX_UNICODE gives a list of references to
@ -704,7 +708,7 @@ for s in caseless_sets:
# Combine all the tables
table, records = combine_tables(script, category, break_props,
caseless_offsets, other_case, scriptx, bidi_class, bool_props, padding_dummy)
caseless_offsets, other_case, scriptx_bidi_class, bool_props, padding_dummy)
# Find the record size and create a string definition of the structure for
# outputting as a comment.
@ -794,8 +798,7 @@ const ucd_record PRIV(dummy_ucd_record)[] = {{
ucp_gbOther, /* grapheme break property */
0, /* case set */
0, /* other case */
ucp_Unknown, /* script extension */
ucp_bidiL, /* bidi class */
0 | (ucp_bidiL << UCD_BIDICLASS_SHIFT), /* script extension and bidi class */
0, /* bool properties offset */
0 /* dummy filler */
}};

View File

@ -221,7 +221,7 @@ switch(ptype)
case PT_SCX:
ok = (pdata == prop->script
|| MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, pdata) != 0);
|| MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
return ok == negated;
/* These are specials */

View File

@ -1195,7 +1195,7 @@ for (;;)
case PT_SCX:
OK = (prop->script == code[2] ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[2]) != 0);
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[2]) != 0);
break;
/* These are specials for combination cases. */
@ -1467,7 +1467,7 @@ for (;;)
case PT_SCX:
OK = (prop->script == code[3] ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[3]) != 0);
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0);
break;
/* These are specials for combination cases. */
@ -1722,7 +1722,7 @@ for (;;)
case PT_SCX:
OK = (prop->script == code[3] ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[3]) != 0);
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0);
break;
/* These are specials for combination cases. */
@ -2002,7 +2002,7 @@ for (;;)
case PT_SCX:
OK = (prop->script == code[1 + IMM2_SIZE + 2] ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx,
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop),
code[1 + IMM2_SIZE + 2]) != 0);
break;

View File

@ -1822,8 +1822,7 @@ typedef struct {
uint8_t gbprop; /* ucp_gbControl, etc. (grapheme break property) */
uint8_t caseset; /* offset to multichar other cases or zero */
int32_t other_case; /* offset to other case, or zero if none */
uint8_t scriptx; /* script extension value */
uint8_t bidi; /* bidi class */
uint16_t scriptx_bidiclass; /* script extension (11 bit) and bidi class (5 bit) values */
uint8_t bprops; /* binary properties offset */
uint8_t dummy; /* spare - to round to multiple of 4 bytes */
} ucd_record;
@ -1842,15 +1841,21 @@ typedef struct {
#define GET_UCD(ch) REAL_GET_UCD(ch)
#endif
/* The 16-bit scriptx_bidiclass field of a ucd_record packs two values: the
script extension value in the low 11 bits (bits 0-10) and the bidi class in
the 5 bits above it (bits 11-15). The mask must cover every bit below
UCD_BIDICLASS_SHIFT, otherwise the top bit of the script extension value is
silently discarded. UCD_SCRIPTX_PROP and UCD_BIDICLASS_PROP take a pointer to
a record and extract the respective part. */

#define UCD_SCRIPTX_MASK 0x7ff
#define UCD_BIDICLASS_SHIFT 11
#define UCD_SCRIPTX_PROP(prop) ((prop)->scriptx_bidiclass & UCD_SCRIPTX_MASK)
#define UCD_BIDICLASS_PROP(prop) ((prop)->scriptx_bidiclass >> UCD_BIDICLASS_SHIFT)
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
#define UCD_SCRIPT(ch) GET_UCD(ch)->script
#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
#define UCD_GRAPHBREAK(ch) GET_UCD(ch)->gbprop
#define UCD_CASESET(ch) GET_UCD(ch)->caseset
#define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case)))
#define UCD_SCRIPTX(ch) GET_UCD(ch)->scriptx
#define UCD_SCRIPTX(ch) UCD_SCRIPTX_PROP(GET_UCD(ch))
#define UCD_BPROPS(ch) GET_UCD(ch)->bprops
#define UCD_BIDICLASS(ch) GET_UCD(ch)->bidi
#define UCD_BIDICLASS(ch) UCD_BIDICLASS_PROP(GET_UCD(ch))
/* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words
that form a bitmap representing a list of scripts or boolean properties. These

View File

@ -7670,7 +7670,8 @@ if (unicode_status & XCLASS_NEEDS_UCD)
if (unicode_status & XCLASS_HAS_BIDICL)
{
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bidi));
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);
while (*cc != XCL_END)
{
@ -7789,7 +7790,8 @@ if (unicode_status & XCLASS_NEEDS_UCD)
if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION)
{
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx));
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP)

View File

@ -2455,7 +2455,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
case PT_SCX:
{
BOOL ok = (Fecode[2] == prop->script ||
MAPBIT((PRIV(ucd_script_sets) + prop->scriptx), Fecode[2]) != 0);
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0);
if (ok == notmatch) RRETURN(MATCH_NOMATCH);
}
break;
@ -2514,7 +2514,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
break;
case PT_BIDICL:
if ((prop->bidi == Fecode[2]) == notmatch)
if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch)
RRETURN(MATCH_NOMATCH);
break;
@ -2737,7 +2737,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
GETCHARINCTEST(fc, Feptr);
prop = GET_UCD(fc);
ok = (prop->script == Lpropvalue ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0);
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
if (ok == notmatch)
RRETURN(MATCH_NOMATCH);
}
@ -3535,7 +3535,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
GETCHARINCTEST(fc, Feptr);
prop = GET_UCD(fc);
ok = (prop->script == Lpropvalue
|| MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0);
|| MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
if (ok == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
@ -4117,7 +4117,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
GETCHARLENTEST(fc, Feptr, len);
prop = GET_UCD(fc);
ok = (prop->script == Lpropvalue ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0);
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
if (ok == notmatch) break;
Feptr+= len;
}

View File

@ -136,7 +136,7 @@ for (;;)
Common is always accepted with any script. If there are extensions, the
following processing happens for all scripts. */
if (ucd->scriptx != 0 || (script != ucp_Inherited && script != ucp_Common))
if (UCD_SCRIPTX_PROP(ucd) != 0 || (script != ucp_Inherited && script != ucp_Common))
{
BOOL OK;
@ -146,7 +146,7 @@ for (;;)
zero, and then, except for Common or Inherited, add this script's bit to
the map. */
memcpy(map, PRIV(ucd_script_sets) + ucd->scriptx, UCD_MAPSIZE * sizeof(uint32_t));
memcpy(map, PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(ucd), UCD_MAPSIZE * sizeof(uint32_t));
memset(map + UCD_MAPSIZE, 0, (FULL_MAPSIZE - UCD_MAPSIZE) * sizeof(uint32_t));
if (script != ucp_Common && script != ucp_Inherited) MAPSET(map, script);

File diff suppressed because it is too large Load Diff

View File

@ -163,7 +163,7 @@ while ((t = *data++) != XCL_END)
case PT_SCX:
ok = (data[1] == prop->script ||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, data[1]) != 0);
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), data[1]) != 0);
if (ok == isprop) return !negated;
break;
@ -215,7 +215,7 @@ while ((t = *data++) != XCL_END)
break;
case PT_BIDICL:
if ((prop->bidi == data[1]) == isprop)
if ((UCD_BIDICLASS_PROP(prop) == data[1]) == isprop)
return !negated;
break;