Merge scriptx and bidi fields (#78)
Co-authored-by: Zoltan Herczeg <hzmester@freemail.hu>
This commit is contained in:
parent
7f7d3e8521
commit
061e57695a
|
@ -538,7 +538,11 @@ file.close()
|
|||
|
||||
script_lists = [[]]
|
||||
last_script_extension = ""
|
||||
scriptx = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, 0)
|
||||
scriptx_bidi_class = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, 0)
|
||||
|
||||
for idx in range(len(scriptx_bidi_class)):
|
||||
scriptx_bidi_class[idx] = scriptx_bidi_class[idx] | (bidi_class[idx] << 11)
|
||||
bidi_class = None
|
||||
|
||||
# Find the Boolean properties of each character. This next bit of magic creates
|
||||
# a list of empty lists. Using [[]] * MAX_UNICODE gives a list of references to
|
||||
|
@ -704,7 +708,7 @@ for s in caseless_sets:
|
|||
# Combine all the tables
|
||||
|
||||
table, records = combine_tables(script, category, break_props,
|
||||
caseless_offsets, other_case, scriptx, bidi_class, bool_props, padding_dummy)
|
||||
caseless_offsets, other_case, scriptx_bidi_class, bool_props, padding_dummy)
|
||||
|
||||
# Find the record size and create a string definition of the structure for
|
||||
# outputting as a comment.
|
||||
|
@ -794,8 +798,7 @@ const ucd_record PRIV(dummy_ucd_record)[] = {{
|
|||
ucp_gbOther, /* grapheme break property */
|
||||
0, /* case set */
|
||||
0, /* other case */
|
||||
ucp_Unknown, /* script extension */
|
||||
ucp_bidiL, /* bidi class */
|
||||
0 | (ucp_bidiL << UCD_BIDICLASS_SHIFT), /* script extension and bidi class */
|
||||
0, /* bool properties offset */
|
||||
0 /* dummy filler */
|
||||
}};
|
||||
|
|
|
@ -221,7 +221,7 @@ switch(ptype)
|
|||
|
||||
case PT_SCX:
|
||||
ok = (pdata == prop->script
|
||||
|| MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, pdata) != 0);
|
||||
|| MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
|
||||
return ok == negated;
|
||||
|
||||
/* These are specials */
|
||||
|
|
|
@ -1195,7 +1195,7 @@ for (;;)
|
|||
|
||||
case PT_SCX:
|
||||
OK = (prop->script == code[2] ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[2]) != 0);
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[2]) != 0);
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
@ -1467,7 +1467,7 @@ for (;;)
|
|||
|
||||
case PT_SCX:
|
||||
OK = (prop->script == code[3] ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[3]) != 0);
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0);
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
@ -1722,7 +1722,7 @@ for (;;)
|
|||
|
||||
case PT_SCX:
|
||||
OK = (prop->script == code[3] ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[3]) != 0);
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0);
|
||||
break;
|
||||
|
||||
/* These are specials for combination cases. */
|
||||
|
@ -2002,7 +2002,7 @@ for (;;)
|
|||
|
||||
case PT_SCX:
|
||||
OK = (prop->script == code[1 + IMM2_SIZE + 2] ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx,
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop),
|
||||
code[1 + IMM2_SIZE + 2]) != 0);
|
||||
break;
|
||||
|
||||
|
|
|
@ -1822,8 +1822,7 @@ typedef struct {
|
|||
uint8_t gbprop; /* ucp_gbControl, etc. (grapheme break property) */
|
||||
uint8_t caseset; /* offset to multichar other cases or zero */
|
||||
int32_t other_case; /* offset to other case, or zero if none */
|
||||
uint8_t scriptx; /* script extension value */
|
||||
uint8_t bidi; /* bidi class */
|
||||
uint16_t scriptx_bidiclass; /* script extension (11 bit) and bidi class (5 bit) values */
|
||||
uint8_t bprops; /* binary properties offset */
|
||||
uint8_t dummy; /* spare - to round to multiple of 4 bytes */
|
||||
} ucd_record;
|
||||
|
@ -1842,15 +1841,21 @@ typedef struct {
|
|||
#define GET_UCD(ch) REAL_GET_UCD(ch)
|
||||
#endif
|
||||
|
||||
/* The 16-bit scriptx_bidiclass field of a ucd_record packs two values: the
script extension value occupies the low UCD_BIDICLASS_SHIFT (11) bits and the
bidi class occupies the bits above them. The script extension mask is derived
from the shift so that it always covers the whole low part of the field; a
hand-written 10-bit mask (0x3ff) would silently drop bit 10 of the script
extension value. */

#define UCD_BIDICLASS_SHIFT 11
#define UCD_SCRIPTX_MASK ((1 << UCD_BIDICLASS_SHIFT) - 1)

/* Extract the script extension value from a pointer to a ucd_record. */

#define UCD_SCRIPTX_PROP(prop) ((prop)->scriptx_bidiclass & UCD_SCRIPTX_MASK)

/* Extract the bidi class from a pointer to a ucd_record. */

#define UCD_BIDICLASS_PROP(prop) ((prop)->scriptx_bidiclass >> UCD_BIDICLASS_SHIFT)
|
||||
|
||||
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
|
||||
#define UCD_SCRIPT(ch) GET_UCD(ch)->script
|
||||
#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
|
||||
#define UCD_GRAPHBREAK(ch) GET_UCD(ch)->gbprop
|
||||
#define UCD_CASESET(ch) GET_UCD(ch)->caseset
|
||||
#define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case)))
|
||||
#define UCD_SCRIPTX(ch) GET_UCD(ch)->scriptx
|
||||
#define UCD_SCRIPTX(ch) UCD_SCRIPTX_PROP(GET_UCD(ch))
|
||||
#define UCD_BPROPS(ch) GET_UCD(ch)->bprops
|
||||
#define UCD_BIDICLASS(ch) GET_UCD(ch)->bidi
|
||||
#define UCD_BIDICLASS(ch) UCD_BIDICLASS_PROP(GET_UCD(ch))
|
||||
|
||||
/* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words
|
||||
that form a bitmap representing a list of scripts or boolean properties. These
|
||||
|
|
|
@ -7670,7 +7670,8 @@ if (unicode_status & XCLASS_NEEDS_UCD)
|
|||
|
||||
if (unicode_status & XCLASS_HAS_BIDICL)
|
||||
{
|
||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bidi));
|
||||
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
|
||||
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);
|
||||
|
||||
while (*cc != XCL_END)
|
||||
{
|
||||
|
@ -7789,7 +7790,8 @@ if (unicode_status & XCLASS_NEEDS_UCD)
|
|||
|
||||
if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION)
|
||||
{
|
||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx));
|
||||
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
|
||||
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
|
||||
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
|
||||
|
||||
if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
|
||||
|
|
|
@ -2455,7 +2455,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
case PT_SCX:
|
||||
{
|
||||
BOOL ok = (Fecode[2] == prop->script ||
|
||||
MAPBIT((PRIV(ucd_script_sets) + prop->scriptx), Fecode[2]) != 0);
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0);
|
||||
if (ok == notmatch) RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
break;
|
||||
|
@ -2514,7 +2514,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
if ((prop->bidi == Fecode[2]) == notmatch)
|
||||
if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
|
@ -2737,7 +2737,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
GETCHARINCTEST(fc, Feptr);
|
||||
prop = GET_UCD(fc);
|
||||
ok = (prop->script == Lpropvalue ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0);
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
|
||||
if (ok == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
|
@ -3535,7 +3535,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
GETCHARINCTEST(fc, Feptr);
|
||||
prop = GET_UCD(fc);
|
||||
ok = (prop->script == Lpropvalue
|
||||
|| MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0);
|
||||
|| MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
|
||||
if (ok == (Lctype == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
|
@ -4117,7 +4117,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
GETCHARLENTEST(fc, Feptr, len);
|
||||
prop = GET_UCD(fc);
|
||||
ok = (prop->script == Lpropvalue ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0);
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
|
||||
if (ok == notmatch) break;
|
||||
Feptr+= len;
|
||||
}
|
||||
|
|
|
@ -136,7 +136,7 @@ for (;;)
|
|||
Common is always accepted with any script. If there are extensions, the
|
||||
following processing happens for all scripts. */
|
||||
|
||||
if (ucd->scriptx != 0 || (script != ucp_Inherited && script != ucp_Common))
|
||||
if (UCD_SCRIPTX_PROP(ucd) != 0 || (script != ucp_Inherited && script != ucp_Common))
|
||||
{
|
||||
BOOL OK;
|
||||
|
||||
|
@ -146,7 +146,7 @@ for (;;)
|
|||
zero, and then, except for Common or Inherited, add this script's bit to
|
||||
the map. */
|
||||
|
||||
memcpy(map, PRIV(ucd_script_sets) + ucd->scriptx, UCD_MAPSIZE * sizeof(uint32_t));
|
||||
memcpy(map, PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(ucd), UCD_MAPSIZE * sizeof(uint32_t));
|
||||
memset(map + UCD_MAPSIZE, 0, (FULL_MAPSIZE - UCD_MAPSIZE) * sizeof(uint32_t));
|
||||
if (script != ucp_Common && script != ucp_Inherited) MAPSET(map, script);
|
||||
|
||||
|
|
2824
src/pcre2_ucd.c
2824
src/pcre2_ucd.c
File diff suppressed because it is too large
Load Diff
|
@ -163,7 +163,7 @@ while ((t = *data++) != XCL_END)
|
|||
|
||||
case PT_SCX:
|
||||
ok = (data[1] == prop->script ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, data[1]) != 0);
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), data[1]) != 0);
|
||||
if (ok == isprop) return !negated;
|
||||
break;
|
||||
|
||||
|
@ -215,7 +215,7 @@ while ((t = *data++) != XCL_END)
|
|||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
if ((prop->bidi == data[1]) == isprop)
|
||||
if ((UCD_BIDICLASS_PROP(prop) == data[1]) == isprop)
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
|
|
Loading…
Reference in New Issue