Merge scriptx and bidi fields (#78)
Co-authored-by: Zoltan Herczeg <hzmester@freemail.hu>
This commit is contained in:
parent
7f7d3e8521
commit
061e57695a
|
@ -538,7 +538,11 @@ file.close()
|
||||||
|
|
||||||
script_lists = [[]]
|
script_lists = [[]]
|
||||||
last_script_extension = ""
|
last_script_extension = ""
|
||||||
scriptx = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, 0)
|
scriptx_bidi_class = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, 0)
|
||||||
|
|
||||||
|
for idx in range(len(scriptx_bidi_class)):
|
||||||
|
scriptx_bidi_class[idx] = scriptx_bidi_class[idx] | (bidi_class[idx] << 11)
|
||||||
|
bidi_class = None
|
||||||
|
|
||||||
# Find the Boolean properties of each character. This next bit of magic creates
|
# Find the Boolean properties of each character. This next bit of magic creates
|
||||||
# a list of empty lists. Using [[]] * MAX_UNICODE gives a list of references to
|
# a list of empty lists. Using [[]] * MAX_UNICODE gives a list of references to
|
||||||
|
@ -704,7 +708,7 @@ for s in caseless_sets:
|
||||||
# Combine all the tables
|
# Combine all the tables
|
||||||
|
|
||||||
table, records = combine_tables(script, category, break_props,
|
table, records = combine_tables(script, category, break_props,
|
||||||
caseless_offsets, other_case, scriptx, bidi_class, bool_props, padding_dummy)
|
caseless_offsets, other_case, scriptx_bidi_class, bool_props, padding_dummy)
|
||||||
|
|
||||||
# Find the record size and create a string definition of the structure for
|
# Find the record size and create a string definition of the structure for
|
||||||
# outputting as a comment.
|
# outputting as a comment.
|
||||||
|
@ -794,8 +798,7 @@ const ucd_record PRIV(dummy_ucd_record)[] = {{
|
||||||
ucp_gbOther, /* grapheme break property */
|
ucp_gbOther, /* grapheme break property */
|
||||||
0, /* case set */
|
0, /* case set */
|
||||||
0, /* other case */
|
0, /* other case */
|
||||||
ucp_Unknown, /* script extension */
|
0 | (ucp_bidiL << UCD_BIDICLASS_SHIFT), /* script extension and bidi class */
|
||||||
ucp_bidiL, /* bidi class */
|
|
||||||
0, /* bool properties offset */
|
0, /* bool properties offset */
|
||||||
0 /* dummy filler */
|
0 /* dummy filler */
|
||||||
}};
|
}};
|
||||||
|
|
|
@ -221,7 +221,7 @@ switch(ptype)
|
||||||
|
|
||||||
case PT_SCX:
|
case PT_SCX:
|
||||||
ok = (pdata == prop->script
|
ok = (pdata == prop->script
|
||||||
|| MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, pdata) != 0);
|
|| MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
|
||||||
return ok == negated;
|
return ok == negated;
|
||||||
|
|
||||||
/* These are specials */
|
/* These are specials */
|
||||||
|
|
|
@ -1195,7 +1195,7 @@ for (;;)
|
||||||
|
|
||||||
case PT_SCX:
|
case PT_SCX:
|
||||||
OK = (prop->script == code[2] ||
|
OK = (prop->script == code[2] ||
|
||||||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[2]) != 0);
|
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[2]) != 0);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* These are specials for combination cases. */
|
/* These are specials for combination cases. */
|
||||||
|
@ -1467,7 +1467,7 @@ for (;;)
|
||||||
|
|
||||||
case PT_SCX:
|
case PT_SCX:
|
||||||
OK = (prop->script == code[3] ||
|
OK = (prop->script == code[3] ||
|
||||||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[3]) != 0);
|
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* These are specials for combination cases. */
|
/* These are specials for combination cases. */
|
||||||
|
@ -1722,7 +1722,7 @@ for (;;)
|
||||||
|
|
||||||
case PT_SCX:
|
case PT_SCX:
|
||||||
OK = (prop->script == code[3] ||
|
OK = (prop->script == code[3] ||
|
||||||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, code[3]) != 0);
|
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* These are specials for combination cases. */
|
/* These are specials for combination cases. */
|
||||||
|
@ -2002,7 +2002,7 @@ for (;;)
|
||||||
|
|
||||||
case PT_SCX:
|
case PT_SCX:
|
||||||
OK = (prop->script == code[1 + IMM2_SIZE + 2] ||
|
OK = (prop->script == code[1 + IMM2_SIZE + 2] ||
|
||||||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx,
|
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop),
|
||||||
code[1 + IMM2_SIZE + 2]) != 0);
|
code[1 + IMM2_SIZE + 2]) != 0);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
|
@ -1822,8 +1822,7 @@ typedef struct {
|
||||||
uint8_t gbprop; /* ucp_gbControl, etc. (grapheme break property) */
|
uint8_t gbprop; /* ucp_gbControl, etc. (grapheme break property) */
|
||||||
uint8_t caseset; /* offset to multichar other cases or zero */
|
uint8_t caseset; /* offset to multichar other cases or zero */
|
||||||
int32_t other_case; /* offset to other case, or zero if none */
|
int32_t other_case; /* offset to other case, or zero if none */
|
||||||
uint8_t scriptx; /* script extension value */
|
uint16_t scriptx_bidiclass; /* script extension (11 bit) and bidi class (5 bit) values */
|
||||||
uint8_t bidi; /* bidi class */
|
|
||||||
uint8_t bprops; /* binary properties offset */
|
uint8_t bprops; /* binary properties offset */
|
||||||
uint8_t dummy; /* spare - to round to multiple of 4 bytes */
|
uint8_t dummy; /* spare - to round to multiple of 4 bytes */
|
||||||
} ucd_record;
|
} ucd_record;
|
||||||
|
@ -1842,15 +1841,21 @@ typedef struct {
|
||||||
#define GET_UCD(ch) REAL_GET_UCD(ch)
|
#define GET_UCD(ch) REAL_GET_UCD(ch)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define UCD_SCRIPTX_MASK 0x7ff /* low 11 bits: every bit below UCD_BIDICLASS_SHIFT holds the script extension offset */
|
||||||
|
#define UCD_BIDICLASS_SHIFT 11
|
||||||
|
|
||||||
|
#define UCD_SCRIPTX_PROP(prop) ((prop)->scriptx_bidiclass & UCD_SCRIPTX_MASK)
|
||||||
|
#define UCD_BIDICLASS_PROP(prop) ((prop)->scriptx_bidiclass >> UCD_BIDICLASS_SHIFT)
|
||||||
|
|
||||||
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
|
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
|
||||||
#define UCD_SCRIPT(ch) GET_UCD(ch)->script
|
#define UCD_SCRIPT(ch) GET_UCD(ch)->script
|
||||||
#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
|
#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
|
||||||
#define UCD_GRAPHBREAK(ch) GET_UCD(ch)->gbprop
|
#define UCD_GRAPHBREAK(ch) GET_UCD(ch)->gbprop
|
||||||
#define UCD_CASESET(ch) GET_UCD(ch)->caseset
|
#define UCD_CASESET(ch) GET_UCD(ch)->caseset
|
||||||
#define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case)))
|
#define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case)))
|
||||||
#define UCD_SCRIPTX(ch) GET_UCD(ch)->scriptx
|
#define UCD_SCRIPTX(ch) UCD_SCRIPTX_PROP(GET_UCD(ch))
|
||||||
#define UCD_BPROPS(ch) GET_UCD(ch)->bprops
|
#define UCD_BPROPS(ch) GET_UCD(ch)->bprops
|
||||||
#define UCD_BIDICLASS(ch) GET_UCD(ch)->bidi
|
#define UCD_BIDICLASS(ch) UCD_BIDICLASS_PROP(GET_UCD(ch))
|
||||||
|
|
||||||
/* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words
|
/* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words
|
||||||
that form a bitmap representing a list of scripts or boolean properties. These
|
that form a bitmap representing a list of scripts or boolean properties. These
|
||||||
|
|
|
@ -7670,7 +7670,8 @@ if (unicode_status & XCLASS_NEEDS_UCD)
|
||||||
|
|
||||||
if (unicode_status & XCLASS_HAS_BIDICL)
|
if (unicode_status & XCLASS_HAS_BIDICL)
|
||||||
{
|
{
|
||||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bidi));
|
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
|
||||||
|
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);
|
||||||
|
|
||||||
while (*cc != XCL_END)
|
while (*cc != XCL_END)
|
||||||
{
|
{
|
||||||
|
@ -7789,7 +7790,8 @@ if (unicode_status & XCLASS_NEEDS_UCD)
|
||||||
|
|
||||||
if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION)
|
if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION)
|
||||||
{
|
{
|
||||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx));
|
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
|
||||||
|
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
|
||||||
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
|
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
|
||||||
|
|
||||||
if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
|
if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
|
||||||
|
|
|
@ -2455,7 +2455,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
case PT_SCX:
|
case PT_SCX:
|
||||||
{
|
{
|
||||||
BOOL ok = (Fecode[2] == prop->script ||
|
BOOL ok = (Fecode[2] == prop->script ||
|
||||||
MAPBIT((PRIV(ucd_script_sets) + prop->scriptx), Fecode[2]) != 0);
|
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0);
|
||||||
if (ok == notmatch) RRETURN(MATCH_NOMATCH);
|
if (ok == notmatch) RRETURN(MATCH_NOMATCH);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -2514,7 +2514,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_BIDICL:
|
case PT_BIDICL:
|
||||||
if ((prop->bidi == Fecode[2]) == notmatch)
|
if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch)
|
||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -2737,7 +2737,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
GETCHARINCTEST(fc, Feptr);
|
GETCHARINCTEST(fc, Feptr);
|
||||||
prop = GET_UCD(fc);
|
prop = GET_UCD(fc);
|
||||||
ok = (prop->script == Lpropvalue ||
|
ok = (prop->script == Lpropvalue ||
|
||||||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0);
|
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
|
||||||
if (ok == notmatch)
|
if (ok == notmatch)
|
||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
}
|
}
|
||||||
|
@ -3535,7 +3535,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
GETCHARINCTEST(fc, Feptr);
|
GETCHARINCTEST(fc, Feptr);
|
||||||
prop = GET_UCD(fc);
|
prop = GET_UCD(fc);
|
||||||
ok = (prop->script == Lpropvalue
|
ok = (prop->script == Lpropvalue
|
||||||
|| MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0);
|
|| MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
|
||||||
if (ok == (Lctype == OP_NOTPROP))
|
if (ok == (Lctype == OP_NOTPROP))
|
||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
}
|
}
|
||||||
|
@ -4117,7 +4117,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
GETCHARLENTEST(fc, Feptr, len);
|
GETCHARLENTEST(fc, Feptr, len);
|
||||||
prop = GET_UCD(fc);
|
prop = GET_UCD(fc);
|
||||||
ok = (prop->script == Lpropvalue ||
|
ok = (prop->script == Lpropvalue ||
|
||||||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, Lpropvalue) != 0);
|
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
|
||||||
if (ok == notmatch) break;
|
if (ok == notmatch) break;
|
||||||
Feptr+= len;
|
Feptr+= len;
|
||||||
}
|
}
|
||||||
|
|
|
@ -136,7 +136,7 @@ for (;;)
|
||||||
Common is always accepted with any script. If there are extensions, the
|
Common is always accepted with any script. If there are extensions, the
|
||||||
following processing happens for all scripts. */
|
following processing happens for all scripts. */
|
||||||
|
|
||||||
if (ucd->scriptx != 0 || (script != ucp_Inherited && script != ucp_Common))
|
if (UCD_SCRIPTX_PROP(ucd) != 0 || (script != ucp_Inherited && script != ucp_Common))
|
||||||
{
|
{
|
||||||
BOOL OK;
|
BOOL OK;
|
||||||
|
|
||||||
|
@ -146,7 +146,7 @@ for (;;)
|
||||||
zero, and then, except for Common or Inherited, add this script's bit to
|
zero, and then, except for Common or Inherited, add this script's bit to
|
||||||
the map. */
|
the map. */
|
||||||
|
|
||||||
memcpy(map, PRIV(ucd_script_sets) + ucd->scriptx, UCD_MAPSIZE * sizeof(uint32_t));
|
memcpy(map, PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(ucd), UCD_MAPSIZE * sizeof(uint32_t));
|
||||||
memset(map + UCD_MAPSIZE, 0, (FULL_MAPSIZE - UCD_MAPSIZE) * sizeof(uint32_t));
|
memset(map + UCD_MAPSIZE, 0, (FULL_MAPSIZE - UCD_MAPSIZE) * sizeof(uint32_t));
|
||||||
if (script != ucp_Common && script != ucp_Inherited) MAPSET(map, script);
|
if (script != ucp_Common && script != ucp_Inherited) MAPSET(map, script);
|
||||||
|
|
||||||
|
|
2824
src/pcre2_ucd.c
2824
src/pcre2_ucd.c
File diff suppressed because it is too large
Load Diff
|
@ -163,7 +163,7 @@ while ((t = *data++) != XCL_END)
|
||||||
|
|
||||||
case PT_SCX:
|
case PT_SCX:
|
||||||
ok = (data[1] == prop->script ||
|
ok = (data[1] == prop->script ||
|
||||||
MAPBIT(PRIV(ucd_script_sets) + prop->scriptx, data[1]) != 0);
|
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), data[1]) != 0);
|
||||||
if (ok == isprop) return !negated;
|
if (ok == isprop) return !negated;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -215,7 +215,7 @@ while ((t = *data++) != XCL_END)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PT_BIDICL:
|
case PT_BIDICL:
|
||||||
if ((prop->bidi == data[1]) == isprop)
|
if ((UCD_BIDICLASS_PROP(prop) == data[1]) == isprop)
|
||||||
return !negated;
|
return !negated;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue