Extend unicode boolean property bitset index to 12 bit (#81)
Co-authored-by: Zoltan Herczeg <hzmester@freemail.hu>
This commit is contained in:
parent
e85a81ebac
commit
e21345de97
|
@ -181,7 +181,6 @@
|
||||||
# -32 (-0x20) => Other case is U+0041
|
# -32 (-0x20) => Other case is U+0041
|
||||||
# 18432 = 0x4800 => Combined Bidi class + script extension values
|
# 18432 = 0x4800 => Combined Bidi class + script extension values
|
||||||
# 22 => Offset to Boolean properties
|
# 22 => Offset to Boolean properties
|
||||||
# 0 => Dummy value, unused at present
|
|
||||||
#
|
#
|
||||||
# The top 5 bits of the sixth field are the Bidi class, with the rest being the
|
# The top 5 bits of the sixth field are the Bidi class, with the rest being the
|
||||||
# script extension value, giving:
|
# script extension value, giving:
|
||||||
|
@ -223,7 +222,6 @@
|
||||||
# 0 => No other case
|
# 0 => No other case
|
||||||
# 26762 = 0x688A => Combined Bidi class + script extension values
|
# 26762 = 0x688A => Combined Bidi class + script extension values
|
||||||
# 48 => Offset to Boolean properties
|
# 48 => Offset to Boolean properties
|
||||||
# 0 => Dummy value, unused at present
|
|
||||||
#
|
#
|
||||||
# The top 5 bits of the sixth field are the Bidi class, with the rest being the
|
# The top 5 bits of the sixth field are the Bidi class, with the rest being the
|
||||||
# script extension value, giving:
|
# script extension value, giving:
|
||||||
|
@ -642,15 +640,7 @@ for c in range(MAX_UNICODE):
|
||||||
bool_props_lists.append(bprops[c])
|
bool_props_lists.append(bprops[c])
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
bool_props[c] = i
|
bool_props[c] = i * bool_props_list_item_size
|
||||||
|
|
||||||
# With the addition of the Script Extensions field, we needed some padding to
|
|
||||||
# get the Unicode records up to 12 bytes (multiple of 4). Originally this was a
|
|
||||||
# 16-bit field and padding_dummy[0] was set to 256 to ensure this, but 8 bits
|
|
||||||
# are now used, so zero will do.
|
|
||||||
|
|
||||||
padding_dummy = [0] * MAX_UNICODE
|
|
||||||
padding_dummy[0] = 0
|
|
||||||
|
|
||||||
# This block of code was added by PH in September 2012. It scans the other_case
|
# This block of code was added by PH in September 2012. It scans the other_case
|
||||||
# table to find sets of more than two characters that must all match each other
|
# table to find sets of more than two characters that must all match each other
|
||||||
|
@ -724,7 +714,7 @@ for s in caseless_sets:
|
||||||
# Combine all the tables
|
# Combine all the tables
|
||||||
|
|
||||||
table, records = combine_tables(script, category, break_props,
|
table, records = combine_tables(script, category, break_props,
|
||||||
caseless_offsets, other_case, scriptx_bidi_class, bool_props, padding_dummy)
|
caseless_offsets, other_case, scriptx_bidi_class, bool_props)
|
||||||
|
|
||||||
# Find the record size and create a string definition of the structure for
|
# Find the record size and create a string definition of the structure for
|
||||||
# outputting as a comment.
|
# outputting as a comment.
|
||||||
|
@ -816,7 +806,6 @@ const ucd_record PRIV(dummy_ucd_record)[] = {{
|
||||||
0, /* other case */
|
0, /* other case */
|
||||||
0 | (ucp_bidiL << UCD_BIDICLASS_SHIFT), /* script extension and bidi class */
|
0 | (ucp_bidiL << UCD_BIDICLASS_SHIFT), /* script extension and bidi class */
|
||||||
0, /* bool properties offset */
|
0, /* bool properties offset */
|
||||||
0 /* dummy filler */
|
|
||||||
}};
|
}};
|
||||||
#endif
|
#endif
|
||||||
\n""")
|
\n""")
|
||||||
|
|
|
@ -1251,7 +1251,7 @@ for (;;)
|
||||||
|
|
||||||
case PT_BOOL:
|
case PT_BOOL:
|
||||||
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||||
prop->bprops * ucd_boolprop_sets_item_size, code[2]) != 0;
|
UCD_BPROPS_PROP(prop), code[2]) != 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Should never occur, but keep compilers from grumbling. */
|
/* Should never occur, but keep compilers from grumbling. */
|
||||||
|
@ -1523,7 +1523,7 @@ for (;;)
|
||||||
|
|
||||||
case PT_BOOL:
|
case PT_BOOL:
|
||||||
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||||
prop->bprops * ucd_boolprop_sets_item_size, code[3]) != 0;
|
UCD_BPROPS_PROP(prop), code[3]) != 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Should never occur, but keep compilers from grumbling. */
|
/* Should never occur, but keep compilers from grumbling. */
|
||||||
|
@ -1778,7 +1778,7 @@ for (;;)
|
||||||
|
|
||||||
case PT_BOOL:
|
case PT_BOOL:
|
||||||
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||||
prop->bprops * ucd_boolprop_sets_item_size, code[3]) != 0;
|
UCD_BPROPS_PROP(prop), code[3]) != 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Should never occur, but keep compilers from grumbling. */
|
/* Should never occur, but keep compilers from grumbling. */
|
||||||
|
@ -2059,8 +2059,7 @@ for (;;)
|
||||||
|
|
||||||
case PT_BOOL:
|
case PT_BOOL:
|
||||||
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
OK = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||||
prop->bprops * ucd_boolprop_sets_item_size,
|
UCD_BPROPS_PROP(prop), code[1 + IMM2_SIZE + 2]) != 0;
|
||||||
code[1 + IMM2_SIZE + 2]) != 0;
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Should never occur, but keep compilers from grumbling. */
|
/* Should never occur, but keep compilers from grumbling. */
|
||||||
|
|
|
@ -1823,8 +1823,7 @@ typedef struct {
|
||||||
uint8_t caseset; /* offset to multichar other cases or zero */
|
uint8_t caseset; /* offset to multichar other cases or zero */
|
||||||
int32_t other_case; /* offset to other case, or zero if none */
|
int32_t other_case; /* offset to other case, or zero if none */
|
||||||
uint16_t scriptx_bidiclass; /* script extension (11 bit) and bidi class (5 bit) values */
|
uint16_t scriptx_bidiclass; /* script extension (11 bit) and bidi class (5 bit) values */
|
||||||
uint8_t bprops; /* binary properties offset */
|
uint16_t bprops; /* binary properties offset */
|
||||||
uint8_t dummy; /* spare - to round to multiple of 4 bytes */
|
|
||||||
} ucd_record;
|
} ucd_record;
|
||||||
|
|
||||||
/* UCD access macros */
|
/* UCD access macros */
|
||||||
|
@ -1843,9 +1842,11 @@ typedef struct {
|
||||||
|
|
||||||
#define UCD_SCRIPTX_MASK 0x3ff
|
#define UCD_SCRIPTX_MASK 0x3ff
|
||||||
#define UCD_BIDICLASS_SHIFT 11
|
#define UCD_BIDICLASS_SHIFT 11
|
||||||
|
#define UCD_BPROPS_MASK 0xfff
|
||||||
|
|
||||||
#define UCD_SCRIPTX_PROP(prop) ((prop)->scriptx_bidiclass & UCD_SCRIPTX_MASK)
|
#define UCD_SCRIPTX_PROP(prop) ((prop)->scriptx_bidiclass & UCD_SCRIPTX_MASK)
|
||||||
#define UCD_BIDICLASS_PROP(prop) ((prop)->scriptx_bidiclass >> UCD_BIDICLASS_SHIFT)
|
#define UCD_BIDICLASS_PROP(prop) ((prop)->scriptx_bidiclass >> UCD_BIDICLASS_SHIFT)
|
||||||
|
#define UCD_BPROPS_PROP(prop) ((prop)->bprops & UCD_BPROPS_MASK)
|
||||||
|
|
||||||
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
|
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
|
||||||
#define UCD_SCRIPT(ch) GET_UCD(ch)->script
|
#define UCD_SCRIPT(ch) GET_UCD(ch)->script
|
||||||
|
@ -1854,7 +1855,7 @@ typedef struct {
|
||||||
#define UCD_CASESET(ch) GET_UCD(ch)->caseset
|
#define UCD_CASESET(ch) GET_UCD(ch)->caseset
|
||||||
#define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case)))
|
#define UCD_OTHERCASE(ch) ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case)))
|
||||||
#define UCD_SCRIPTX(ch) UCD_SCRIPTX_PROP(GET_UCD(ch))
|
#define UCD_SCRIPTX(ch) UCD_SCRIPTX_PROP(GET_UCD(ch))
|
||||||
#define UCD_BPROPS(ch) GET_UCD(ch)->bprops
|
#define UCD_BPROPS(ch) UCD_BPROPS_PROP(GET_UCD(ch))
|
||||||
#define UCD_BIDICLASS(ch) UCD_BIDICLASS_PROP(GET_UCD(ch))
|
#define UCD_BIDICLASS(ch) UCD_BIDICLASS_PROP(GET_UCD(ch))
|
||||||
|
|
||||||
/* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words
|
/* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words
|
||||||
|
|
|
@ -7708,8 +7708,9 @@ if (unicode_status & XCLASS_NEEDS_UCD)
|
||||||
|
|
||||||
if (unicode_status & XCLASS_HAS_BOOL)
|
if (unicode_status & XCLASS_HAS_BOOL)
|
||||||
{
|
{
|
||||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
|
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
|
||||||
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
|
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
|
||||||
|
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
|
||||||
|
|
||||||
while (*cc != XCL_END)
|
while (*cc != XCL_END)
|
||||||
{
|
{
|
||||||
|
|
|
@ -2521,7 +2521,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
case PT_BOOL:
|
case PT_BOOL:
|
||||||
{
|
{
|
||||||
BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) +
|
BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||||
prop->bprops * ucd_boolprop_sets_item_size, Fecode[2]) != 0;
|
UCD_BPROPS_PROP(prop), Fecode[2]) != 0;
|
||||||
if (ok == notmatch) RRETURN(MATCH_NOMATCH);
|
if (ok == notmatch) RRETURN(MATCH_NOMATCH);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -2875,7 +2875,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
GETCHARINCTEST(fc, Feptr);
|
GETCHARINCTEST(fc, Feptr);
|
||||||
prop = GET_UCD(fc);
|
prop = GET_UCD(fc);
|
||||||
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
|
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||||
prop->bprops * ucd_boolprop_sets_item_size, Lpropvalue) != 0;
|
UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
|
||||||
if (ok == notmatch)
|
if (ok == notmatch)
|
||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
}
|
}
|
||||||
|
@ -3695,7 +3695,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
GETCHARINCTEST(fc, Feptr);
|
GETCHARINCTEST(fc, Feptr);
|
||||||
prop = GET_UCD(fc);
|
prop = GET_UCD(fc);
|
||||||
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
|
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||||
prop->bprops * ucd_boolprop_sets_item_size, Lpropvalue) != 0;
|
UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
|
||||||
if (ok == (Lctype == OP_NOTPROP))
|
if (ok == (Lctype == OP_NOTPROP))
|
||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
}
|
}
|
||||||
|
@ -4263,7 +4263,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
GETCHARLENTEST(fc, Feptr, len);
|
GETCHARLENTEST(fc, Feptr, len);
|
||||||
prop = GET_UCD(fc);
|
prop = GET_UCD(fc);
|
||||||
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
|
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||||
prop->bprops * ucd_boolprop_sets_item_size, Lpropvalue) != 0;
|
UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
|
||||||
if (ok == notmatch) break;
|
if (ok == notmatch) break;
|
||||||
Feptr+= len;
|
Feptr+= len;
|
||||||
}
|
}
|
||||||
|
|
2822
src/pcre2_ucd.c
2822
src/pcre2_ucd.c
File diff suppressed because it is too large
Load Diff
|
@ -221,7 +221,7 @@ while ((t = *data++) != XCL_END)
|
||||||
|
|
||||||
case PT_BOOL:
|
case PT_BOOL:
|
||||||
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
|
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||||
prop->bprops * ucd_boolprop_sets_item_size, data[1]) != 0;
|
UCD_BPROPS_PROP(prop), data[1]) != 0;
|
||||||
if (ok == isprop) return !negated;
|
if (ok == isprop) return !negated;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue