JIT support for Bidi_Control and Bidi_Class
This commit is contained in:
parent
49b29f837d
commit
4243515033
|
@ -7412,6 +7412,16 @@ return cc;
|
|||
|
||||
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
|
||||
|
||||
#ifdef SUPPORT_UNICODE /* Flag bits stored in unicode_status by compile_xclass_matchingpath(). */
|
||||
#define XCLASS_SAVE_CHAR 0x01 /* Set for XCL_SINGLE / XCL_RANGE items and for property types such as PT_CLIST / PT_UCNC: the character value is copied to RETURN_ADDR so it can be moved back into TMP1 after the UCD lookup. */
|
||||
#define XCLASS_CHAR_SAVED 0x02 /* Set once the character has already been copied to RETURN_ADDR (XCL_MAP path), so the copy is not emitted a second time. */
|
||||
#define XCLASS_HAS_TYPE 0x04 /* Set for PT_GC / PT_PC / PT_ALNUM and related property types: the chartype field of the ucd_record is loaded. */
|
||||
#define XCLASS_HAS_SCRIPT 0x08 /* Set for PT_SC: the script field of the ucd_record is loaded. */
|
||||
#define XCLASS_HAS_BIDICO 0x10 /* Set for PT_BIDICO: the class contains a Bidi_Control property test. */
|
||||
#define XCLASS_HAS_BIDICL 0x20 /* Set for PT_BIDICL: the class contains a Bidi_Class property test. */
|
||||
#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_BIDICO | XCLASS_HAS_BIDICL) /* Union of the property flags; tested to decide whether read_char() receives the backtrack list and whether the UCD lookup block is generated. */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
|
||||
{
|
||||
DEFINE_COMPILER;
|
||||
|
@ -7426,8 +7436,7 @@ BOOL utf = common->utf;
|
|||
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
|
||||
BOOL charsaved = FALSE;
|
||||
sljit_u32 unicode_status = 0;
|
||||
int typereg = TMP1;
|
||||
const sljit_u32 *other_cases;
|
||||
sljit_uw typeoffset;
|
||||
|
@ -7454,7 +7463,7 @@ while (*cc != XCL_END)
|
|||
if (c > max) max = c;
|
||||
if (c < min) min = c;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
needschar = TRUE;
|
||||
unicode_status |= XCLASS_SAVE_CHAR;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
else if (*cc == XCL_RANGE)
|
||||
|
@ -7465,7 +7474,7 @@ while (*cc != XCL_END)
|
|||
GETCHARINCTEST(c, cc);
|
||||
if (c > max) max = c;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
needschar = TRUE;
|
||||
unicode_status |= XCLASS_SAVE_CHAR;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
#ifdef SUPPORT_UNICODE
|
||||
|
@ -7506,11 +7515,11 @@ while (*cc != XCL_END)
|
|||
case PT_GC:
|
||||
case PT_PC:
|
||||
case PT_ALNUM:
|
||||
needstype = TRUE;
|
||||
unicode_status |= XCLASS_HAS_TYPE;
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
needsscript = TRUE;
|
||||
unicode_status |= XCLASS_HAS_SCRIPT;
|
||||
break;
|
||||
|
||||
case PT_SPACE:
|
||||
|
@ -7519,13 +7528,20 @@ while (*cc != XCL_END)
|
|||
case PT_PXGRAPH:
|
||||
case PT_PXPRINT:
|
||||
case PT_PXPUNCT:
|
||||
needstype = TRUE;
|
||||
needschar = TRUE;
|
||||
unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE;
|
||||
break;
|
||||
|
||||
case PT_CLIST:
|
||||
case PT_UCNC:
|
||||
needschar = TRUE;
|
||||
unicode_status |= XCLASS_SAVE_CHAR;
|
||||
break;
|
||||
|
||||
case PT_BIDICO:
|
||||
unicode_status |= XCLASS_HAS_BIDICO;
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
unicode_status |= XCLASS_HAS_BIDICL;
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -7545,7 +7561,7 @@ if ((cc[-1] & XCL_NOT) != 0)
|
|||
else
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
read_char(common, min, max, (needstype || needsscript) ? backtracks : NULL, 0);
|
||||
read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0);
|
||||
#else /* !SUPPORT_UNICODE */
|
||||
read_char(common, min, max, NULL, 0);
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
@ -7581,7 +7597,7 @@ else if ((cc[-1] & XCL_MAP) != 0)
|
|||
{
|
||||
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
charsaved = TRUE;
|
||||
unicode_status |= XCLASS_CHAR_SAVED;
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
|
||||
{
|
||||
|
@ -7609,9 +7625,9 @@ else if ((cc[-1] & XCL_MAP) != 0)
|
|||
}
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (needstype || needsscript)
|
||||
if (unicode_status & XCLASS_NEEDS_UCD)
|
||||
{
|
||||
if (needschar && !charsaved)
|
||||
if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR)
|
||||
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
|
@ -7631,18 +7647,16 @@ if (needstype || needsscript)
|
|||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
|
||||
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
|
||||
|
||||
/* Before anything else, we deal with scripts. */
|
||||
if (needsscript)
|
||||
{
|
||||
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||
|
||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
|
||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||
|
||||
ccbegin = cc;
|
||||
|
||||
if (unicode_status & XCLASS_HAS_SCRIPT)
|
||||
{
|
||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
|
||||
|
||||
while (*cc != XCL_END)
|
||||
{
|
||||
if (*cc == XCL_SINGLE)
|
||||
|
@ -7674,53 +7688,97 @@ if (needstype || needsscript)
|
|||
}
|
||||
|
||||
cc = ccbegin;
|
||||
}
|
||||
|
||||
if (needstype)
|
||||
if (unicode_status & (XCLASS_HAS_BIDICO | XCLASS_HAS_BIDICL))
|
||||
{
|
||||
/* TMP2 has already been shifted by 2 */
|
||||
if (!needschar)
|
||||
{
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bidi));
|
||||
|
||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
|
||||
if (unicode_status & XCLASS_HAS_BIDICO)
|
||||
{
|
||||
while (*cc != XCL_END)
|
||||
{
|
||||
if (*cc == XCL_SINGLE)
|
||||
{
|
||||
cc ++;
|
||||
GETCHARINCTEST(c, cc);
|
||||
}
|
||||
else if (*cc == XCL_RANGE)
|
||||
{
|
||||
cc ++;
|
||||
GETCHARINCTEST(c, cc);
|
||||
GETCHARINCTEST(c, cc);
|
||||
}
|
||||
else
|
||||
{
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
|
||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||
|
||||
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
|
||||
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
|
||||
typereg = RETURN_ADDR;
|
||||
}
|
||||
}
|
||||
else if (needschar)
|
||||
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
|
||||
}
|
||||
else if (needstype)
|
||||
SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
|
||||
cc++;
|
||||
if (*cc == PT_BIDICO)
|
||||
{
|
||||
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
|
||||
compares--;
|
||||
invertcmp = (compares == 0 && list != backtracks);
|
||||
if (cc[-1] == XCL_NOTPROP)
|
||||
invertcmp ^= 0x1;
|
||||
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICONTROL_BIT);
|
||||
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
|
||||
add_jump(compiler, compares > 0 ? list : backtracks, jump);
|
||||
}
|
||||
cc += 2;
|
||||
}
|
||||
}
|
||||
|
||||
if (!needschar)
|
||||
cc = ccbegin;
|
||||
}
|
||||
|
||||
if (unicode_status & XCLASS_HAS_BIDICL)
|
||||
{
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_MASK);
|
||||
|
||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
|
||||
while (*cc != XCL_END)
|
||||
{
|
||||
if (*cc == XCL_SINGLE)
|
||||
{
|
||||
cc ++;
|
||||
GETCHARINCTEST(c, cc);
|
||||
}
|
||||
else if (*cc == XCL_RANGE)
|
||||
{
|
||||
cc ++;
|
||||
GETCHARINCTEST(c, cc);
|
||||
GETCHARINCTEST(c, cc);
|
||||
}
|
||||
else
|
||||
{
|
||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||
SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
|
||||
cc++;
|
||||
if (*cc == PT_BIDICL)
|
||||
{
|
||||
compares--;
|
||||
invertcmp = (compares == 0 && list != backtracks);
|
||||
if (cc[-1] == XCL_NOTPROP)
|
||||
invertcmp ^= 0x1;
|
||||
jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
|
||||
add_jump(compiler, compares > 0 ? list : backtracks, jump);
|
||||
}
|
||||
cc += 2;
|
||||
}
|
||||
}
|
||||
|
||||
cc = ccbegin;
|
||||
}
|
||||
}
|
||||
|
||||
if (unicode_status & XCLASS_SAVE_CHAR)
|
||||
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
|
||||
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
|
||||
|
||||
if (unicode_status & XCLASS_HAS_TYPE)
|
||||
{
|
||||
if (unicode_status & XCLASS_SAVE_CHAR)
|
||||
typereg = RETURN_ADDR;
|
||||
|
||||
OP1(SLJIT_MOV_U8, typereg, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
|
||||
}
|
||||
}
|
||||
else if (needschar)
|
||||
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Generating code. */
|
||||
|
@ -7821,6 +7879,8 @@ while (*cc != XCL_END)
|
|||
break;
|
||||
|
||||
case PT_SC:
|
||||
case PT_BIDICO:
|
||||
case PT_BIDICL:
|
||||
compares++;
|
||||
/* Do nothing. */
|
||||
break;
|
||||
|
|
|
@ -2498,8 +2498,6 @@
|
|||
# -----------------------------------------------------------------------------
|
||||
# Tests for bidi control and bidi class properties, not yet supported by JIT.
|
||||
|
||||
#subject no_jit
|
||||
|
||||
/\p{ bidi_control }/utf
|
||||
-->\x{202c}<--
|
||||
|
||||
|
@ -2605,8 +2603,6 @@
|
|||
/\p{bidi class:S}+\p{bidiclass:WS}+/utf
|
||||
-->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<--
|
||||
|
||||
#subject -no_jit
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# End of testinput4
|
||||
|
|
|
@ -4035,8 +4035,6 @@ No match
|
|||
# -----------------------------------------------------------------------------
|
||||
# Tests for bidi control and bidi class properties, not yet supported by JIT.
|
||||
|
||||
#subject no_jit
|
||||
|
||||
/\p{ bidi_control }/utf
|
||||
-->\x{202c}<--
|
||||
0: \x{202c}
|
||||
|
@ -4187,8 +4185,6 @@ No match
|
|||
-->\x{9}\x{b}\x{1f} \x{c} \x{2000} \x{3000}<--
|
||||
0: \x{09}\x{0b}\x{1f} \x{0c} \x{2000} \x{3000}
|
||||
|
||||
#subject -no_jit
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# End of testinput4
|
||||
|
|
Loading…
Reference in New Issue