From 5092ce5b2fe16f4fbe870a4fecae529c120b1b8b Mon Sep 17 00:00:00 2001 From: Zoltan Herczeg Date: Tue, 11 Jan 2022 11:37:26 +0000 Subject: [PATCH] Support boolean properties in JIT --- src/pcre2_jit_compile.c | 166 ++++++++++++++++++++-------------------- 1 file changed, 82 insertions(+), 84 deletions(-) diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index 8eab193..304e5ce 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -7418,9 +7418,9 @@ static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHA #define XCLASS_HAS_TYPE 0x004 #define XCLASS_HAS_SCRIPT 0x008 #define XCLASS_HAS_SCRIPT_EXTENSION 0x010 -#define XCLASS_HAS_BIDICO 0x020 +#define XCLASS_HAS_BOOL 0x020 #define XCLASS_HAS_BIDICL 0x040 -#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BIDICO | XCLASS_HAS_BIDICL) +#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL) #define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080 #define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100 #define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200 @@ -7551,8 +7551,8 @@ while (*cc != XCL_END) unicode_status |= XCLASS_SAVE_CHAR; break; - case PT_BIDICO: - unicode_status |= XCLASS_HAS_BIDICO; + case PT_BOOL: + unicode_status |= XCLASS_HAS_BOOL; break; case PT_BIDICL: @@ -7668,6 +7668,82 @@ if (unicode_status & XCLASS_NEEDS_UCD) ccbegin = cc; + if (unicode_status & XCLASS_HAS_BIDICL) + { + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bidi)); + + while (*cc != XCL_END) + { + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + GETCHARINCTEST(c, cc); + } + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + cc++; + if (*cc == PT_BIDICL) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + if (cc[-1] == XCL_NOTPROP) + invertcmp ^= 0x1; + jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]); + add_jump(compiler, compares > 0 ? list : backtracks, jump); + } + cc += 2; + } + } + + cc = ccbegin; + } + + if (unicode_status & XCLASS_HAS_BOOL) + { + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops)); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); + + while (*cc != XCL_END) + { + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + GETCHARINCTEST(c, cc); + } + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + cc++; + if (*cc == PT_BOOL) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + if (cc[-1] == XCL_NOTPROP) + invertcmp ^= 0x1; + + OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f)); + add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); + } + cc += 2; + } + } + + cc = ccbegin; + } + if (unicode_status & XCLASS_HAS_SCRIPT) { OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); @@ -7718,7 +7794,7 @@ if (unicode_status & XCLASS_NEEDS_UCD) if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP) { - if (unicode_status & (XCLASS_HAS_BIDICO | XCLASS_HAS_BIDICL | XCLASS_HAS_TYPE)) + if (unicode_status & XCLASS_HAS_TYPE) { if (unicode_status & XCLASS_SAVE_CHAR) { @@ -7785,84 +7861,6 @@ if (unicode_status & XCLASS_NEEDS_UCD) cc = ccbegin; } - if (unicode_status & (XCLASS_HAS_BIDICO | XCLASS_HAS_BIDICL)) - { - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bidi)); - - if (unicode_status & XCLASS_HAS_BIDICO) - { - while (*cc != XCL_END) - { - if (*cc == XCL_SINGLE) - { - cc ++; - GETCHARINCTEST(c, cc); - } - else if (*cc == XCL_RANGE) - { - cc ++; - GETCHARINCTEST(c, cc); - GETCHARINCTEST(c, cc); - } - else - { - SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); - cc++; - if (*cc == PT_BIDICO) - { - compares--; - invertcmp = (compares == 0 && list != backtracks); - if (cc[-1] == XCL_NOTPROP) - invertcmp ^= 0x1; - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICONTROL_BIT); - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - add_jump(compiler, compares > 0 ? list : backtracks, jump); - } - cc += 2; - } - } - - cc = ccbegin; - } - - if (unicode_status & XCLASS_HAS_BIDICL) - { - OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_MASK); - - while (*cc != XCL_END) - { - if (*cc == XCL_SINGLE) - { - cc ++; - GETCHARINCTEST(c, cc); - } - else if (*cc == XCL_RANGE) - { - cc ++; - GETCHARINCTEST(c, cc); - GETCHARINCTEST(c, cc); - } - else - { - SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); - cc++; - if (*cc == PT_BIDICL) - { - compares--; - invertcmp = (compares == 0 && list != backtracks); - if (cc[-1] == XCL_NOTPROP) - invertcmp ^= 0x1; - jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]); - add_jump(compiler, compares > 0 ? list : backtracks, jump); - } - cc += 2; - } - } - - cc = ccbegin; - } - } - if (unicode_status & XCLASS_SAVE_CHAR) OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); @@ -7975,7 +7973,7 @@ while (*cc != XCL_END) case PT_SC: case PT_SCX: - case PT_BIDICO: + case PT_BOOL: case PT_BIDICL: compares++; /* Do nothing. */