Follow the partial matching changes in JIT.

This commit is contained in:
Zoltán Herczeg 2019-07-23 12:34:58 +00:00
parent 3572634086
commit 82a4729e13
5 changed files with 43 additions and 30 deletions

View File

@ -413,6 +413,8 @@ typedef struct compiler_common {
sljit_sw lcc; sljit_sw lcc;
/* Mode can be PCRE2_JIT_COMPLETE and others. */ /* Mode can be PCRE2_JIT_COMPLETE and others. */
int mode; int mode;
/* TRUE, when empty match is accepted for partial matching. */
BOOL allow_empty_partial;
/* TRUE, when minlength is 0 (the pattern might match an empty string). */ /* TRUE, when minlength is 0 (the pattern might match an empty string). */
BOOL might_be_empty; BOOL might_be_empty;
/* \K is found in the pattern. */ /* \K is found in the pattern. */
@ -3303,7 +3305,7 @@ SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);
if (common->mode == PCRE2_JIT_COMPLETE) if (common->mode == PCRE2_JIT_COMPLETE)
return; return;
if (!force) if (!force && !common->allow_empty_partial)
jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
else if (common->mode == PCRE2_JIT_PARTIAL_SOFT) else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1); jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
@ -3365,7 +3367,11 @@ if (common->mode == PCRE2_JIT_COMPLETE)
/* Partial matching mode. */ /* Partial matching mode. */
jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
if (!common->allow_empty_partial)
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
if (common->mode == PCRE2_JIT_PARTIAL_SOFT) if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
{ {
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
@ -8332,12 +8338,14 @@ switch(type)
JUMPHERE(jump[3]); JUMPHERE(jump[3]);
} }
JUMPHERE(jump[0]); JUMPHERE(jump[0]);
check_partial(common, FALSE); if (common->mode != PCRE2_JIT_COMPLETE)
check_partial(common, TRUE);
return cc; return cc;
case OP_EOD: case OP_EOD:
add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
check_partial(common, FALSE); if (common->mode != PCRE2_JIT_COMPLETE)
check_partial(common, TRUE);
return cc; return cc;
case OP_DOLL: case OP_DOLL:
@ -12642,7 +12650,7 @@ struct sljit_jump *once = NULL;
struct sljit_jump *cond = NULL; struct sljit_jump *cond = NULL;
struct sljit_label *rmin_label = NULL; struct sljit_label *rmin_label = NULL;
struct sljit_label *exact_label = NULL; struct sljit_label *exact_label = NULL;
struct sljit_put_label *put_label; struct sljit_put_label *put_label = NULL;
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
{ {
@ -13696,6 +13704,7 @@ common->fcc = tables + fcc_offset;
common->lcc = (sljit_sw)(tables + lcc_offset); common->lcc = (sljit_sw)(tables + lcc_offset);
common->mode = mode; common->mode = mode;
common->might_be_empty = re->minlength == 0; common->might_be_empty = re->minlength == 0;
common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY) != 0;
common->nltype = NLTYPE_FIXED; common->nltype = NLTYPE_FIXED;
switch(re->newline_convention) switch(re->newline_convention)
{ {

View File

@ -214,6 +214,10 @@
#define SLJIT_MEMCPY(dest, src, len) memcpy(dest, src, len) #define SLJIT_MEMCPY(dest, src, len) memcpy(dest, src, len)
#endif #endif
#ifndef SLJIT_MEMMOVE
#define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len)
#endif
#ifndef SLJIT_ZEROMEM #ifndef SLJIT_ZEROMEM
#define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len) #define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len)
#endif #endif

View File

@ -103,7 +103,7 @@ static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, slji
} }
code_ptr -= put_label->flags + (2 + sizeof(sljit_uw)); code_ptr -= put_label->flags + (2 + sizeof(sljit_uw));
SLJIT_MEMCPY(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags); SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags);
SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);

24
testdata/testinput2 vendored
View File

@ -5694,35 +5694,35 @@ a)"xI
/(\2)((?=(?<=\1)))/ /(\2)((?=(?<=\1)))/
/c*+(?<=[bc])/ /c*+(?<=[bc])/
abc\=ph,no_jit abc\=ph
ab\=ph,no_jit ab\=ph
abc\=ps,no_jit abc\=ps
ab\=ps,no_jit ab\=ps
/c++(?<=[bc])/ /c++(?<=[bc])/
abc\=ph,no_jit abc\=ph
ab\=ph,no_jit ab\=ph
/(?<=(?=.(?<=x)))/ /(?<=(?=.(?<=x)))/
abx abx
ab\=ph,no_jit ab\=ph
bxyz bxyz
xyz xyz
/\z/ /\z/
abc\=ph,no_jit abc\=ph
abc\=ps abc\=ps
/\Z/ /\Z/
abc\=ph,no_jit abc\=ph
abc\=ps abc\=ps
abc\n\=ph,no_jit abc\n\=ph
abc\n\=ps abc\n\=ps
/(?![ab]).*/ /(?![ab]).*/
ab\=ph,no_jit ab\=ph
/c*+/ /c*+/
ab\=ph,offset=2,no_jit ab\=ph,offset=2
# End of testinput2 # End of testinput2

24
testdata/testoutput2 vendored
View File

@ -17190,25 +17190,25 @@ Subject length lower bound = 1
Failed: error 125 at offset 8: lookbehind assertion is not fixed length Failed: error 125 at offset 8: lookbehind assertion is not fixed length
/c*+(?<=[bc])/ /c*+(?<=[bc])/
abc\=ph,no_jit abc\=ph
Partial match: c Partial match: c
ab\=ph,no_jit ab\=ph
Partial match: Partial match:
abc\=ps,no_jit abc\=ps
0: c 0: c
ab\=ps,no_jit ab\=ps
0: 0:
/c++(?<=[bc])/ /c++(?<=[bc])/
abc\=ph,no_jit abc\=ph
Partial match: c Partial match: c
ab\=ph,no_jit ab\=ph
Partial match: Partial match:
/(?<=(?=.(?<=x)))/ /(?<=(?=.(?<=x)))/
abx abx
0: 0:
ab\=ph,no_jit ab\=ph
Partial match: Partial match:
bxyz bxyz
0: 0:
@ -17216,27 +17216,27 @@ Partial match:
0: 0:
/\z/ /\z/
abc\=ph,no_jit abc\=ph
Partial match: Partial match:
abc\=ps abc\=ps
0: 0:
/\Z/ /\Z/
abc\=ph,no_jit abc\=ph
Partial match: Partial match:
abc\=ps abc\=ps
0: 0:
abc\n\=ph,no_jit abc\n\=ph
Partial match: \x0a Partial match: \x0a
abc\n\=ps abc\n\=ps
0: 0:
/(?![ab]).*/ /(?![ab]).*/
ab\=ph,no_jit ab\=ph
Partial match: Partial match:
/c*+/ /c*+/
ab\=ph,offset=2,no_jit ab\=ph,offset=2
Partial match: Partial match:
# End of testinput2 # End of testinput2