Fixed a bug that was introduced by the single-character repetition optimization.

This commit is contained in:
Zoltán Herczeg 2015-08-13 11:35:38 +00:00
parent b2144d14b6
commit d98800a946
4 changed files with 41 additions and 18 deletions

View File

@ -139,6 +139,9 @@ This issue was found by Karl Skomski with a custom LLVM fuzzer.
37. The JIT compiler should restore the control chain for empty possessive
repeats. This issue was found by Karl Skomski with a custom LLVM fuzzer.
38. Fixed a bug that was introduced by the single-character repetition
optimization.
Version 10.20 30-June-2015
--------------------------

View File

@ -400,6 +400,8 @@ typedef struct compiler_common {
BOOL has_skip_arg;
/* (*THEN) is found in the pattern. */
BOOL has_then;
/* (*SKIP) or (*SKIP:arg) is found in a lookbehind assertion. */
BOOL has_skip_in_assert_back;
/* Currently in recurse or negative assert. */
BOOL local_exit;
/* Currently in a positive assert. */
@ -818,6 +820,7 @@ static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPT
{
int count;
PCRE2_SPTR slot;
PCRE2_SPTR assert_back_end = cc - 1;
/* Calculate important variables (like stack size) and check whether all opcodes are supported. */
while (cc < ccend)
@ -889,6 +892,13 @@ while (cc < ccend)
cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
break;
case OP_ASSERTBACK:
slot = bracketend(cc);
if (slot > assert_back_end)
assert_back_end = slot;
cc += 1 + LINK_SIZE;
break;
case OP_THEN_ARG:
common->has_then = TRUE;
common->control_head_ptr = 1;
@ -910,9 +920,17 @@ while (cc < ccend)
cc += 1;
break;
case OP_SKIP:
if (cc < assert_back_end)
common->has_skip_in_assert_back = TRUE;
cc += 1;
break;
case OP_SKIP_ARG:
common->control_head_ptr = 1;
common->has_skip_arg = TRUE;
if (cc < assert_back_end)
common->has_skip_in_assert_back = TRUE;
cc += 1 + 2 + cc[1];
break;
@ -1042,7 +1060,7 @@ if (is_accelerated_repeat(cc))
return FALSE;
}
static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start)
static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_si depth)
{
PCRE2_SPTR next_alt;
@ -1083,8 +1101,8 @@ static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc
break;
}
if (*cc == OP_BRA || *cc == OP_CBRA)
detect_fast_fail(common, cc, private_data_start);
if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
detect_fast_fail(common, cc, private_data_start, depth - 1);
if (is_accelerated_repeat(cc))
{
@ -2405,8 +2423,9 @@ sljit_si i;
SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
}
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
@ -8352,7 +8371,7 @@ else
}
if (fast_fail && fast_str_ptr != 0)
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr));
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr));
/* Handle fixed part first. */
if (exact > 1)
@ -10518,8 +10537,8 @@ private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
set_private_data_ptrs(common, &private_data_size, ccend);
if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
{
if (!detect_fast_forward_skip(common, &private_data_size))
detect_fast_fail(common, common->start, &private_data_size);
if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
detect_fast_fail(common, common->start, &private_data_size, 4);
}
SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);

View File

@ -820,6 +820,7 @@ static struct regression_test_case regression_test_cases[] = {
{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
{ MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
{ MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
{ MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
/* (*THEN) verb. */
{ MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },

22
testdata/testoutput16 vendored
View File

@ -185,11 +185,11 @@ Last code unit = 'z'
Subject length lower bound = 2
JIT compilation was successful
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits
Minimum match limit = 3
Minimum match limit = 2
0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz (JIT)
1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaz\=find_limits
Minimum match limit = 16384
Minimum match limit = 16383
No match (JIT)
!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
@ -198,25 +198,25 @@ May match empty string
Subject length lower bound = 0
JIT compilation was successful
/* this is a C style comment */\=find_limits
Minimum match limit = 2
Minimum match limit = 1
0: /* this is a C style comment */ (JIT)
1: /* this is a C style comment */
/^(?>a)++/
aa\=find_limits
Minimum match limit = 2
Minimum match limit = 1
0: aa (JIT)
aaaaaaaaa\=find_limits
Minimum match limit = 2
Minimum match limit = 1
0: aaaaaaaaa (JIT)
/(a)(?1)++/
aa\=find_limits
Minimum match limit = 2
Minimum match limit = 1
0: aa (JIT)
1: a
aaaaaaaaa\=find_limits
Minimum match limit = 2
Minimum match limit = 1
0: aaaaaaaaa (JIT)
1: a
@ -237,12 +237,12 @@ Minimum match limit = 1
/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/
aabbccddee\=find_limits
Minimum match limit = 6
Minimum match limit = 5
0: aabbccddee (JIT)
/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/
aabbccddee\=find_limits
Minimum match limit = 6
Minimum match limit = 5
0: aabbccddee (JIT)
1: aa
2: bb
@ -252,7 +252,7 @@ Minimum match limit = 6
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
aabbccddee\=find_limits
Minimum match limit = 6
Minimum match limit = 5
0: aabbccddee (JIT)
1: aa
2: cc
@ -260,7 +260,7 @@ Minimum match limit = 6
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/jitfast
aabbccddee\=find_limits
Minimum match limit = 6
Minimum match limit = 5
0: aabbccddee (JIT)
1: aa
2: cc