A bug which was introduced by the single character repetition optimization was fixed.

This commit is contained in:
Zoltán Herczeg 2015-08-13 11:35:38 +00:00
parent b2144d14b6
commit d98800a946
4 changed files with 41 additions and 18 deletions

View File

@ -139,6 +139,9 @@ This issue was found by Karl Skomski with a custom LLVM fuzzer.
37. The JIT compiler should restore the control chain for empty possessive
repeats. This issue was found by Karl Skomski with a custom LLVM fuzzer.

38. A bug which was introduced by the single character repetition optimization
was fixed.

Version 10.20 30-June-2015
--------------------------

View File

@ -400,6 +400,8 @@ typedef struct compiler_common {
BOOL has_skip_arg; BOOL has_skip_arg;
/* (*THEN) is found in the pattern. */ /* (*THEN) is found in the pattern. */
BOOL has_then; BOOL has_then;
/* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
BOOL has_skip_in_assert_back;
/* Currently in recurse or negative assert. */ /* Currently in recurse or negative assert. */
BOOL local_exit; BOOL local_exit;
/* Currently in a positive assert. */ /* Currently in a positive assert. */
@ -818,6 +820,7 @@ static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPT
{ {
int count; int count;
PCRE2_SPTR slot; PCRE2_SPTR slot;
PCRE2_SPTR assert_back_end = cc - 1;
/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */ /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
while (cc < ccend) while (cc < ccend)
@ -889,6 +892,13 @@ while (cc < ccend)
cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE); cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
break; break;
case OP_ASSERTBACK:
slot = bracketend(cc);
if (slot > assert_back_end)
assert_back_end = slot;
cc += 1 + LINK_SIZE;
break;
case OP_THEN_ARG: case OP_THEN_ARG:
common->has_then = TRUE; common->has_then = TRUE;
common->control_head_ptr = 1; common->control_head_ptr = 1;
@ -910,9 +920,17 @@ while (cc < ccend)
cc += 1; cc += 1;
break; break;
case OP_SKIP:
if (cc < assert_back_end)
common->has_skip_in_assert_back = TRUE;
cc += 1;
break;
case OP_SKIP_ARG: case OP_SKIP_ARG:
common->control_head_ptr = 1; common->control_head_ptr = 1;
common->has_skip_arg = TRUE; common->has_skip_arg = TRUE;
if (cc < assert_back_end)
common->has_skip_in_assert_back = TRUE;
cc += 1 + 2 + cc[1]; cc += 1 + 2 + cc[1];
break; break;
@ -1042,7 +1060,7 @@ if (is_accelerated_repeat(cc))
return FALSE; return FALSE;
} }
static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start) static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_si depth)
{ {
PCRE2_SPTR next_alt; PCRE2_SPTR next_alt;
@ -1083,8 +1101,8 @@ static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc
break; break;
} }
if (*cc == OP_BRA || *cc == OP_CBRA) if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
detect_fast_fail(common, cc, private_data_start); detect_fast_fail(common, cc, private_data_start, depth - 1);
if (is_accelerated_repeat(cc)) if (is_accelerated_repeat(cc))
{ {
@ -2405,8 +2423,9 @@ sljit_si i;
SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr); SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw)) for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
} }
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length) static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
@ -8352,7 +8371,7 @@ else
} }
if (fast_fail && fast_str_ptr != 0) if (fast_fail && fast_str_ptr != 0)
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr)); add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr));
/* Handle fixed part first. */ /* Handle fixed part first. */
if (exact > 1) if (exact > 1)
@ -10518,8 +10537,8 @@ private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
set_private_data_ptrs(common, &private_data_size, ccend); set_private_data_ptrs(common, &private_data_size, ccend);
if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
{ {
if (!detect_fast_forward_skip(common, &private_data_size)) if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back)
detect_fast_fail(common, common->start, &private_data_size); detect_fast_fail(common, common->start, &private_data_size, 4);
} }
SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr); SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr);

View File

@ -820,6 +820,7 @@ static struct regression_test_case regression_test_cases[] = {
{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" }, { MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
{ MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," }, { MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
{ MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," }, { MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
{ MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
/* (*THEN) verb. */ /* (*THEN) verb. */
{ MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" }, { MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },

22
testdata/testoutput16 vendored
View File

@ -185,11 +185,11 @@ Last code unit = 'z'
Subject length lower bound = 2 Subject length lower bound = 2
JIT compilation was successful JIT compilation was successful
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits
Minimum match limit = 3 Minimum match limit = 2
0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz (JIT) 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz (JIT)
1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaz\=find_limits aaaaaaaaaaaaaz\=find_limits
Minimum match limit = 16384 Minimum match limit = 16383
No match (JIT) No match (JIT)
!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I !((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
@ -198,25 +198,25 @@ May match empty string
Subject length lower bound = 0 Subject length lower bound = 0
JIT compilation was successful JIT compilation was successful
/* this is a C style comment */\=find_limits /* this is a C style comment */\=find_limits
Minimum match limit = 2 Minimum match limit = 1
0: /* this is a C style comment */ (JIT) 0: /* this is a C style comment */ (JIT)
1: /* this is a C style comment */ 1: /* this is a C style comment */
/^(?>a)++/ /^(?>a)++/
aa\=find_limits aa\=find_limits
Minimum match limit = 2 Minimum match limit = 1
0: aa (JIT) 0: aa (JIT)
aaaaaaaaa\=find_limits aaaaaaaaa\=find_limits
Minimum match limit = 2 Minimum match limit = 1
0: aaaaaaaaa (JIT) 0: aaaaaaaaa (JIT)
/(a)(?1)++/ /(a)(?1)++/
aa\=find_limits aa\=find_limits
Minimum match limit = 2 Minimum match limit = 1
0: aa (JIT) 0: aa (JIT)
1: a 1: a
aaaaaaaaa\=find_limits aaaaaaaaa\=find_limits
Minimum match limit = 2 Minimum match limit = 1
0: aaaaaaaaa (JIT) 0: aaaaaaaaa (JIT)
1: a 1: a
@ -237,12 +237,12 @@ Minimum match limit = 1
/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/ /^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/
aabbccddee\=find_limits aabbccddee\=find_limits
Minimum match limit = 6 Minimum match limit = 5
0: aabbccddee (JIT) 0: aabbccddee (JIT)
/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/ /^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/
aabbccddee\=find_limits aabbccddee\=find_limits
Minimum match limit = 6 Minimum match limit = 5
0: aabbccddee (JIT) 0: aabbccddee (JIT)
1: aa 1: aa
2: bb 2: bb
@ -252,7 +252,7 @@ Minimum match limit = 6
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/ /^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
aabbccddee\=find_limits aabbccddee\=find_limits
Minimum match limit = 6 Minimum match limit = 5
0: aabbccddee (JIT) 0: aabbccddee (JIT)
1: aa 1: aa
2: cc 2: cc
@ -260,7 +260,7 @@ Minimum match limit = 6
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/jitfast /^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/jitfast
aabbccddee\=find_limits aabbccddee\=find_limits
Minimum match limit = 6 Minimum match limit = 5
0: aabbccddee (JIT) 0: aabbccddee (JIT)
1: aa 1: aa
2: cc 2: cc