From d98800a946758d4dd40c5e4f24fb56d105974583 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zolt=C3=A1n=20Herczeg?= Date: Thu, 13 Aug 2015 11:35:38 +0000 Subject: [PATCH] A bug which was introduced by the single character repetition optimization was fixed. --- ChangeLog | 3 +++ src/pcre2_jit_compile.c | 33 ++++++++++++++++++++++++++------- src/pcre2_jit_test.c | 1 + testdata/testoutput16 | 22 +++++++++++----------- 4 files changed, 41 insertions(+), 18 deletions(-) diff --git a/ChangeLog b/ChangeLog index 03c4e89..cba8352 100644 --- a/ChangeLog +++ b/ChangeLog @@ -139,6 +139,9 @@ This issue was found by Karl Skomski with a custom LLVM fuzzer. 37. The JIT compiler should restore the control chain for empty possessive repeats. This issue was found by Karl Skomski with a custom LLVM fuzzer. +38. A bug which was introduced by the single character repetition optimization +was fixed. + Version 10.20 30-June-2015 -------------------------- diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index 3cbe49b..aa71270 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -400,6 +400,8 @@ typedef struct compiler_common { BOOL has_skip_arg; /* (*THEN) is found in the pattern. */ BOOL has_then; + /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */ + BOOL has_skip_in_assert_back; /* Currently in recurse or negative assert. */ BOOL local_exit; /* Currently in a positive assert. */ @@ -818,6 +820,7 @@ static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPT { int count; PCRE2_SPTR slot; +PCRE2_SPTR assert_back_end = cc - 1; /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */ while (cc < ccend) @@ -889,6 +892,13 @@ while (cc < ccend) cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE); break; + case OP_ASSERTBACK: + slot = bracketend(cc); + if (slot > assert_back_end) + assert_back_end = slot; + cc += 1 + LINK_SIZE; + break; + case OP_THEN_ARG: common->has_then = TRUE; common->control_head_ptr = 1; @@ -910,9 +920,17 @@ while (cc < ccend) cc += 1; break; + case OP_SKIP: + if (cc < assert_back_end) + common->has_skip_in_assert_back = TRUE; + cc += 1; + break; + case OP_SKIP_ARG: common->control_head_ptr = 1; common->has_skip_arg = TRUE; + if (cc < assert_back_end) + common->has_skip_in_assert_back = TRUE; cc += 1 + 2 + cc[1]; break; @@ -1042,7 +1060,7 @@ if (is_accelerated_repeat(cc)) return FALSE; } -static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start) +static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_si depth) { PCRE2_SPTR next_alt; @@ -1083,8 +1101,8 @@ static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc break; } - if (*cc == OP_BRA || *cc == OP_CBRA) - detect_fast_fail(common, cc, private_data_start); + if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA)) + detect_fast_fail(common, cc, private_data_start, depth - 1); if (is_accelerated_repeat(cc)) { @@ -2405,8 +2423,9 @@ sljit_si i; SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr); +OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw)) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0); } static SLJIT_INLINE void do_reset_match(compiler_common *common, int length) @@ -8352,7 +8371,7 @@ else } if (fast_fail && fast_str_ptr != 0) - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr)); + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr)); /* Handle fixed part first. */ if (exact > 1) @@ -10518,8 +10537,8 @@ private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw); set_private_data_ptrs(common, &private_data_size, ccend); if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) { - if (!detect_fast_forward_skip(common, &private_data_size)) - detect_fast_fail(common, common->start, &private_data_size); + if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back) + detect_fast_fail(common, common->start, &private_data_size, 4); } SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr); diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c index 733ec14..f4bc954 100644 --- a/src/pcre2_jit_test.c +++ b/src/pcre2_jit_test.c @@ -820,6 +820,7 @@ static struct regression_test_case regression_test_cases[] = { { MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" }, { MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," }, { MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," }, + { MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" }, /* (*THEN) verb. */ { MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" }, diff --git a/testdata/testoutput16 b/testdata/testoutput16 index a45bd65..e262813 100644 --- a/testdata/testoutput16 +++ b/testdata/testoutput16 @@ -185,11 +185,11 @@ Last code unit = 'z' Subject length lower bound = 2 JIT compilation was successful aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits -Minimum match limit = 3 +Minimum match limit = 2 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz (JIT) 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaz\=find_limits -Minimum match limit = 16384 +Minimum match limit = 16383 No match (JIT) !((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I @@ -198,25 +198,25 @@ May match empty string Subject length lower bound = 0 JIT compilation was successful /* this is a C style comment */\=find_limits -Minimum match limit = 2 +Minimum match limit = 1 0: /* this is a C style comment */ (JIT) 1: /* this is a C style comment */ /^(?>a)++/ aa\=find_limits -Minimum match limit = 2 +Minimum match limit = 1 0: aa (JIT) aaaaaaaaa\=find_limits -Minimum match limit = 2 +Minimum match limit = 1 0: aaaaaaaaa (JIT) /(a)(?1)++/ aa\=find_limits -Minimum match limit = 2 +Minimum match limit = 1 0: aa (JIT) 1: a aaaaaaaaa\=find_limits -Minimum match limit = 2 +Minimum match limit = 1 0: aaaaaaaaa (JIT) 1: a @@ -237,12 +237,12 @@ Minimum match limit = 1 /^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/ aabbccddee\=find_limits -Minimum match limit = 6 +Minimum match limit = 5 0: aabbccddee (JIT) /^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/ aabbccddee\=find_limits -Minimum match limit = 6 +Minimum match limit = 5 0: aabbccddee (JIT) 1: aa 2: bb @@ -252,7 +252,7 @@ Minimum match limit = 6 /^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/ aabbccddee\=find_limits -Minimum match limit = 6 +Minimum match limit = 5 0: aabbccddee (JIT) 1: aa 2: cc @@ -260,7 +260,7 @@ Minimum match limit = 6 /^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/jitfast aabbccddee\=find_limits -Minimum match limit = 6 +Minimum match limit = 5 0: aabbccddee (JIT) 1: aa 2: cc