Prefer single character optimization over bracket repetition in JIT.

This commit is contained in:
Zoltán Herczeg 2021-02-19 11:58:36 +00:00
parent 19a1319c0a
commit 0dd0283b17
1 changed file with 17 additions and 21 deletions

View File

@ -1238,6 +1238,7 @@ return: current number of iterators enhanced with fast fail
*/ */
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth, int start) static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth, int start)
{ {
PCRE2_SPTR begin = cc;
PCRE2_SPTR next_alt; PCRE2_SPTR next_alt;
PCRE2_SPTR end; PCRE2_SPTR end;
PCRE2_SPTR accelerated_start; PCRE2_SPTR accelerated_start;
@ -1475,31 +1476,19 @@ do
case OP_CBRA: case OP_CBRA:
end = cc + GET(cc, 1); end = cc + GET(cc, 1);
if (*end == OP_KET && PRIVATE_DATA(end) == 0)
{
if (*cc == OP_CBRA)
{
if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
break;
cc += IMM2_SIZE;
}
cc += 1 + LINK_SIZE;
continue;
}
fast_forward_allowed = FALSE; fast_forward_allowed = FALSE;
if (depth >= 4) if (depth >= 4)
break; break;
end = bracketend(cc) - (1 + LINK_SIZE); end = bracketend(cc) - (1 + LINK_SIZE);
if (*end != OP_KET || PRIVATE_DATA(end) != 0) if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
break;
if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
break; break;
count = detect_early_fail(common, cc, private_data_start, depth + 1, count); count = detect_early_fail(common, cc, private_data_start, depth + 1, count);
if (PRIVATE_DATA(cc) != 0)
common->private_data_ptrs[begin - common->start] = 1;
if (count < EARLY_FAIL_ENHANCE_MAX) if (count < EARLY_FAIL_ENHANCE_MAX)
{ {
cc = end + (1 + LINK_SIZE); cc = end + (1 + LINK_SIZE);
@ -1555,6 +1544,8 @@ do
return EARLY_FAIL_ENHANCE_MAX; return EARLY_FAIL_ENHANCE_MAX;
} }
/* Cannot be part of a repeat. */
common->private_data_ptrs[begin - common->start] = 1;
count++; count++;
if (count < EARLY_FAIL_ENHANCE_MAX) if (count < EARLY_FAIL_ENHANCE_MAX)
@ -1620,11 +1611,12 @@ sljit_sw length = end - begin;
sljit_s32 min, max, i; sljit_s32 min, max, i;
/* Detect fixed iterations first. */ /* Detect fixed iterations first. */
if (end[-(1 + LINK_SIZE)] != OP_KET) if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
return FALSE; return FALSE;
/* Already detected repeat. */ /* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?:AB){1,3}/
if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0) * Skip the check of the second part. */
if (PRIVATE_DATA(end - LINK_SIZE) == 0)
return TRUE; return TRUE;
next = end; next = end;
@ -1763,6 +1755,7 @@ while (cc < ccend)
if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE) if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
break; break;
/* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */
if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)) if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
{ {
if (detect_repeat(common, cc)) if (detect_repeat(common, cc))
@ -1813,6 +1806,7 @@ while (cc < ccend)
case OP_COND: case OP_COND:
/* Might be a hidden SCOND. */ /* Might be a hidden SCOND. */
common->private_data_ptrs[cc - common->start] = 0;
alternative = cc + GET(cc, 1); alternative = cc + GET(cc, 1);
if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
{ {
@ -13661,10 +13655,12 @@ if (!common->private_data_ptrs)
memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32)); memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw); private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
set_private_data_ptrs(common, &private_data_size, ccend);
if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back) if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
detect_early_fail(common, common->start, &private_data_size, 0, 0); detect_early_fail(common, common->start, &private_data_size, 0, 0);
set_private_data_ptrs(common, &private_data_size, ccend);
SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr); SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
if (private_data_size > SLJIT_MAX_LOCAL_SIZE) if (private_data_size > SLJIT_MAX_LOCAL_SIZE)