Support more accelerated repeat cases in JIT.

This commit is contained in:
Zoltán Herczeg 2020-02-27 08:35:14 +00:00
parent 3155a6951f
commit 325908279e
1 changed file with 120 additions and 46 deletions

View File

@ -1270,6 +1270,7 @@ switch(*cc)
cc += (1 + (32 / sizeof(PCRE2_UCHAR))); cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
#endif #endif
/* Only these types are supported. */
switch(*cc) switch(*cc)
{ {
case OP_CRSTAR: case OP_CRSTAR:
@ -1315,8 +1316,10 @@ while (TRUE)
break; break;
end = cc + GET(cc, 1); end = cc + GET(cc, 1);
/* Iterated brackets are skipped. */
if (*end != OP_KET || PRIVATE_DATA(end) != 0) if (*end != OP_KET || PRIVATE_DATA(end) != 0)
return FALSE; return FALSE;
if (*cc == OP_CBRA) if (*cc == OP_CBRA)
{ {
if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
@ -1336,20 +1339,15 @@ if (is_accelerated_repeat(cc))
return FALSE; return FALSE;
} }
static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth) static void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth)
{ {
PCRE2_SPTR next_alt; PCRE2_SPTR next_alt;
PCRE2_SPTR end;
SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA); SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) do
return;
next_alt = bracketend(cc) - (1 + LINK_SIZE);
if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
return;
do
{ {
next_alt = cc + GET(cc, 1); next_alt = cc + GET(cc, 1);
@ -1364,22 +1362,99 @@ static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc
case OP_SET_SOM: case OP_SET_SOM:
case OP_NOT_WORD_BOUNDARY: case OP_NOT_WORD_BOUNDARY:
case OP_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
case OP_NOT_DIGIT:
case OP_DIGIT:
case OP_NOT_WHITESPACE:
case OP_WHITESPACE:
case OP_NOT_WORDCHAR:
case OP_WORDCHAR:
case OP_ANY:
case OP_ALLANY:
case OP_ANYBYTE:
case OP_ANYNL:
case OP_NOT_HSPACE:
case OP_HSPACE:
case OP_NOT_VSPACE:
case OP_VSPACE:
case OP_EODN: case OP_EODN:
case OP_EOD: case OP_EOD:
case OP_CIRC: case OP_CIRC:
case OP_CIRCM: case OP_CIRCM:
case OP_DOLL: case OP_DOLL:
case OP_DOLLM: case OP_DOLLM:
/* Zero width assertions. */
cc++; cc++;
continue; continue;
case OP_NOTPROP:
case OP_PROP:
cc += 1 + 2;
continue;
case OP_CHAR:
case OP_CHARI:
case OP_NOT:
case OP_NOTI:
cc += 2;
#ifdef SUPPORT_UNICODE
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
continue;
case OP_CLASS:
case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
case OP_XCLASS:
end = cc + ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
#else
end = cc + (1 + (32 / sizeof(PCRE2_UCHAR)));
#endif
if (*end >= OP_CRSTAR && *end <= OP_CRPOSRANGE)
break;
cc = end;
continue;
case OP_BRA:
case OP_CBRA:
end = cc + GET(cc, 1);
if (*end == OP_KET && PRIVATE_DATA(end) == 0)
{
if (*cc == OP_CBRA)
{
if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
break;
cc += IMM2_SIZE;
}
cc += 1 + LINK_SIZE;
continue;
}
if (depth == 0)
break;
end = bracketend(cc) - (1 + LINK_SIZE);
if (*end != OP_KET || PRIVATE_DATA(end) != 0)
break;
if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
break;
detect_fast_fail(common, cc, private_data_start, depth - 1);
break;
case OP_KET:
SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
if (cc >= next_alt)
break;
cc += 1 + LINK_SIZE;
continue;
} }
break; break;
} }
if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
detect_fast_fail(common, cc, private_data_start, depth - 1);
if (is_accelerated_repeat(cc)) if (is_accelerated_repeat(cc))
{ {
common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start; common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
@ -1393,10 +1468,9 @@ static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc
if (*private_data_start > SLJIT_MAX_LOCAL_SIZE) if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
return; return;
} }
cc = next_alt; cc = next_alt;
} }
while (*cc == OP_ALT); while (*cc == OP_ALT);
} }
static int get_class_iterator_size(PCRE2_SPTR cc) static int get_class_iterator_size(PCRE2_SPTR cc)