Improve single-character iterators and add a special path for dotall.

This commit is contained in:
Zoltán Herczeg 2019-06-07 13:48:59 +00:00
parent dea540877b
commit cc51779d88
1 changed files with 300 additions and 162 deletions

View File

@ -3371,6 +3371,35 @@ else
JUMPHERE(jump);
}
static void process_partial_match(compiler_common *common)
{
/* Emits the partial-match bookkeeping for the current position. What is
emitted depends on common->mode; for any mode other than the two partial
matching modes below, no code is generated at all. */
DEFINE_COMPILER;
struct sljit_jump *skip_reset;

if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
  {
  /* Soft partial matching: store the immediate 0 into the hit_start stack
  slot, but jump over that store when the saved start_used_ptr slot
  compares greater than or equal to STR_PTR. */
  skip_reset = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  JUMPHERE(skip_reset);
  return;
  }

if (common->mode != PCRE2_JIT_PARTIAL_HARD)
  return;

/* Hard partial matching: when the saved start_used_ptr slot compares less
than STR_PTR, leave through the partial match exit. If that exit has no
label yet, the jump is queued on common->partialmatch instead. */
if (common->partialmatchlabel == NULL)
  {
  add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
  return;
  }

CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
}
/* Emits an "input remains" check that branches to an existing label.

   common  the compiler state; also forwarded to process_partial_match().
   label   destination of the emitted compare-and-jump.

The callers visible in this file pass the label placed at the top of a
character-matching loop, so this helper acts as the loop's back edge. */
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
DEFINE_COMPILER;
/* Jump to label while STR_PTR compares less than STR_END. */
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);
/* Emitted after the jump, so this is the path where the comparison above
failed; the mode-specific partial match handling goes here. */
process_partial_match(common);
}
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
/* Reads the character into TMP1, keeps STR_PTR.
@ -11466,18 +11495,74 @@ switch(opcode)
JUMPTO(SLJIT_JUMP, label);
if (jump != NULL)
JUMPHERE(jump);
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
break;
}
#ifdef SUPPORT_UNICODE
else if (type == OP_ALLANY && !common->invalid_utf)
#else
else if (type == OP_ALLANY)
#endif
{
if (opcode == OP_STAR)
{
if (private_data_ptr == 0)
allocate_stack(common, 2);
OP1(SLJIT_MOV, base, offset0, STR_END, 0);
OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
process_partial_match(common);
if (fast_str_ptr != 0)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_END, 0);
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
break;
}
#ifdef SUPPORT_UNICODE
else if (!common->utf)
#else
else
#endif
{
if (private_data_ptr == 0)
allocate_stack(common, 2);
OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
if (common->mode == PCRE2_JIT_COMPLETE)
{
OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
}
else
{
jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
process_partial_match(common);
JUMPHERE(jump);
}
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
if (fast_str_ptr != 0)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
break;
}
}
charpos_enabled = FALSE;
charpos_char = 0;
charpos_othercasebit = 0;
if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
{
charpos_enabled = TRUE;
#ifdef SUPPORT_UNICODE
charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
#else
charpos_enabled = TRUE;
#endif
if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
{
@ -11587,17 +11672,19 @@ switch(opcode)
if (opcode == OP_UPTO)
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
detect_partial_match(common, &no_match);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
if (opcode == OP_UPTO)
{
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
}
else
JUMPTO(SLJIT_JUMP, label);
detect_partial_match_to(common, label);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
set_jumps(no_match, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
@ -11614,17 +11701,17 @@ switch(opcode)
if (opcode == OP_UPTO)
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
label = LABEL();
detect_partial_match(common, &no_match);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
if (opcode == OP_UPTO)
{
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
}
else
JUMPTO(SLJIT_JUMP, label);
detect_partial_match_to(common, label);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
set_jumps(no_char1_match, LABEL());
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
@ -11633,7 +11720,7 @@ switch(opcode)
if (fast_str_ptr != 0)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
}
}
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
break;
@ -11670,25 +11757,47 @@ switch(opcode)
break;
case OP_POSSTAR:
#if defined SUPPORT_UNICODE
if (type == OP_ALLANY && !common->invalid_utf)
#else
if (type == OP_ALLANY)
#endif
{
OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
process_partial_match(common);
if (fast_str_ptr != 0)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_END, 0);
break;
}
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
{
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
detect_partial_match(common, &no_match);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
JUMPTO(SLJIT_JUMP, label);
detect_partial_match_to(common, label);
set_jumps(no_match, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
if (fast_str_ptr != 0)
{
if (tmp_base == TMP3)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, TMP3, 0);
else
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
}
break;
}
#endif
label = LABEL();
detect_partial_match(common, &no_match);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
JUMPTO(SLJIT_JUMP, label);
detect_partial_match_to(common, label);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
set_jumps(no_char1_match, LABEL());
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
set_jumps(no_match, LABEL());
@ -11703,23 +11812,52 @@ switch(opcode)
{
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
detect_partial_match(common, &no_match);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
detect_partial_match_to(common, label);
set_jumps(no_match, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
break;
}
#endif
if (type == OP_ALLANY)
{
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
if (common->mode == PCRE2_JIT_COMPLETE)
{
OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
}
else
{
jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
process_partial_match(common);
JUMPHERE(jump);
}
if (fast_str_ptr != 0)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0);
break;
}
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
label = LABEL();
detect_partial_match(common, &no_match);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
detect_partial_match_to(common, label);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
set_jumps(no_char1_match, LABEL());
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
set_jumps(no_match, LABEL());