Support (*ACCEPT) inside recurse in JIT.

This commit is contained in:
Zoltán Herczeg 2017-03-30 13:25:20 +00:00
parent 424bba4d15
commit a5da70b28c
2 changed files with 149 additions and 80 deletions

View File

@ -1862,11 +1862,14 @@ for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
#undef RECURSE_TMP_REG_COUNT
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL *needs_control_head)
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
{
int length = 1;
int size;
PCRE2_SPTR alternative;
BOOL quit_found = FALSE;
BOOL accept_found = FALSE;
BOOL setsom_found = FALSE;
BOOL setmark_found = FALSE;
BOOL capture_last_found = FALSE;
@ -1875,7 +1878,6 @@ BOOL control_head_found = FALSE;
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
control_head_found = TRUE;
length++;
#endif
/* Calculate the sum of the private machine words. */
@ -1886,30 +1888,17 @@ while (cc < ccend)
{
case OP_SET_SOM:
SLJIT_ASSERT(common->has_set_som);
if (!setsom_found)
{
length++;
setsom_found = TRUE;
}
setsom_found = TRUE;
cc += 1;
break;
case OP_RECURSE:
if (common->has_set_som && !setsom_found)
{
length++;
if (common->has_set_som)
setsom_found = TRUE;
}
if (common->mark_ptr != 0 && !setmark_found)
{
length++;
if (common->mark_ptr != 0)
setmark_found = TRUE;
}
if (common->capture_last_ptr != 0 && !capture_last_found)
{
length++;
if (common->capture_last_ptr != 0)
capture_last_found = TRUE;
}
cc += 1 + LINK_SIZE;
break;
@ -1940,11 +1929,8 @@ while (cc < ccend)
case OP_CBRA:
case OP_SCBRA:
length += 2;
if (common->capture_last_ptr != 0 && !capture_last_found)
{
length++;
if (common->capture_last_ptr != 0)
capture_last_found = TRUE;
}
if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
length++;
cc += 1 + LINK_SIZE + IMM2_SIZE;
@ -1953,11 +1939,8 @@ while (cc < ccend)
case OP_CBRAPOS:
case OP_SCBRAPOS:
length += 2 + 2;
if (common->capture_last_ptr != 0 && !capture_last_found)
{
length++;
if (common->capture_last_ptr != 0)
capture_last_found = TRUE;
}
cc += 1 + LINK_SIZE + IMM2_SIZE;
break;
@ -2032,25 +2015,38 @@ while (cc < ccend)
case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0);
if (!setmark_found)
{
length++;
setmark_found = TRUE;
}
if (common->control_head_ptr != 0 && !control_head_found)
{
length++;
if (common->control_head_ptr != 0)
control_head_found = TRUE;
}
if (*cc != OP_MARK)
quit_found = TRUE;
cc += 1 + 2 + cc[1];
break;
case OP_PRUNE:
case OP_SKIP:
case OP_COMMIT:
quit_found = TRUE;
cc++;
break;
case OP_SKIP_ARG:
quit_found = TRUE;
cc += 1 + 2 + cc[1];
break;
case OP_THEN:
SLJIT_ASSERT(common->control_head_ptr != 0);
quit_found = TRUE;
if (!control_head_found)
{
length++;
control_head_found = TRUE;
}
cc++;
break;
case OP_ACCEPT:
case OP_ASSERT_ACCEPT:
accept_found = TRUE;
cc++;
break;
@ -2062,7 +2058,21 @@ while (cc < ccend)
}
SLJIT_ASSERT(cc == ccend);
if (control_head_found)
length++;
if (capture_last_found)
length++;
if (quit_found)
{
if (setsom_found)
length++;
if (setmark_found)
length++;
}
*needs_control_head = control_head_found;
*has_quit = quit_found;
*has_accept = accept_found;
return length;
}
@ -2070,11 +2080,12 @@ enum copy_recurse_data_types {
recurse_copy_from_global,
recurse_copy_private_to_global,
recurse_copy_shared_to_global,
recurse_copy_kept_shared_to_global,
recurse_swap_global
};
static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
int type, int stackptr, int stacktop)
int type, int stackptr, int stacktop, BOOL has_quit)
{
delayed_mem_copy_status status;
PCRE2_SPTR alternative;
@ -2102,6 +2113,7 @@ switch (type)
case recurse_copy_private_to_global:
case recurse_copy_shared_to_global:
case recurse_copy_kept_shared_to_global:
from_sp = FALSE;
base_reg = STACK_TOP;
break;
@ -2109,20 +2121,20 @@ switch (type)
default:
SLJIT_ASSERT(type == recurse_swap_global);
from_sp = FALSE;
base_reg = TMP1;
base_reg = TMP2;
break;
}
stackptr = STACK(stackptr);
stacktop = STACK(stacktop);
status.tmp_regs[0] = TMP2;
status.saved_tmp_regs[0] = TMP2;
status.tmp_regs[0] = TMP1;
status.saved_tmp_regs[0] = TMP1;
if (base_reg != TMP1)
if (base_reg != TMP2)
{
status.tmp_regs[1] = TMP1;
status.saved_tmp_regs[1] = TMP1;
status.tmp_regs[1] = TMP2;
status.saved_tmp_regs[1] = TMP2;
}
else
{
@ -2141,8 +2153,10 @@ else
delayed_mem_copy_init(&status, common);
if (type != recurse_copy_shared_to_global)
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
{
SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
if (!from_sp)
delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
@ -2175,7 +2189,7 @@ while (cc < ccend)
{
case OP_SET_SOM:
SLJIT_ASSERT(common->has_set_som);
if (!setsom_found)
if (has_quit && !setsom_found)
{
kept_shared_srcw[0] = OVECTOR(0);
kept_shared_count = 1;
@ -2185,17 +2199,20 @@ while (cc < ccend)
break;
case OP_RECURSE:
if (common->has_set_som && !setsom_found)
if (has_quit)
{
kept_shared_srcw[0] = OVECTOR(0);
kept_shared_count = 1;
setsom_found = TRUE;
}
if (common->mark_ptr != 0 && !setmark_found)
{
kept_shared_srcw[shared_count] = common->mark_ptr;
kept_shared_count++;
setmark_found = TRUE;
if (common->has_set_som && !setsom_found)
{
kept_shared_srcw[0] = OVECTOR(0);
kept_shared_count = 1;
setsom_found = TRUE;
}
if (common->mark_ptr != 0 && !setmark_found)
{
kept_shared_srcw[kept_shared_count] = common->mark_ptr;
kept_shared_count++;
setmark_found = TRUE;
}
}
if (common->capture_last_ptr != 0 && !capture_last_found)
{
@ -2384,7 +2401,7 @@ while (cc < ccend)
case OP_PRUNE_ARG:
case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0);
if (!setmark_found)
if (has_quit && !setmark_found)
{
kept_shared_srcw[0] = common->mark_ptr;
kept_shared_count = 1;
@ -2416,8 +2433,10 @@ while (cc < ccend)
break;
}
if (type != recurse_copy_shared_to_global)
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
{
SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
for (i = 0; i < private_count; i++)
{
SLJIT_ASSERT(private_srcw[i] != 0);
@ -2434,8 +2453,10 @@ while (cc < ccend)
else
stackptr += sizeof(sljit_sw) * private_count;
if (type != recurse_copy_private_to_global)
if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
{
SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
for (i = 0; i < shared_count; i++)
{
SLJIT_ASSERT(shared_srcw[i] != 0);
@ -2452,8 +2473,10 @@ while (cc < ccend)
else
stackptr += sizeof(sljit_sw) * shared_count;
if (type == recurse_copy_from_global || type == recurse_copy_shared_to_global)
if (type != recurse_copy_private_to_global && type != recurse_swap_global)
{
SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
for (i = 0; i < kept_shared_count; i++)
{
SLJIT_ASSERT(kept_shared_srcw[i] != 0);
@ -11001,13 +11024,16 @@ PCRE2_SPTR cc = common->start + common->currententry->start;
PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
BOOL needs_control_head;
int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head);
BOOL has_quit;
BOOL has_accept;
int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
int alt_count, alt_max, local_size;
backtrack_common altbacktrack;
jump_list *match = NULL;
sljit_uw *next_update_addr = NULL;
struct sljit_jump *alt1 = NULL;
struct sljit_jump *alt2 = NULL;
struct sljit_jump *accept_exit = NULL;
struct sljit_label *quit;
/* Recurse captures then. */
@ -11028,13 +11054,14 @@ count_match(common);
local_size = (alt_max > 1) ? 2 : 1;
/* Stack layout: [private data][return address][alternative index] */
/* (Reversed) stack layout:
[private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
allocate_stack(common, private_data_size + local_size);
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size);
copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);
/* This variable is saved and restored all time when we enter or exit from a recursive context. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
@ -11064,14 +11091,10 @@ while (1)
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
return;
/* TODO: this needs to be improved in the future. */
set_jumps(common->accept, LABEL());
common->accept_label = NULL;
allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
allocate_stack(common, (alt_max > 1) ? 2 : 1);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
if (alt_max > 1)
if (alt_max > 1 || has_accept)
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
add_jump(compiler, &match, JUMP(SLJIT_JUMP));
@ -11083,13 +11106,16 @@ while (1)
common->currententry->backtrack_label = LABEL();
set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
sljit_emit_fast_enter(compiler, TMP2, 0);
sljit_emit_fast_enter(compiler, TMP1, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
if (has_accept)
accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_max * sizeof (sljit_sw));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), STACK(local_size - 1), TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size);
copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
if (alt_max > 1)
{
@ -11113,7 +11139,7 @@ while (1)
}
}
else
free_stack(common, 1);
free_stack(common, has_accept ? 2 : 1);
}
else if (alt_max > 4)
add_label_addr(common, next_update_addr++);
@ -11151,7 +11177,7 @@ while (1)
quit = LABEL();
copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size);
copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
free_stack(common, private_data_size + local_size);
@ -11160,19 +11186,50 @@ sljit_emit_fast_return(compiler, TMP2, 0);
if (common->quit != NULL)
{
SLJIT_ASSERT(has_quit);
set_jumps(common->quit, LABEL());
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size);
copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
JUMPTO(SLJIT_JUMP, quit);
}
if (has_accept)
{
JUMPHERE(accept_exit);
free_stack(common, 2);
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
free_stack(common, private_data_size + local_size);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, TMP2, 0);
}
if (common->accept != NULL)
{
SLJIT_ASSERT(has_accept);
set_jumps(common->accept, LABEL());
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
allocate_stack(common, 2);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
}
set_jumps(match, LABEL());
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size);
copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), STACK(local_size - 1));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
sljit_emit_fast_return(compiler, TMP2, 0);
}

View File

@ -724,6 +724,8 @@ static struct regression_test_case regression_test_cases[] = {
{ MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
{ MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
{ MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
{ MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" },
{ MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" },
/* 16 bit specific tests. */
{ CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
@ -842,6 +844,16 @@ static struct regression_test_case regression_test_cases[] = {
{ MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
{ MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
/* Recurse and control verbs. */
{ MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" },
{ MU, A, 0, 0, "((a)\\2(*ACCEPT)b){0}a(?1)b", "aaacaaabb" },
{ MU, A, 0, 0, "((ab|a(*ACCEPT)x)+|ababababax){0}_(?1)_", "_ababababax_ _ababababa_" },
{ MU, A, 0, 0, "((.)(?:A(*ACCEPT)|(?1)\\2)){0}_(?1)_", "_bcdaAdcb_bcdaAdcb_" },
{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_", "_ab_" },
{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_|(_aa_)", "_aa_" },
{ MU, A, 0, 0, "(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_", "_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" },
{ MU, A, 0, 0, "((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_", "_aaaabbbbccccddd_ _aaaabbbbccccdddd_" },
/* Deep recursion. */
{ MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
{ MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },