Merge branch 'PhilipHazel:master' into master

This commit is contained in:
Joe Zhang 2022-03-28 16:58:41 +08:00 committed by GitHub
commit dd1e3921c1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 241 additions and 148 deletions

View File

@ -95,6 +95,9 @@ fail.
23. Fixed a unicode properrty matching issue in JIT. The character was not
fully read in caseless matching.
24. Fixed an issue affecting recursions in JIT caused by duplicated data
transfers.
Version 10.39 29-October-2021
-----------------------------

View File

@ -413,6 +413,9 @@ typedef struct compiler_common {
/* Locals used by fast fail optimization. */
sljit_s32 early_fail_start_ptr;
sljit_s32 early_fail_end_ptr;
/* Variables used by recursive call generator. */
sljit_s32 recurse_bitset_size;
uint8_t *recurse_bitset;
/* Flipped and lower case tables. */
const sljit_u8 *fcc;
@ -2317,22 +2320,47 @@ for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
#undef RECURSE_TMP_REG_COUNT
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
{
uint8_t *byte;
uint8_t mask;
SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);
bit_index >>= SLJIT_WORD_SHIFT;
SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size);
mask = 1 << (bit_index & 0x7);
byte = common->recurse_bitset + (bit_index >> 3);
if (*byte & mask)
return FALSE;
*byte |= mask;
return TRUE;
}
enum get_recurse_flags {
recurse_flag_quit_found = (1 << 0),
recurse_flag_accept_found = (1 << 1),
recurse_flag_setsom_found = (1 << 2),
recurse_flag_setmark_found = (1 << 3),
recurse_flag_control_head_found = (1 << 4),
};
static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
{
int length = 1;
int size;
int size, offset;
PCRE2_SPTR alternative;
BOOL quit_found = FALSE;
BOOL accept_found = FALSE;
BOOL setsom_found = FALSE;
BOOL setmark_found = FALSE;
BOOL capture_last_found = FALSE;
BOOL control_head_found = FALSE;
uint32_t recurse_flags = 0;
memset(common->recurse_bitset, 0, common->recurse_bitset_size);
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
control_head_found = TRUE;
recurse_flags |= recurse_flag_control_head_found;
#endif
/* Calculate the sum of the private machine words. */
@ -2343,24 +2371,26 @@ while (cc < ccend)
{
case OP_SET_SOM:
SLJIT_ASSERT(common->has_set_som);
setsom_found = TRUE;
recurse_flags |= recurse_flag_setsom_found;
cc += 1;
break;
case OP_RECURSE:
if (common->has_set_som)
setsom_found = TRUE;
recurse_flags |= recurse_flag_setsom_found;
if (common->mark_ptr != 0)
setmark_found = TRUE;
if (common->capture_last_ptr != 0)
capture_last_found = TRUE;
recurse_flags |= recurse_flag_setmark_found;
if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
length++;
cc += 1 + LINK_SIZE;
break;
case OP_KET:
if (PRIVATE_DATA(cc) != 0)
offset = PRIVATE_DATA(cc);
if (offset != 0)
{
length++;
if (recurse_check_bit(common, offset))
length++;
SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
cc += PRIVATE_DATA(cc + 1);
}
@ -2379,39 +2409,55 @@ while (cc < ccend)
case OP_SBRA:
case OP_SBRAPOS:
case OP_SCOND:
length++;
SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
if (recurse_check_bit(common, PRIVATE_DATA(cc)))
length++;
cc += 1 + LINK_SIZE;
break;
case OP_CBRA:
case OP_SCBRA:
length += 2;
if (common->capture_last_ptr != 0)
capture_last_found = TRUE;
if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
offset = GET2(cc, 1 + LINK_SIZE);
if (recurse_check_bit(common, OVECTOR(offset << 1)))
{
SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
length += 2;
}
if (common->optimized_cbracket[offset] == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset)))
length++;
if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
length++;
cc += 1 + LINK_SIZE + IMM2_SIZE;
break;
case OP_CBRAPOS:
case OP_SCBRAPOS:
length += 2 + 2;
if (common->capture_last_ptr != 0)
capture_last_found = TRUE;
offset = GET2(cc, 1 + LINK_SIZE);
if (recurse_check_bit(common, OVECTOR(offset << 1)))
{
SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
length += 2;
}
if (recurse_check_bit(common, OVECTOR_PRIV(offset)))
length++;
if (recurse_check_bit(common, PRIVATE_DATA(cc)))
length++;
if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
length++;
cc += 1 + LINK_SIZE + IMM2_SIZE;
break;
case OP_COND:
/* Might be a hidden SCOND. */
alternative = cc + GET(cc, 1);
if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
length++;
cc += 1 + LINK_SIZE;
break;
CASE_ITERATOR_PRIVATE_DATA_1
if (PRIVATE_DATA(cc) != 0)
offset = PRIVATE_DATA(cc);
if (offset != 0 && recurse_check_bit(common, offset))
length++;
cc += 2;
#ifdef SUPPORT_UNICODE
@ -2420,8 +2466,12 @@ while (cc < ccend)
break;
CASE_ITERATOR_PRIVATE_DATA_2A
if (PRIVATE_DATA(cc) != 0)
offset = PRIVATE_DATA(cc);
if (offset != 0 && recurse_check_bit(common, offset))
{
SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
length += 2;
}
cc += 2;
#ifdef SUPPORT_UNICODE
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
@ -2429,8 +2479,12 @@ while (cc < ccend)
break;
CASE_ITERATOR_PRIVATE_DATA_2B
if (PRIVATE_DATA(cc) != 0)
offset = PRIVATE_DATA(cc);
if (offset != 0 && recurse_check_bit(common, offset))
{
SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
length += 2;
}
cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
@ -2438,20 +2492,29 @@ while (cc < ccend)
break;
CASE_ITERATOR_TYPE_PRIVATE_DATA_1
if (PRIVATE_DATA(cc) != 0)
offset = PRIVATE_DATA(cc);
if (offset != 0 && recurse_check_bit(common, offset))
length++;
cc += 1;
break;
CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
if (PRIVATE_DATA(cc) != 0)
offset = PRIVATE_DATA(cc);
if (offset != 0 && recurse_check_bit(common, offset))
{
SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
length += 2;
}
cc += 1;
break;
CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
if (PRIVATE_DATA(cc) != 0)
offset = PRIVATE_DATA(cc);
if (offset != 0 && recurse_check_bit(common, offset))
{
SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
length += 2;
}
cc += 1 + IMM2_SIZE;
break;
@ -2463,7 +2526,9 @@ while (cc < ccend)
#else
size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
if (PRIVATE_DATA(cc) != 0)
offset = PRIVATE_DATA(cc);
if (offset != 0 && recurse_check_bit(common, offset))
length += get_class_iterator_size(cc + size);
cc += size;
break;
@ -2473,12 +2538,11 @@ while (cc < ccend)
case OP_PRUNE_ARG:
case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0);
if (!setmark_found)
setmark_found = TRUE;
recurse_flags |= recurse_flag_setmark_found;
if (common->control_head_ptr != 0)
control_head_found = TRUE;
recurse_flags |= recurse_flag_control_head_found;
if (*cc != OP_MARK)
quit_found = TRUE;
recurse_flags |= recurse_flag_quit_found;
cc += 1 + 2 + cc[1];
break;
@ -2486,26 +2550,24 @@ while (cc < ccend)
case OP_PRUNE:
case OP_SKIP:
case OP_COMMIT:
quit_found = TRUE;
recurse_flags |= recurse_flag_quit_found;
cc++;
break;
case OP_SKIP_ARG:
quit_found = TRUE;
recurse_flags |= recurse_flag_quit_found;
cc += 1 + 2 + cc[1];
break;
case OP_THEN:
SLJIT_ASSERT(common->control_head_ptr != 0);
quit_found = TRUE;
if (!control_head_found)
control_head_found = TRUE;
recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
cc++;
break;
case OP_ACCEPT:
case OP_ASSERT_ACCEPT:
accept_found = TRUE;
recurse_flags |= recurse_flag_accept_found;
cc++;
break;
@ -2517,21 +2579,17 @@ while (cc < ccend)
}
SLJIT_ASSERT(cc == ccend);
if (control_head_found)
if (recurse_flags & recurse_flag_control_head_found)
length++;
if (capture_last_found)
length++;
if (quit_found)
if (recurse_flags & recurse_flag_quit_found)
{
if (setsom_found)
if (recurse_flags & recurse_flag_setsom_found)
length++;
if (setmark_found)
if (recurse_flags & recurse_flag_setmark_found)
length++;
}
*needs_control_head = control_head_found;
*has_quit = quit_found;
*has_accept = accept_found;
*result_flags = recurse_flags;
return length;
}
@ -2544,7 +2602,7 @@ enum copy_recurse_data_types {
};
static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
int type, int stackptr, int stacktop, BOOL has_quit)
int type, int stackptr, int stacktop, uint32_t recurse_flags)
{
delayed_mem_copy_status status;
PCRE2_SPTR alternative;
@ -2553,14 +2611,12 @@ sljit_sw shared_srcw[3];
sljit_sw kept_shared_srcw[2];
int private_count, shared_count, kept_shared_count;
int from_sp, base_reg, offset, i;
BOOL setsom_found = FALSE;
BOOL setmark_found = FALSE;
BOOL capture_last_found = FALSE;
BOOL control_head_found = FALSE;
memset(common->recurse_bitset, 0, common->recurse_bitset_size);
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
control_head_found = TRUE;
recurse_check_bit(common, common->control_head_ptr);
#endif
switch (type)
@ -2648,45 +2704,42 @@ while (cc < ccend)
{
case OP_SET_SOM:
SLJIT_ASSERT(common->has_set_som);
if (has_quit && !setsom_found)
if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
{
kept_shared_srcw[0] = OVECTOR(0);
kept_shared_count = 1;
setsom_found = TRUE;
}
cc += 1;
break;
case OP_RECURSE:
if (has_quit)
if (recurse_flags & recurse_flag_quit_found)
{
if (common->has_set_som && !setsom_found)
if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
{
kept_shared_srcw[0] = OVECTOR(0);
kept_shared_count = 1;
setsom_found = TRUE;
}
if (common->mark_ptr != 0 && !setmark_found)
if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
{
kept_shared_srcw[kept_shared_count] = common->mark_ptr;
kept_shared_count++;
setmark_found = TRUE;
}
}
if (common->capture_last_ptr != 0 && !capture_last_found)
if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
{
shared_srcw[0] = common->capture_last_ptr;
shared_count = 1;
capture_last_found = TRUE;
}
cc += 1 + LINK_SIZE;
break;
case OP_KET:
if (PRIVATE_DATA(cc) != 0)
private_srcw[0] = PRIVATE_DATA(cc);
if (private_srcw[0] != 0)
{
private_count = 1;
private_srcw[0] = PRIVATE_DATA(cc);
if (recurse_check_bit(common, private_srcw[0]))
private_count = 1;
SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
cc += PRIVATE_DATA(cc + 1);
}
@ -2705,50 +2758,66 @@ while (cc < ccend)
case OP_SBRA:
case OP_SBRAPOS:
case OP_SCOND:
private_count = 1;
private_srcw[0] = PRIVATE_DATA(cc);
if (recurse_check_bit(common, private_srcw[0]))
private_count = 1;
cc += 1 + LINK_SIZE;
break;
case OP_CBRA:
case OP_SCBRA:
offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
shared_srcw[0] = OVECTOR(offset);
shared_srcw[1] = OVECTOR(offset + 1);
shared_count = 2;
if (common->capture_last_ptr != 0 && !capture_last_found)
offset = GET2(cc, 1 + LINK_SIZE);
shared_srcw[0] = OVECTOR(offset << 1);
if (recurse_check_bit(common, shared_srcw[0]))
{
shared_srcw[2] = common->capture_last_ptr;
shared_count = 3;
capture_last_found = TRUE;
shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
shared_count = 2;
}
if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
{
private_count = 1;
private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
shared_srcw[shared_count] = common->capture_last_ptr;
shared_count++;
}
if (common->optimized_cbracket[offset] == 0)
{
private_srcw[0] = OVECTOR_PRIV(offset);
if (recurse_check_bit(common, private_srcw[0]))
private_count = 1;
}
cc += 1 + LINK_SIZE + IMM2_SIZE;
break;
case OP_CBRAPOS:
case OP_SCBRAPOS:
offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
shared_srcw[0] = OVECTOR(offset);
shared_srcw[1] = OVECTOR(offset + 1);
shared_count = 2;
if (common->capture_last_ptr != 0 && !capture_last_found)
offset = GET2(cc, 1 + LINK_SIZE);
shared_srcw[0] = OVECTOR(offset << 1);
if (recurse_check_bit(common, shared_srcw[0]))
{
shared_srcw[2] = common->capture_last_ptr;
shared_count = 3;
capture_last_found = TRUE;
shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
shared_count = 2;
}
if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
{
shared_srcw[shared_count] = common->capture_last_ptr;
shared_count++;
}
private_count = 2;
private_srcw[0] = PRIVATE_DATA(cc);
private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
if (recurse_check_bit(common, private_srcw[0]))
private_count = 1;
offset = OVECTOR_PRIV(offset);
if (recurse_check_bit(common, offset))
{
private_srcw[private_count] = offset;
private_count++;
}
cc += 1 + LINK_SIZE + IMM2_SIZE;
break;
@ -2757,18 +2826,17 @@ while (cc < ccend)
alternative = cc + GET(cc, 1);
if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
{
private_count = 1;
private_srcw[0] = PRIVATE_DATA(cc);
if (recurse_check_bit(common, private_srcw[0]))
private_count = 1;
}
cc += 1 + LINK_SIZE;
break;
CASE_ITERATOR_PRIVATE_DATA_1
if (PRIVATE_DATA(cc))
{
private_srcw[0] = PRIVATE_DATA(cc);
if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
private_count = 1;
private_srcw[0] = PRIVATE_DATA(cc);
}
cc += 2;
#ifdef SUPPORT_UNICODE
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
@ -2776,11 +2844,12 @@ while (cc < ccend)
break;
CASE_ITERATOR_PRIVATE_DATA_2A
if (PRIVATE_DATA(cc))
private_srcw[0] = PRIVATE_DATA(cc);
if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
{
private_count = 2;
private_srcw[0] = PRIVATE_DATA(cc);
private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
}
cc += 2;
#ifdef SUPPORT_UNICODE
@ -2789,11 +2858,12 @@ while (cc < ccend)
break;
CASE_ITERATOR_PRIVATE_DATA_2B
if (PRIVATE_DATA(cc))
private_srcw[0] = PRIVATE_DATA(cc);
if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
{
private_count = 2;
private_srcw[0] = PRIVATE_DATA(cc);
private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
}
cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
@ -2802,30 +2872,30 @@ while (cc < ccend)
break;
CASE_ITERATOR_TYPE_PRIVATE_DATA_1
if (PRIVATE_DATA(cc))
{
private_srcw[0] = PRIVATE_DATA(cc);
if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
private_count = 1;
private_srcw[0] = PRIVATE_DATA(cc);
}
cc += 1;
break;
CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
if (PRIVATE_DATA(cc))
private_srcw[0] = PRIVATE_DATA(cc);
if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
{
private_count = 2;
private_srcw[0] = PRIVATE_DATA(cc);
private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
}
cc += 1;
break;
CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
if (PRIVATE_DATA(cc))
private_srcw[0] = PRIVATE_DATA(cc);
if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
{
private_count = 2;
private_srcw[0] = PRIVATE_DATA(cc);
private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
}
cc += 1 + IMM2_SIZE;
break;
@ -2842,14 +2912,17 @@ while (cc < ccend)
switch(get_class_iterator_size(cc + i))
{
case 1:
private_count = 1;
private_srcw[0] = PRIVATE_DATA(cc);
break;
case 2:
private_count = 2;
private_srcw[0] = PRIVATE_DATA(cc);
private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
if (recurse_check_bit(common, private_srcw[0]))
{
private_count = 2;
private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
}
break;
default:
@ -2864,28 +2937,25 @@ while (cc < ccend)
case OP_PRUNE_ARG:
case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0);
if (has_quit && !setmark_found)
if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
{
kept_shared_srcw[0] = common->mark_ptr;
kept_shared_count = 1;
setmark_found = TRUE;
}
if (common->control_head_ptr != 0 && !control_head_found)
if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
{
private_srcw[0] = common->control_head_ptr;
private_count = 1;
control_head_found = TRUE;
}
cc += 1 + 2 + cc[1];
break;
case OP_THEN:
SLJIT_ASSERT(common->control_head_ptr != 0);
if (!control_head_found)
if (recurse_check_bit(common, common->control_head_ptr))
{
private_srcw[0] = common->control_head_ptr;
private_count = 1;
control_head_found = TRUE;
}
cc++;
break;
@ -2893,7 +2963,7 @@ while (cc < ccend)
default:
cc = next_opcode(common, cc);
SLJIT_ASSERT(cc != NULL);
break;
continue;
}
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
@ -13407,10 +13477,8 @@ DEFINE_COMPILER;
PCRE2_SPTR cc = common->start + common->currententry->start;
PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
BOOL needs_control_head;
BOOL has_quit;
BOOL has_accept;
int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
uint32_t recurse_flags = 0;
int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);
int alt_count, alt_max, local_size;
backtrack_common altbacktrack;
jump_list *match = NULL;
@ -13444,12 +13512,12 @@ allocate_stack(common, private_data_size + local_size);
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);
copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags);
/* This variable is saved and restored all time when we enter or exit from a recursive context. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
if (needs_control_head)
if (recurse_flags & recurse_flag_control_head_found)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
if (alt_max > 1)
@ -13474,10 +13542,10 @@ while (1)
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
return;
allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
if (alt_max > 1 || has_accept)
if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found))
{
if (alt_max > 3)
put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
@ -13496,14 +13564,14 @@ while (1)
sljit_emit_fast_enter(compiler, TMP1, 0);
if (has_accept)
if (recurse_flags & recurse_flag_accept_found)
accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
if (alt_max > 1)
{
@ -13520,7 +13588,7 @@ while (1)
next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
}
else
free_stack(common, has_accept ? 2 : 1);
free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1);
}
else if (alt_max > 3)
{
@ -13555,7 +13623,7 @@ while (1)
quit = LABEL();
copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);
copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
free_stack(common, private_data_size + local_size);
@ -13564,15 +13632,15 @@ OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
if (common->quit != NULL)
{
SLJIT_ASSERT(has_quit);
SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);
set_jumps(common->quit, LABEL());
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
JUMPTO(SLJIT_JUMP, quit);
}
if (has_accept)
if (recurse_flags & recurse_flag_accept_found)
{
JUMPHERE(accept_exit);
free_stack(common, 2);
@ -13580,7 +13648,7 @@ if (has_accept)
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);
copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
free_stack(common, private_data_size + local_size);
@ -13590,7 +13658,7 @@ if (has_accept)
if (common->accept != NULL)
{
SLJIT_ASSERT(has_accept);
SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);
set_jumps(common->accept, LABEL());
@ -13605,7 +13673,7 @@ set_jumps(match, LABEL());
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
@ -13811,7 +13879,7 @@ SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
total_length = ccend - common->start;
common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
if (!common->private_data_ptrs)
{
SLJIT_FREE(common->optimized_cbracket, allocator_data);
@ -13852,6 +13920,7 @@ if (!compiler)
common->compiler = compiler;
/* Main pcre2_jit_exec entry. */
SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, 0, 0, private_data_size);
/* Register init. */
@ -14074,20 +14143,40 @@ common->early_fail_end_ptr = 0;
common->currententry = common->entries;
common->local_quit_available = TRUE;
quit_label = common->quit_label;
while (common->currententry != NULL)
if (common->currententry != NULL)
{
/* Might add new entries. */
compile_recurse(common);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
/* A free bit for each private data. */
common->recurse_bitset_size = ((private_data_size / (int)sizeof(sljit_sw)) + 7) >> 3;
SLJIT_ASSERT(common->recurse_bitset_size > 0);
common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
if (common->recurse_bitset != NULL)
{
do
{
/* Might add new entries. */
compile_recurse(common);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
break;
flush_stubs(common);
common->currententry = common->currententry->next;
}
while (common->currententry != NULL);
SLJIT_FREE(common->recurse_bitset, allocator_data);
}
if (common->currententry != NULL)
{
/* The common->recurse_bitset has been freed. */
SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
sljit_free_compiler(compiler);
SLJIT_FREE(common->optimized_cbracket, allocator_data);
SLJIT_FREE(common->private_data_ptrs, allocator_data);
PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
return PCRE2_ERROR_NOMEMORY;
}
flush_stubs(common);
common->currententry = common->currententry->next;
}
common->local_quit_available = FALSE;
common->quit_label = quit_label;

View File

@ -751,6 +751,7 @@ static struct regression_test_case regression_test_cases[] = {
{ MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
{ MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" },
{ MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" },
{ MU, A, 0, 0 | F_NOMATCH, "(?1)$((.|\\2xx){1,2})", "abc" },
/* 16 bit specific tests. */
{ CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },