Rework alternative matching in JIT.

This commit is contained in:
Zoltán Herczeg 2019-07-18 06:11:04 +00:00
parent c11b23e8cc
commit f5b35e7943
3 changed files with 66 additions and 98 deletions

View File

@ -213,12 +213,6 @@ typedef struct stub_list {
struct stub_list *next;
} stub_list;
typedef struct label_addr_list {
struct sljit_label *label;
sljit_uw *update_addr;
struct label_addr_list *next;
} label_addr_list;
enum frame_types {
no_frame = -1,
no_stack = -2
@ -272,6 +266,8 @@ typedef struct bracket_backtrack {
assert_backtrack *assert;
/* For OP_ONCE. Less than 0 if not needed. */
int framesize;
/* For brackets with >3 alternatives. */
struct sljit_put_label *matching_put_label;
} u;
/* Points to our private memory word on the stack. */
int private_data_ptr;
@ -455,7 +451,6 @@ typedef struct compiler_common {
struct sljit_label *accept_label;
struct sljit_label *ff_newline_shortcut;
stub_list *stubs;
label_addr_list *label_addrs;
recurse_entry *entries;
recurse_entry *currententry;
jump_list *partialmatch;
@ -2849,20 +2844,6 @@ while (list_item)
common->stubs = NULL;
}
static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
{
DEFINE_COMPILER;
label_addr_list *label_addr;
label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
if (label_addr == NULL)
return;
label_addr->label = LABEL();
label_addr->update_addr = update_addr;
label_addr->next = common->label_addrs;
common->label_addrs = label_addr;
}
static SLJIT_INLINE void count_match(compiler_common *common)
{
DEFINE_COMPILER;
@ -10880,10 +10861,23 @@ if (ket != OP_KET || bra != OP_BRA)
if (offset != 0)
stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
/* Skip and count the other alternatives. */
i = 1;
while (*cc == OP_ALT)
{
cc += GET(cc, 1);
i++;
}
if (has_alternatives)
{
if (opcode != OP_ONCE)
{
if (i <= 3)
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
else
BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
}
if (ket != OP_KETRMAX)
BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
}
@ -10972,9 +10966,6 @@ if (bra == OP_BRAMINZERO)
if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
count_match(common);
/* Skip the other alternatives. */
while (*cc == OP_ALT)
cc += GET(cc, 1);
cc += 1 + LINK_SIZE;
if (opcode == OP_ONCE)
@ -12643,16 +12634,15 @@ PCRE2_SPTR ccprev;
PCRE2_UCHAR bra = OP_BRA;
PCRE2_UCHAR ket;
assert_backtrack *assert;
sljit_uw *next_update_addr = NULL;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *brazero = NULL;
struct sljit_jump *alt1 = NULL;
struct sljit_jump *alt2 = NULL;
struct sljit_jump *next_alt = NULL;
struct sljit_jump *once = NULL;
struct sljit_jump *cond = NULL;
struct sljit_label *rmin_label = NULL;
struct sljit_label *exact_label = NULL;
struct sljit_put_label *put_label;
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
{
@ -12801,7 +12791,7 @@ else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)
free_stack(common, 1);
alt_max = 2;
alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
}
}
else if (has_alternatives)
@ -12809,21 +12799,15 @@ else if (has_alternatives)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 1);
if (alt_max > 4)
if (alt_max > 3)
{
/* Table jump if alt_max is greater than 4. */
next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
if (SLJIT_UNLIKELY(next_update_addr == NULL))
return;
sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
add_label_addr(common, next_update_addr++);
sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
}
else
{
if (alt_max == 4)
alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
}
next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
}
COMPILE_BACKTRACKINGPATH(current->top);
@ -12860,7 +12844,7 @@ if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
if (has_alternatives)
{
alt_count = sizeof(sljit_uw);
alt_count = 1;
do
{
current->top = NULL;
@ -12939,7 +12923,12 @@ if (has_alternatives)
stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
if (opcode != OP_ONCE)
{
if (alt_max <= 3)
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
else
put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
}
if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
{
@ -12952,24 +12941,18 @@ if (has_alternatives)
if (opcode != OP_ONCE)
{
if (alt_max > 4)
add_label_addr(common, next_update_addr++);
else
if (alt_max <= 3)
{
if (alt_count != 2 * sizeof(sljit_uw))
JUMPHERE(next_alt);
alt_count++;
if (alt_count < alt_max)
{
JUMPHERE(alt1);
if (alt_max == 3 && alt_count == sizeof(sljit_uw))
alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
}
}
else
{
JUMPHERE(alt2);
if (alt_max == 4)
alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
}
}
alt_count += sizeof(sljit_uw);
sljit_set_put_label(put_label, LABEL());
}
COMPILE_BACKTRACKINGPATH(current->top);
@ -13459,11 +13442,10 @@ int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_c
int alt_count, alt_max, local_size;
backtrack_common altbacktrack;
jump_list *match = NULL;
sljit_uw *next_update_addr = NULL;
struct sljit_jump *alt1 = NULL;
struct sljit_jump *alt2 = NULL;
struct sljit_jump *next_alt = NULL;
struct sljit_jump *accept_exit = NULL;
struct sljit_label *quit;
struct sljit_put_label *put_label;
/* Recurse captures then. */
common->then_trap = NULL;
@ -13524,7 +13506,12 @@ while (1)
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
if (alt_max > 1 || has_accept)
{
if (alt_max > 3)
put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
else
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
}
add_jump(compiler, &match, JUMP(SLJIT_JUMP));
@ -13538,7 +13525,7 @@ while (1)
sljit_emit_fast_enter(compiler, TMP1, 0);
if (has_accept)
accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_max * sizeof (sljit_sw));
accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* Save return address. */
@ -13551,44 +13538,30 @@ while (1)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
free_stack(common, 2);
if (alt_max > 4)
if (alt_max > 3)
{
/* Table jump if alt_max is greater than 4. */
next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw));
if (SLJIT_UNLIKELY(next_update_addr == NULL))
return;
sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr);
add_label_addr(common, next_update_addr++);
sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
sljit_set_put_label(put_label, LABEL());
}
else
{
if (alt_max == 4)
alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw));
}
next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
}
else
free_stack(common, has_accept ? 2 : 1);
}
else if (alt_max > 4)
add_label_addr(common, next_update_addr++);
else if (alt_max > 3)
sljit_set_put_label(put_label, LABEL());
else
{
if (alt_count != 2 * sizeof(sljit_uw))
JUMPHERE(next_alt);
if (alt_count + 1 < alt_max)
{
JUMPHERE(alt1);
if (alt_max == 3 && alt_count == sizeof(sljit_uw))
alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw));
}
else
{
JUMPHERE(alt2);
if (alt_max == 4)
alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw));
SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
}
}
alt_count += sizeof(sljit_uw);
alt_count++;
compile_backtrackingpath(common, altbacktrack.top);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
@ -13649,7 +13622,7 @@ if (common->accept != NULL)
OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
allocate_stack(common, 2);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
}
set_jumps(match, LABEL());
@ -13684,7 +13657,6 @@ executable_functions *functions;
void *executable_func;
sljit_uw executable_size;
sljit_uw total_length;
label_addr_list *label_addr;
struct sljit_label *mainloop_label = NULL;
struct sljit_label *continue_match_label;
struct sljit_label *empty_match_found_label = NULL;
@ -14276,13 +14248,8 @@ SLJIT_FREE(common->private_data_ptrs, allocator_data);
executable_func = sljit_generate_code(compiler);
executable_size = sljit_get_generated_code_size(compiler);
label_addr = common->label_addrs;
while (label_addr != NULL)
{
*label_addr->update_addr = sljit_get_label_addr(label_addr->label);
label_addr = label_addr->next;
}
sljit_free_compiler(compiler);
if (executable_func == NULL)
{
PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data);

View File

@ -721,6 +721,7 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp
static SLJIT_INLINE void set_put_label(struct sljit_put_label *put_label, struct sljit_compiler *compiler, sljit_uw offset)
{
put_label->next = NULL;
put_label->label = NULL;
put_label->addr = compiler->size - offset;
put_label->flags = 0;
if (compiler->last_put_label)

View File

@ -611,15 +611,15 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
put_label = compiler->put_labels;
while (put_label) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
sljit_unaligned_store_sw((void*)put_label->addr - sizeof(sljit_sw), (sljit_sw)put_label->label->addr);
sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
#else
if (put_label->flags & PATCH_MD) {
SLJIT_ASSERT(put_label->label->addr > HALFWORD_MAX);
sljit_unaligned_store_sw((void*)put_label->addr - sizeof(sljit_sw), (sljit_sw)put_label->label->addr);
sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
}
else {
SLJIT_ASSERT(put_label->label->addr <= HALFWORD_MAX);
sljit_unaligned_store_s32((void*)put_label->addr - sizeof(sljit_s32), (sljit_s32)put_label->label->addr);
sljit_unaligned_store_s32((void*)(put_label->addr - sizeof(sljit_s32)), (sljit_s32)put_label->label->addr);
}
#endif