Support full ovector data for JIT callouts.

This commit is contained in:
Zoltán Herczeg 2017-03-25 06:33:41 +00:00
parent 39d9294d14
commit 25cec7a966
3 changed files with 84 additions and 25 deletions

View File

@ -355,6 +355,8 @@ typedef struct then_trap_backtrack {
typedef struct compiler_common { typedef struct compiler_common {
/* The sljit generic compiler. */ /* The sljit generic compiler. */
struct sljit_compiler *compiler; struct sljit_compiler *compiler;
/* Compiled regular expression. */
pcre2_real_code *re;
/* First byte code. */ /* First byte code. */
PCRE2_SPTR start; PCRE2_SPTR start;
/* Maps private data offset to each opcode. */ /* Maps private data offset to each opcode. */
@ -3551,7 +3553,7 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
#endif /* SUPPORT_UNICODE */ #endif /* SUPPORT_UNICODE */
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, sljit_u32 overall_options) static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
{ {
DEFINE_COMPILER; DEFINE_COMPILER;
struct sljit_label *mainloop; struct sljit_label *mainloop;
@ -3563,6 +3565,8 @@ struct sljit_jump *end2 = NULL;
struct sljit_jump *singlechar; struct sljit_jump *singlechar;
#endif #endif
jump_list *newline = NULL; jump_list *newline = NULL;
sljit_u32 overall_options = common->re->overall_options;
BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
BOOL newlinecheck = FALSE; BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE; BOOL readuchar = FALSE;
@ -4803,8 +4807,10 @@ return TRUE;
#undef MAX_N_CHARS #undef MAX_N_CHARS
static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, PCRE2_UCHAR first_char, BOOL caseless) static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
{ {
PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
BOOL caseless = (common->re->flags & PCRE2_FIRSTCASELESS) != 0;
PCRE2_UCHAR oc; PCRE2_UCHAR oc;
oc = first_char; oc = first_char;
@ -4909,9 +4915,10 @@ if (common->match_end_ptr != 0)
static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks); static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits) static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
{ {
DEFINE_COMPILER; DEFINE_COMPILER;
const sljit_u8 *start_bits = common->re->start_bitmap;
struct sljit_label *start; struct sljit_label *start;
struct sljit_jump *quit; struct sljit_jump *quit;
struct sljit_jump *found = NULL; struct sljit_jump *found = NULL;
@ -7378,37 +7385,70 @@ return cc + 1 + LINK_SIZE;
static int SLJIT_CALL do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector) static int SLJIT_CALL do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
{ {
PCRE2_SPTR begin = arguments->begin; PCRE2_SPTR begin;
PCRE2_SIZE *ovector = arguments->match_data->ovector; PCRE2_SIZE *ovector_ptr;
sljit_u32 oveccount = arguments->oveccount; PCRE2_SPTR *jit_ovector_ptr;
sljit_u32 i; PCRE2_SPTR saved_ovector0;
sljit_u32 oveccount, i, retval;
if (arguments->callout == NULL) if (arguments->callout == NULL)
return 0; return 0;
SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
begin = arguments->begin;
ovector_ptr = (PCRE2_SIZE*)jit_ovector;
jit_ovector_ptr = jit_ovector;
oveccount = callout_block->capture_top;
saved_ovector0 = jit_ovector_ptr[0];
jit_ovector_ptr[0] = begin - 1;
SLJIT_ASSERT(jit_ovector_ptr[1] == begin - 1);
callout_block->version = 1; callout_block->version = 1;
/* Offsets in subject. */ /* Offsets in subject. */
callout_block->subject_length = arguments->end - arguments->begin; callout_block->subject_length = arguments->end - arguments->begin;
callout_block->start_match = (PCRE2_SPTR)callout_block->subject - arguments->begin; callout_block->start_match = saved_ovector0 - begin;
callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - arguments->begin; callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
callout_block->subject = begin; callout_block->subject = begin;
/* Convert and copy the JIT offset vector to the ovector array. */ /* Convert and copy the JIT offset vector to the ovector array. */
callout_block->capture_top = 0; callout_block->capture_top = 0;
callout_block->offset_vector = ovector; callout_block->offset_vector = ovector_ptr;
for (i = 2; i < oveccount; i += 2)
/* Convert pointers to sizes. */
for (i = 0; i < oveccount; i++)
{ {
ovector[i] = jit_ovector[i] - begin; ovector_ptr[0] = (PCRE2_SIZE)(jit_ovector_ptr[0] - begin);
ovector[i + 1] = jit_ovector[i + 1] - begin; ovector_ptr[1] = (PCRE2_SIZE)(jit_ovector_ptr[1] - begin);
if (jit_ovector[i] >= begin)
if (ovector_ptr[0] != PCRE2_UNSET)
callout_block->capture_top = i; callout_block->capture_top = i;
ovector_ptr += 2;
jit_ovector_ptr += 2;
} }
callout_block->capture_top = (callout_block->capture_top >> 1) + 1; callout_block->capture_top++;
ovector[0] = PCRE2_UNSET;
ovector[1] = PCRE2_UNSET; retval = (arguments->callout)(callout_block, arguments->callout_data);
return (arguments->callout)(callout_block, arguments->callout_data);
ovector_ptr = ((PCRE2_SIZE*)jit_ovector) + oveccount * 2;
jit_ovector_ptr = jit_ovector + oveccount * 2;
/* Reverse conversion. */
for (i = 0; i < oveccount; i++)
{
ovector_ptr -= 2;
jit_ovector_ptr -= 2;
jit_ovector_ptr[0] = begin + ovector_ptr[0];
jit_ovector_ptr[1] = begin + ovector_ptr[1];
}
jit_ovector_ptr[0] = saved_ovector0;
return retval;
} }
/* Aligning to 8 byte. */ /* Aligning to 8 byte. */
@ -7439,11 +7479,10 @@ OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0; value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1); OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0); OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
/* These pointer-sized fields temporarily store internal variables. */ /* These pointer-sized fields temporarily store internal variables. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
if (common->mark_ptr != 0) if (common->mark_ptr != 0)
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
@ -11152,6 +11191,7 @@ SLJIT_ASSERT(tables);
memset(&rootbacktrack, 0, sizeof(backtrack_common)); memset(&rootbacktrack, 0, sizeof(backtrack_common));
memset(common, 0, sizeof(compiler_common)); memset(common, 0, sizeof(compiler_common));
common->re = re;
common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)); common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size; rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
@ -11369,7 +11409,7 @@ if (common->control_head_ptr != 0)
/* Main part of the matching */ /* Main part of the matching */
if ((re->overall_options & PCRE2_ANCHORED) == 0) if ((re->overall_options & PCRE2_ANCHORED) == 0)
{ {
mainloop_label = mainloop_entry(common, (re->flags & PCRE2_HASCRORLF) != 0, re->overall_options); mainloop_label = mainloop_entry(common);
continue_match_label = LABEL(); continue_match_label = LABEL();
/* Forward search if possible. */ /* Forward search if possible. */
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
@ -11377,11 +11417,11 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0)
if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common)) if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
; ;
else if ((re->flags & PCRE2_FIRSTSET) != 0) else if ((re->flags & PCRE2_FIRSTSET) != 0)
fast_forward_first_char(common, (PCRE2_UCHAR)(re->first_codeunit), (re->flags & PCRE2_FIRSTCASELESS) != 0); fast_forward_first_char(common);
else if ((re->flags & PCRE2_STARTLINE) != 0) else if ((re->flags & PCRE2_STARTLINE) != 0)
fast_forward_newline(common); fast_forward_newline(common);
else if ((re->flags & PCRE2_FIRSTMAPSET) != 0) else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
fast_forward_start_bits(common, re->start_bitmap); fast_forward_start_bits(common);
} }
} }
else else

5
testdata/testinput2 vendored
View File

@ -3648,7 +3648,7 @@
/(?:(a)+(?C1)bb|aa(?C2)b)++/ /(?:(a)+(?C1)bb|aa(?C2)b)++/
aab\=callout_capture aab\=callout_capture
aab\=callout_capture,ovector=1,no_jit aab\=callout_capture,ovector=1
/(ab)x|ab/ /(ab)x|ab/
ab\=ovector=0 ab\=ovector=0
@ -4996,6 +4996,9 @@ a)"xI
/\g{3/ /\g{3/
/(a(?C1)(b)(c)d)+/
abcdabcd\=callout_capture
# Perl matches this one, but PCRE does not because (*ACCEPT) clears out any # Perl matches this one, but PCRE does not because (*ACCEPT) clears out any
# pending backtracks in the recursion. # pending backtracks in the recursion.

18
testdata/testoutput2 vendored
View File

@ -11703,7 +11703,7 @@ Callout 2: last capture = 0
--->aab --->aab
^ ^ b ^ ^ b
0: aab 0: aab
aab\=callout_capture,ovector=1,no_jit aab\=callout_capture,ovector=1
Callout 1: last capture = 1 Callout 1: last capture = 1
1: a 1: a
--->aab --->aab
@ -15506,6 +15506,22 @@ No match
/\g{3/ /\g{3/
Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number
/(a(?C1)(b)(c)d)+/
abcdabcd\=callout_capture
Callout 1: last capture = 0
--->abcdabcd
^^ (
Callout 1: last capture = 1
1: abcd
2: b
3: c
--->abcdabcd
^ ^ (
0: abcdabcd
1: abcd
2: b
3: c
# Perl matches this one, but PCRE does not because (*ACCEPT) clears out any # Perl matches this one, but PCRE does not because (*ACCEPT) clears out any
# pending backtracks in the recursion. # pending backtracks in the recursion.