Add option bits for read_char in JIT.
This commit is contained in:
parent
baa91ecc79
commit
142c667bbc
|
@ -485,6 +485,7 @@ typedef struct compiler_common {
|
||||||
jump_list *getucdtype;
|
jump_list *getucdtype;
|
||||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
jump_list *utfreadchar;
|
jump_list *utfreadchar;
|
||||||
|
jump_list *utfreadchar_invalid_precise;
|
||||||
jump_list *utfreadtype8;
|
jump_list *utfreadtype8;
|
||||||
jump_list *utfpeakcharback;
|
jump_list *utfpeakcharback;
|
||||||
#endif
|
#endif
|
||||||
|
@ -3462,8 +3463,13 @@ if (common->utf)
|
||||||
#endif /* SUPPORT_UNICODE */
|
#endif /* SUPPORT_UNICODE */
|
||||||
}
|
}
|
||||||
|
|
||||||
static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max,
|
#define READ_CHAR_UPDATE_STR_PTR 0x1
|
||||||
jump_list **backtracks, BOOL update_str_ptr)
|
#define READ_CHAR_UPDATE_STR_PTR_INVALID 0x2
|
||||||
|
#define READ_CHAR_UPDATE_STR_PTR_PRECISE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UPDATE_STR_PTR_INVALID)
|
||||||
|
#define READ_CHAR_VALID_UTF 0x4
|
||||||
|
|
||||||
|
static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
|
||||||
|
jump_list **backtracks, sljit_u32 options)
|
||||||
{
|
{
|
||||||
/* Reads the precise value of a character into TMP1, if the character is
|
/* Reads the precise value of a character into TMP1, if the character is
|
||||||
between min and max (c >= min && c <= max). Otherwise it returns with a value
|
between min and max (c >= min && c <= max). Otherwise it returns with a value
|
||||||
|
@ -3476,24 +3482,30 @@ struct sljit_jump *jump;
|
||||||
struct sljit_jump *jump2;
|
struct sljit_jump *jump2;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
SLJIT_UNUSED_ARG(update_str_ptr);
|
|
||||||
SLJIT_UNUSED_ARG(min);
|
SLJIT_UNUSED_ARG(min);
|
||||||
SLJIT_UNUSED_ARG(max);
|
SLJIT_UNUSED_ARG(max);
|
||||||
SLJIT_UNUSED_ARG(backtracks);
|
SLJIT_UNUSED_ARG(backtracks);
|
||||||
|
SLJIT_UNUSED_ARG(options);
|
||||||
SLJIT_ASSERT(min <= max);
|
SLJIT_ASSERT(min <= max);
|
||||||
|
|
||||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
|
||||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
|
|
||||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
#ifdef SUPPORT_UNICODE
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
if (common->utf)
|
if (common->utf)
|
||||||
{
|
{
|
||||||
if (max < 128 && !update_str_ptr) return;
|
if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
|
||||||
|
|
||||||
if (common->invalid_utf)
|
if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
|
||||||
{
|
{
|
||||||
jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
|
jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
|
||||||
|
|
||||||
|
if (options & READ_CHAR_UPDATE_STR_PTR_INVALID)
|
||||||
|
add_jump(compiler, &common->utfreadchar_invalid_precise, JUMP(SLJIT_FAST_CALL));
|
||||||
|
else
|
||||||
add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
|
add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
|
||||||
|
|
||||||
if (backtracks != NULL)
|
if (backtracks != NULL)
|
||||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
|
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
|
||||||
JUMPHERE(jump);
|
JUMPHERE(jump);
|
||||||
|
@ -3504,7 +3516,7 @@ if (common->utf)
|
||||||
if (min >= 0x10000)
|
if (min >= 0x10000)
|
||||||
{
|
{
|
||||||
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
|
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
|
||||||
if (update_str_ptr)
|
if (options & READ_CHAR_UPDATE_STR_PTR)
|
||||||
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
|
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
|
||||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
|
||||||
jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
|
jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
|
||||||
|
@ -3516,19 +3528,19 @@ if (common->utf)
|
||||||
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
|
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
|
||||||
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||||
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
|
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
|
||||||
if (!update_str_ptr)
|
if (!(options & READ_CHAR_UPDATE_STR_PTR))
|
||||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
|
||||||
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
|
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
|
||||||
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
|
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
|
||||||
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||||
JUMPHERE(jump2);
|
JUMPHERE(jump2);
|
||||||
if (update_str_ptr)
|
if (options & READ_CHAR_UPDATE_STR_PTR)
|
||||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
|
||||||
}
|
}
|
||||||
else if (min >= 0x800 && max <= 0xffff)
|
else if (min >= 0x800 && max <= 0xffff)
|
||||||
{
|
{
|
||||||
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
|
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
|
||||||
if (update_str_ptr)
|
if (options & READ_CHAR_UPDATE_STR_PTR)
|
||||||
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
|
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
|
||||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
|
||||||
jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
|
jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
|
||||||
|
@ -3536,13 +3548,13 @@ if (common->utf)
|
||||||
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
|
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
|
||||||
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||||
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
|
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
|
||||||
if (!update_str_ptr)
|
if (!(options & READ_CHAR_UPDATE_STR_PTR))
|
||||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
|
||||||
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
|
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
|
||||||
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
|
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
|
||||||
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||||
JUMPHERE(jump2);
|
JUMPHERE(jump2);
|
||||||
if (update_str_ptr)
|
if (options & READ_CHAR_UPDATE_STR_PTR)
|
||||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
|
||||||
}
|
}
|
||||||
else if (max >= 0x800)
|
else if (max >= 0x800)
|
||||||
|
@ -3557,7 +3569,7 @@ if (common->utf)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
|
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
|
||||||
if (!update_str_ptr)
|
if (!(options & READ_CHAR_UPDATE_STR_PTR))
|
||||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
else
|
else
|
||||||
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
|
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
|
||||||
|
@ -3565,31 +3577,30 @@ if (common->utf)
|
||||||
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
|
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
|
||||||
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
|
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
|
||||||
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||||
if (update_str_ptr)
|
if (options & READ_CHAR_UPDATE_STR_PTR)
|
||||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
|
||||||
}
|
}
|
||||||
JUMPHERE(jump);
|
JUMPHERE(jump);
|
||||||
}
|
}
|
||||||
#endif
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
|
||||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
|
|
||||||
if (common->utf)
|
if (common->utf)
|
||||||
{
|
{
|
||||||
if (max < 0xd800 && !update_str_ptr) return;
|
if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
|
||||||
|
|
||||||
if (max >= 0x10000 || common->invalid_utf)
|
if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
|
||||||
{
|
{
|
||||||
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
|
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
|
||||||
|
|
||||||
if (common->invalid_utf)
|
|
||||||
{
|
|
||||||
jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
|
jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
|
||||||
add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
|
add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
|
||||||
if (backtracks != NULL)
|
if (backtracks != NULL)
|
||||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
|
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
|
||||||
|
JUMPHERE(jump);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
if (max >= 0x10000)
|
||||||
{
|
{
|
||||||
|
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
|
||||||
jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
|
jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
|
||||||
/* TMP2 contains the high surrogate. */
|
/* TMP2 contains the high surrogate. */
|
||||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
|
||||||
|
@ -3597,7 +3608,6 @@ if (common->utf)
|
||||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
|
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
|
||||||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
|
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||||
}
|
|
||||||
JUMPHERE(jump);
|
JUMPHERE(jump);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -3605,13 +3615,25 @@ if (common->utf)
|
||||||
/* Skip low surrogate if necessary. */
|
/* Skip low surrogate if necessary. */
|
||||||
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
|
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
|
||||||
jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
|
jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
|
||||||
if (update_str_ptr)
|
if (options & READ_CHAR_UPDATE_STR_PTR)
|
||||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
if (max >= 0xd800)
|
if (max >= 0xd800)
|
||||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
|
||||||
JUMPHERE(jump);
|
JUMPHERE(jump);
|
||||||
}
|
}
|
||||||
#endif
|
#elif PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
|
if (common->invalid_utf)
|
||||||
|
{
|
||||||
|
if (backtracks != NULL)
|
||||||
|
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
|
||||||
|
CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
@ -3646,6 +3668,7 @@ SLJIT_ASSERT(common->utf);
|
||||||
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
|
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
|
||||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
|
|
||||||
|
/* All values > 127 are zero in ctypes. */
|
||||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
|
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
|
||||||
|
|
||||||
if (negated)
|
if (negated)
|
||||||
|
@ -3700,14 +3723,15 @@ if (common->utf)
|
||||||
|
|
||||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
|
||||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
|
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
|
||||||
if (common->invalid_utf)
|
if (common->invalid_utf)
|
||||||
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1f));
|
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
|
||||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
|
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
|
||||||
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
|
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
|
||||||
if (common->invalid_utf)
|
if (common->invalid_utf)
|
||||||
add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3f));
|
add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40));
|
||||||
|
|
||||||
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
|
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
|
||||||
|
@ -3718,6 +3742,7 @@ if (common->utf)
|
||||||
else if (common->invalid_utf)
|
else if (common->invalid_utf)
|
||||||
{
|
{
|
||||||
add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
|
add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
|
||||||
|
OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
|
||||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
|
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
|
||||||
|
|
||||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
|
||||||
|
@ -3970,6 +3995,122 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
|
||||||
static void do_utfreadchar_invalid(compiler_common *common)
|
static void do_utfreadchar_invalid(compiler_common *common)
|
||||||
{
|
{
|
||||||
/* Slow decoding a UTF-8 character. TMP1 contains the first byte
|
/* Slow decoding a UTF-8 character. TMP1 contains the first byte
|
||||||
|
of the character (>= 0xc0). Return char value in TMP1. STR_PTR is
|
||||||
|
undefined for invalid characters. */
|
||||||
|
DEFINE_COMPILER;
|
||||||
|
sljit_s32 i;
|
||||||
|
struct sljit_jump *jump;
|
||||||
|
struct sljit_jump *buffer_end_close;
|
||||||
|
struct sljit_label *three_byte_entry;
|
||||||
|
struct sljit_label *exit_invalid_label;
|
||||||
|
struct sljit_jump *exit_invalid[11];
|
||||||
|
|
||||||
|
sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
|
||||||
|
|
||||||
|
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);
|
||||||
|
|
||||||
|
/* Usually more than 3 characters remained in the subject buffer. */
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
|
||||||
|
|
||||||
|
/* Not a valid start of a multi-byte sequence, no more bytes read. */
|
||||||
|
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);
|
||||||
|
|
||||||
|
buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
|
||||||
|
|
||||||
|
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
|
||||||
|
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
|
||||||
|
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
|
||||||
|
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
|
||||||
|
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
|
||||||
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||||
|
|
||||||
|
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
|
||||||
|
jump = JUMP(SLJIT_NOT_ZERO);
|
||||||
|
|
||||||
|
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
|
||||||
|
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
|
||||||
|
|
||||||
|
JUMPHERE(jump);
|
||||||
|
|
||||||
|
/* Three-byte sequence. */
|
||||||
|
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
|
||||||
|
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
|
||||||
|
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
|
||||||
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||||
|
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
|
||||||
|
|
||||||
|
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
|
||||||
|
jump = JUMP(SLJIT_NOT_ZERO);
|
||||||
|
|
||||||
|
three_byte_entry = LABEL();
|
||||||
|
|
||||||
|
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
|
||||||
|
exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
|
||||||
|
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
|
||||||
|
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
|
|
||||||
|
exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
|
||||||
|
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
|
||||||
|
|
||||||
|
JUMPHERE(jump);
|
||||||
|
|
||||||
|
/* Four-byte sequence. */
|
||||||
|
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
|
||||||
|
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
|
||||||
|
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
|
||||||
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||||
|
exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
|
||||||
|
|
||||||
|
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
|
||||||
|
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
|
||||||
|
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
|
||||||
|
|
||||||
|
JUMPHERE(buffer_end_close);
|
||||||
|
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
|
||||||
|
exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
|
||||||
|
|
||||||
|
/* Two-byte sequence. */
|
||||||
|
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
|
||||||
|
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
|
||||||
|
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
|
||||||
|
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
|
||||||
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||||
|
exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
|
||||||
|
|
||||||
|
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
|
||||||
|
jump = JUMP(SLJIT_NOT_ZERO);
|
||||||
|
|
||||||
|
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
|
||||||
|
|
||||||
|
/* Three-byte sequence. */
|
||||||
|
JUMPHERE(jump);
|
||||||
|
exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||||
|
|
||||||
|
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
|
||||||
|
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
|
||||||
|
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
|
||||||
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||||
|
exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
|
||||||
|
|
||||||
|
/* One will be subtracted from STR_PTR later. */
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
|
||||||
|
|
||||||
|
/* Four byte sequences are not possible. */
|
||||||
|
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);
|
||||||
|
|
||||||
|
exit_invalid_label = LABEL();
|
||||||
|
for (i = 0; i < 11; i++)
|
||||||
|
sljit_set_label(exit_invalid[i], exit_invalid_label);
|
||||||
|
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
|
||||||
|
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void do_utfreadchar_invalid_precise(compiler_common *common)
|
||||||
|
{
|
||||||
|
/* Slow decoding a UTF-8 character. TMP1 contains the first byte
|
||||||
of the character (>= 0xc0). Return char value in TMP1. */
|
of the character (>= 0xc0). Return char value in TMP1. */
|
||||||
DEFINE_COMPILER;
|
DEFINE_COMPILER;
|
||||||
struct sljit_jump *jump;
|
struct sljit_jump *jump;
|
||||||
|
@ -3987,7 +4128,7 @@ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
|
||||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
|
||||||
|
|
||||||
/* Not a valid start of a multi-byte sequence, no more bytes read. */
|
/* Not a valid start of a multi-byte sequence, no more bytes read. */
|
||||||
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf8 - 0xc);
|
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf8 - 0xc0);
|
||||||
|
|
||||||
buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
|
buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
|
||||||
|
|
||||||
|
@ -4576,7 +4717,7 @@ if ((overall_options & PCRE2_FIRSTLINE) != 0)
|
||||||
mainloop = LABEL();
|
mainloop = LABEL();
|
||||||
/* Continual stores do not cause a data dependency. */
|
/* Continual stores do not cause a data dependency. */
|
||||||
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
|
||||||
read_char_range(common, common->nlmin, common->nlmax, NULL, TRUE);
|
read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_UPDATE_STR_PTR_PRECISE);
|
||||||
check_newlinechar(common, common->nltype, &newline, TRUE);
|
check_newlinechar(common, common->nltype, &newline, TRUE);
|
||||||
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
|
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
|
||||||
JUMPHERE(end);
|
JUMPHERE(end);
|
||||||
|
@ -6206,7 +6347,7 @@ move_back(common, NULL, FALSE);
|
||||||
loop = LABEL();
|
loop = LABEL();
|
||||||
common->ff_newline_shortcut = loop;
|
common->ff_newline_shortcut = loop;
|
||||||
|
|
||||||
read_char_range(common, common->nlmin, common->nlmax, NULL, TRUE);
|
read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_UPDATE_STR_PTR_PRECISE);
|
||||||
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
|
||||||
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
|
if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
|
||||||
foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
|
foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
|
||||||
|
@ -6451,7 +6592,8 @@ else
|
||||||
{
|
{
|
||||||
move_back(common, &invalid_utf, FALSE);
|
move_back(common, &invalid_utf, FALSE);
|
||||||
check_start_used_ptr(common);
|
check_start_used_ptr(common);
|
||||||
read_char_range(common, 0, READ_CHAR_MAX, &invalid_utf, TRUE);
|
/* No need for a precise read since the match fails anyway. */
|
||||||
|
read_char(common, 0, READ_CHAR_MAX, &invalid_utf, READ_CHAR_UPDATE_STR_PTR);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Testing char type. */
|
/* Testing char type. */
|
||||||
|
@ -7394,7 +7536,10 @@ SLJIT_ASSERT(compares > 0);
|
||||||
|
|
||||||
/* We are not necessarily in utf mode even in 8 bit mode. */
|
/* We are not necessarily in utf mode even in 8 bit mode. */
|
||||||
cc = ccbegin;
|
cc = ccbegin;
|
||||||
read_char_range(common, min, max, ((cc[-1] & XCL_NOT) != 0) ? backtracks : NULL, (cc[-1] & XCL_NOT) != 0);
|
if ((cc[-1] & XCL_NOT) != 0)
|
||||||
|
read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
|
else
|
||||||
|
read_char(common, min, max, NULL, 0);
|
||||||
|
|
||||||
if ((cc[-1] & XCL_HASPROP) == 0)
|
if ((cc[-1] & XCL_HASPROP) == 0)
|
||||||
{
|
{
|
||||||
|
@ -7920,13 +8065,13 @@ switch(type)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
|
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
|
||||||
read_char_range(common, common->nlmin, common->nlmax, NULL, TRUE);
|
read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
|
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
|
add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
|
||||||
sljit_set_current_flags(compiler, SLJIT_SET_Z);
|
sljit_set_current_flags(compiler, SLJIT_SET_Z);
|
||||||
add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
|
add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
|
||||||
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
|
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
|
||||||
}
|
}
|
||||||
JUMPHERE(jump[2]);
|
JUMPHERE(jump[2]);
|
||||||
JUMPHERE(jump[3]);
|
JUMPHERE(jump[3]);
|
||||||
|
@ -8325,7 +8470,7 @@ switch(type)
|
||||||
case OP_ANY:
|
case OP_ANY:
|
||||||
if (check_str_ptr)
|
if (check_str_ptr)
|
||||||
detect_partial_match(common, backtracks);
|
detect_partial_match(common, backtracks);
|
||||||
read_char_range(common, common->nlmin, common->nlmax, backtracks, TRUE);
|
read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
|
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
|
||||||
{
|
{
|
||||||
jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
|
jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
|
||||||
|
@ -8352,7 +8497,7 @@ switch(type)
|
||||||
{
|
{
|
||||||
if (common->invalid_utf)
|
if (common->invalid_utf)
|
||||||
{
|
{
|
||||||
read_char_range(common, 0, READ_CHAR_MAX, backtracks, TRUE);
|
read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
return cc;
|
return cc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8402,7 +8547,7 @@ switch(type)
|
||||||
case OP_ANYNL:
|
case OP_ANYNL:
|
||||||
if (check_str_ptr)
|
if (check_str_ptr)
|
||||||
detect_partial_match(common, backtracks);
|
detect_partial_match(common, backtracks);
|
||||||
read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, NULL, FALSE);
|
read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
|
||||||
jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
|
jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
|
||||||
/* We don't need to handle soft partial matching case. */
|
/* We don't need to handle soft partial matching case. */
|
||||||
end_list = NULL;
|
end_list = NULL;
|
||||||
|
@ -8425,7 +8570,12 @@ switch(type)
|
||||||
case OP_HSPACE:
|
case OP_HSPACE:
|
||||||
if (check_str_ptr)
|
if (check_str_ptr)
|
||||||
detect_partial_match(common, backtracks);
|
detect_partial_match(common, backtracks);
|
||||||
read_char_range(common, 0x9, 0x3000, NULL, type == OP_NOT_HSPACE);
|
|
||||||
|
if (type == OP_NOT_HSPACE)
|
||||||
|
read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
|
else
|
||||||
|
read_char(common, 0x9, 0x3000, NULL, 0);
|
||||||
|
|
||||||
add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
|
add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
|
||||||
sljit_set_current_flags(compiler, SLJIT_SET_Z);
|
sljit_set_current_flags(compiler, SLJIT_SET_Z);
|
||||||
add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
|
add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
|
||||||
|
@ -8435,7 +8585,12 @@ switch(type)
|
||||||
case OP_VSPACE:
|
case OP_VSPACE:
|
||||||
if (check_str_ptr)
|
if (check_str_ptr)
|
||||||
detect_partial_match(common, backtracks);
|
detect_partial_match(common, backtracks);
|
||||||
read_char_range(common, 0xa, 0x2029, NULL, type == OP_NOT_VSPACE);
|
|
||||||
|
if (type == OP_NOT_VSPACE)
|
||||||
|
read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
|
else
|
||||||
|
read_char(common, 0xa, 0x2029, NULL, 0);
|
||||||
|
|
||||||
add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
|
add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
|
||||||
sljit_set_current_flags(compiler, SLJIT_SET_Z);
|
sljit_set_current_flags(compiler, SLJIT_SET_Z);
|
||||||
add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
|
add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
|
||||||
|
@ -8477,6 +8632,7 @@ switch(type)
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
|
if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (common->mode == PCRE2_JIT_COMPLETE && check_str_ptr
|
if (common->mode == PCRE2_JIT_COMPLETE && check_str_ptr
|
||||||
&& (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
|
&& (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
|
||||||
{
|
{
|
||||||
|
@ -8504,12 +8660,13 @@ switch(type)
|
||||||
|
|
||||||
if (type == OP_CHAR || !char_has_othercase(common, cc))
|
if (type == OP_CHAR || !char_has_othercase(common, cc))
|
||||||
{
|
{
|
||||||
read_char_range(common, c, c, NULL, FALSE);
|
read_char(common, c, c, NULL, 0);
|
||||||
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
|
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
|
||||||
return cc + length;
|
return cc + length;
|
||||||
}
|
}
|
||||||
|
|
||||||
oc = char_othercase(common, c);
|
oc = char_othercase(common, c);
|
||||||
read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, NULL, FALSE);
|
read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
|
||||||
bit = c ^ oc;
|
bit = c ^ oc;
|
||||||
if (is_powerof2(bit))
|
if (is_powerof2(bit))
|
||||||
{
|
{
|
||||||
|
@ -8517,6 +8674,7 @@ switch(type)
|
||||||
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
|
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
|
||||||
return cc + length;
|
return cc + length;
|
||||||
}
|
}
|
||||||
|
|
||||||
jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
|
jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
|
||||||
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
|
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
|
||||||
JUMPHERE(jump[0]);
|
JUMPHERE(jump[0]);
|
||||||
|
@ -8533,7 +8691,7 @@ switch(type)
|
||||||
{
|
{
|
||||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
c = *cc;
|
c = *cc;
|
||||||
if (c < 128)
|
if (c < 128 && !common->invalid_utf)
|
||||||
{
|
{
|
||||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
|
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
|
||||||
if (type == OP_NOT || !char_has_othercase(common, cc))
|
if (type == OP_NOT || !char_has_othercase(common, cc))
|
||||||
|
@ -8564,13 +8722,13 @@ switch(type)
|
||||||
|
|
||||||
if (type == OP_NOT || !char_has_othercase(common, cc))
|
if (type == OP_NOT || !char_has_othercase(common, cc))
|
||||||
{
|
{
|
||||||
read_char_range(common, c, c, NULL, TRUE);
|
read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
|
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
oc = char_othercase(common, c);
|
oc = char_othercase(common, c);
|
||||||
read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, NULL, TRUE);
|
read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
bit = c ^ oc;
|
bit = c ^ oc;
|
||||||
if (is_powerof2(bit))
|
if (is_powerof2(bit))
|
||||||
{
|
{
|
||||||
|
@ -8592,9 +8750,15 @@ switch(type)
|
||||||
|
|
||||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
|
bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
|
||||||
read_char_range(common, 0, bit, NULL, type == OP_NCLASS);
|
if (type == OP_NCLASS)
|
||||||
|
read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
|
else
|
||||||
|
read_char(common, 0, bit, NULL, 0);
|
||||||
#else
|
#else
|
||||||
read_char_range(common, 0, 255, NULL, type == OP_NCLASS);
|
if (type == OP_NCLASS)
|
||||||
|
read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
|
||||||
|
else
|
||||||
|
read_char(common, 0, 255, NULL, 0);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
|
if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
|
||||||
|
@ -8788,7 +8952,6 @@ jump_list *no_match = NULL;
|
||||||
int source_reg = COUNT_MATCH;
|
int source_reg = COUNT_MATCH;
|
||||||
int source_end_reg = ARGUMENTS;
|
int source_end_reg = ARGUMENTS;
|
||||||
int char1_reg = STACK_LIMIT;
|
int char1_reg = STACK_LIMIT;
|
||||||
BOOL saved_invalid_utf;
|
|
||||||
#endif /* SUPPORT_UNICODE */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
if (ref)
|
if (ref)
|
||||||
|
@ -8830,17 +8993,14 @@ if (common->utf && *cc == OP_REFI)
|
||||||
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
|
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
|
||||||
OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);
|
OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);
|
||||||
|
|
||||||
saved_invalid_utf = common->invalid_utf;
|
read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);
|
||||||
common->invalid_utf = FALSE;
|
|
||||||
read_char_range(common, 0, READ_CHAR_MAX, NULL, TRUE);
|
|
||||||
common->invalid_utf = saved_invalid_utf;
|
|
||||||
|
|
||||||
OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
|
OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
|
||||||
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
|
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
|
||||||
OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);
|
OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);
|
||||||
|
|
||||||
/* Read second character. */
|
/* Read second character. */
|
||||||
read_char_range(common, 0, READ_CHAR_MAX, &no_match, TRUE);
|
read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);
|
||||||
|
|
||||||
CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
|
CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
|
||||||
|
|
||||||
|
@ -13572,6 +13732,11 @@ if (common->utfreadchar != NULL)
|
||||||
set_jumps(common->utfreadchar, LABEL());
|
set_jumps(common->utfreadchar, LABEL());
|
||||||
do_utfreadchar(common);
|
do_utfreadchar(common);
|
||||||
}
|
}
|
||||||
|
if (common->utfreadchar_invalid_precise != NULL)
|
||||||
|
{
|
||||||
|
set_jumps(common->utfreadchar_invalid_precise, LABEL());
|
||||||
|
do_utfreadchar_invalid_precise(common);
|
||||||
|
}
|
||||||
if (common->utfreadtype8 != NULL)
|
if (common->utfreadtype8 != NULL)
|
||||||
{
|
{
|
||||||
set_jumps(common->utfreadtype8, LABEL());
|
set_jumps(common->utfreadtype8, LABEL());
|
||||||
|
|
|
@ -1755,6 +1755,41 @@ static int regression_tests(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE && (defined SUPPORT_PCRE2_8 || defined SUPPORT_PCRE2_16)
|
||||||
|
|
||||||
|
/*
 * Check the outcome of one match attempt of the invalid-UTF regression tests.
 *
 *   pattern_index  index of the test case; used only in diagnostics
 *   type           label of the attempt (callers pass "match" or
 *                  "partial match"); used only in diagnostics
 *   result         value returned by the match call
 *   match_start    expected ovector[0]; a negative value means that the
 *                  test case expects no match
 *   match_end      expected ovector[1]; ignored when match_start < 0
 *   ovector        offset vector to compare against; read only when a
 *                  match is expected
 *
 * Returns 0 when the outcome is the expected one, otherwise prints a
 * diagnostic to stdout and returns 1.
 */
static int check_invalid_utf_result(int pattern_index, const char *type, int result,
	int match_start, int match_end, PCRE2_SIZE *ovector)
{
	if (match_start < 0) {
		/* No match expected: the only acceptable result is -1
		   (PCRE2_ERROR_NOMATCH in the public PCRE2 API). */
		if (result != -1) {
			printf("Pattern[%d] %s result is not -1.\n", pattern_index, type);
			return 1;
		}
		return 0;
	}

	/* A match is expected, so the call must report success. */
	if (result <= 0) {
		printf("Pattern[%d] %s result (%d) is not greater than 0.\n", pattern_index, type, result);
		return 1;
	}

	/* The overall match must span exactly [match_start, match_end). */
	if (ovector[0] != (PCRE2_SIZE)match_start) {
		printf("Pattern[%d] %s ovector[0] is unexpected (%d instead of %d)\n",
			pattern_index, type, (int)ovector[0], match_start);
		return 1;
	}

	if (ovector[1] != (PCRE2_SIZE)match_end) {
		printf("Pattern[%d] %s ovector[1] is unexpected (%d instead of %d)\n",
			pattern_index, type, (int)ovector[1], match_end);
		return 1;
	}

	return 0;
}
|
||||||
|
|
||||||
|
#endif /* SUPPORT_UNICODE && (SUPPORT_PCRE2_8 || SUPPORT_PCRE2_16) */
|
||||||
|
|
||||||
#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
|
#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
|
||||||
|
|
||||||
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
|
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
|
||||||
|
@ -1767,121 +1802,132 @@ struct invalid_utf8_regression_test_case {
|
||||||
int start_offset;
|
int start_offset;
|
||||||
int skip_left;
|
int skip_left;
|
||||||
int skip_right;
|
int skip_right;
|
||||||
int expected_result;
|
int match_start;
|
||||||
|
int match_end;
|
||||||
const char *pattern[2];
|
const char *pattern[2];
|
||||||
const char *input;
|
const char *input;
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
|
static struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
|
||||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
|
{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
|
||||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xf0\x90\x80\x80" },
|
{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf0\x90\x80\x80" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xf4\x90\x80\x80" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf4\x90\x80\x80" },
|
||||||
{ UDA, CI, 0, 0, 1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
|
{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xf0\x90\x80\x7f" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\x7f" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xf0\x90\x80\xc0" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\xc0" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" },
|
||||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xef\xbf\xbf#" },
|
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf#" },
|
||||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xef\xbf\xbf" },
|
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf" },
|
||||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xe0\xa0\x80#" },
|
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80#" },
|
||||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xe0\xa0\x80" },
|
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80" },
|
||||||
{ UDA, CI, 0, 0, 2, -1, { ".", NULL }, "\xef\xbf\xbf#" },
|
{ UDA, CI, 0, 0, 2, -1, -1, { ".", NULL }, "\xef\xbf\xbf#" },
|
||||||
{ UDA, CI, 0, 0, 1, -1, { ".", NULL }, "\xef\xbf\xbf" },
|
{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xef\xbf\xbf" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xef\xbf\x7f#" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\x7f#" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xef\xbf\xc0" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\xc0" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xe0\x9f\xbf#" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf#" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xe0\x9f\xbf" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf" },
|
||||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xed\x9f\xbf#" },
|
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xed\x9f\xbf#" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xed\xa0\x80#" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xa0\x80#" },
|
||||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xee\x80\x80#" },
|
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xee\x80\x80#" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xed\xbf\xbf#" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xbf\xbf#" },
|
||||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xdf\xbf##" },
|
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf##" },
|
||||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xdf\xbf#" },
|
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf#" },
|
||||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xdf\xbf" },
|
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf" },
|
||||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xc2\x80##" },
|
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80##" },
|
||||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xc2\x80#" },
|
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80#" },
|
||||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xc2\x80" },
|
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xe0\x80##" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80##" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xdf\xc0##" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0##" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xe0\x80" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xdf\xc0" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xc1\xbf##" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf##" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xc1\xbf" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\x80###" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80###" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\x80" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xf8###" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8###" },
|
||||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xf8" },
|
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8" },
|
||||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\x7f" },
|
{ UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" },
|
||||||
|
|
||||||
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
|
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
|
||||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80#" },
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80#" },
|
||||||
{ UDA, CPI, 4, 1, 0, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf#" },
|
{ UDA, CPI, 4, 1, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf#" },
|
||||||
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "#\xef\xbf\xbf#" },
|
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" },
|
||||||
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "#\xe0\xa0\x80#" },
|
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" },
|
||||||
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
|
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
|
||||||
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
|
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
|
||||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf#" },
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf#" },
|
||||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80#" },
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80#" },
|
||||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80#" },
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80#" },
|
||||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff#" },
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff#" },
|
||||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf#" },
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf#" },
|
||||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80#" },
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80#" },
|
||||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80#" },
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80#" },
|
||||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf#" },
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf#" },
|
||||||
{ UDA, CPI, 4, 2, 0, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80#" },
|
{ UDA, CPI, 4, 2, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80#" },
|
||||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "#\xf0\x80\x80#" },
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80#" },
|
||||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "#\xed\xa0\x80#" },
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80#" },
|
||||||
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "##\xdf\xbf#" },
|
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" },
|
||||||
{ UDA, CPI, 4, 2, 0, 1, { "\\B", NULL }, "##\xdf\xbf#" },
|
{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" },
|
||||||
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "##\xc2\x80#" },
|
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" },
|
||||||
{ UDA, CPI, 4, 2, 0, 1, { "\\B", NULL }, "##\xc2\x80#" },
|
{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" },
|
||||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "##\xc1\xbf#" },
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf#" },
|
||||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "##\xdf\xc0#" },
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0#" },
|
||||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "##\xe0\x80#" },
|
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80#" },
|
||||||
{ UDA, CPI, 4, 2, 0, -1, { "\\B", "\\b" }, "##\xe0\x80#" },
|
{ UDA, CPI, 4, 2, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80#" },
|
||||||
|
|
||||||
{ UDA, CPI, 3, 0, 0, 1, { "\\B", NULL }, "\xef\xbf\xbf#" },
|
{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" },
|
||||||
{ UDA, CPI, 3, 0, 0, 1, { "\\B", NULL }, "\xe0\xa0\x80#" },
|
{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" },
|
||||||
{ UDA, CPI, 3, 0, 0, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf#" },
|
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf#" },
|
||||||
{ UDA, CPI, 3, 1, 0, -1, { "\\B", "\\b" }, "\xef\xbf\xbf#" },
|
{ UDA, CPI, 3, 1, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf#" },
|
||||||
{ UDA, CPI, 3, 0, 0, -1, { "\\B", "\\b" }, "\xdf\x80\x80#" },
|
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80#" },
|
||||||
{ UDA, CPI, 3, 0, 0, -1, { "\\B", "\\b" }, "\xef\xbf\xff#" },
|
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff#" },
|
||||||
{ UDA, CPI, 3, 0, 0, -1, { "\\B", "\\b" }, "\xef\xff\xbf#" },
|
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf#" },
|
||||||
{ UDA, CPI, 3, 0, 0, -1, { "\\B", "\\b" }, "\xed\xbf\xbf#" },
|
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf#" },
|
||||||
|
|
||||||
{ UDA, CPI, 2, 0, 0, 1, { "\\B", NULL }, "\xdf\xbf#" },
|
{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" },
|
||||||
{ UDA, CPI, 2, 0, 0, 1, { "\\B", NULL }, "\xc2\x80#" },
|
{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" },
|
||||||
{ UDA, CPI, 2, 1, 0, -1, { "\\B", "\\b" }, "\xdf\xbf#" },
|
{ UDA, CPI, 2, 1, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xbf#" },
|
||||||
{ UDA, CPI, 2, 0, 0, -1, { "\\B", "\\b" }, "\xc1\xbf#" },
|
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf#" },
|
||||||
{ UDA, CPI, 2, 0, 0, -1, { "\\B", "\\b" }, "\xe0\x80#" },
|
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80#" },
|
||||||
{ UDA, CPI, 2, 0, 0, -1, { "\\B", "\\b" }, "\xdf\xff#" },
|
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff#" },
|
||||||
{ UDA, CPI, 2, 0, 0, -1, { "\\B", "\\b" }, "\xff\xbf#" },
|
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf#" },
|
||||||
|
|
||||||
{ UDA, CPI, 1, 0, 0, 1, { "\\B", NULL }, "\x7f#" },
|
{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" },
|
||||||
{ UDA, CPI, 1, 0, 0, 1, { "\\B", NULL }, "\x01#" },
|
{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" },
|
||||||
{ UDA, CPI, 1, 0, 0, -1, { "\\B", "\\b" }, "\x80#" },
|
{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80#" },
|
||||||
{ UDA, CPI, 1, 0, 0, -1, { "\\B", "\\b" }, "\x80#" },
|
{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80#" },
|
||||||
|
|
||||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
|
||||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, { "(.)\\1", NULL }, "a\xff" },
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" },
|
||||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 2, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
|
||||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
|
||||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" },
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" },
|
||||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 2, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 6, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
|
||||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
|
||||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 2, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 8, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
|
||||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
|
||||||
|
|
||||||
{ UDA, CPI, 0, 0, 0, 1, { "\\X", NULL }, "A" },
|
{ UDA, CPI, 0, 0, 0, 0, 1, { "\\X", NULL }, "A" },
|
||||||
{ UDA, CPI, 0, 0, 0, -1, { "\\X", NULL }, "\xff" },
|
{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xff" },
|
||||||
{ UDA, CPI, 0, 0, 0, 1, { "\\X", NULL }, "\xc3\xa1" },
|
{ UDA, CPI, 0, 0, 0, 0, 2, { "\\X", NULL }, "\xc3\xa1" },
|
||||||
{ UDA, CPI, 0, 0, 1, -1, { "\\X", NULL }, "\xc3\xa1" },
|
{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xc3\xa1" },
|
||||||
{ UDA, CPI, 0, 0, 0, -1, { "\\X", NULL }, "\xc3\x7f" },
|
{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xc3\x7f" },
|
||||||
{ UDA, CPI, 0, 0, 0, 1, { "\\X", NULL }, "\xe1\xbd\xb8" },
|
{ UDA, CPI, 0, 0, 0, 0, 3, { "\\X", NULL }, "\xe1\xbd\xb8" },
|
||||||
{ UDA, CPI, 0, 0, 1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" },
|
{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" },
|
||||||
{ UDA, CPI, 0, 0, 0, 1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
|
{ UDA, CPI, 0, 0, 0, 0, 4, { "\\X", NULL }, "\xf0\x90\x90\x80" },
|
||||||
{ UDA, CPI, 0, 0, 1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
|
{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
|
||||||
|
|
||||||
{ 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { "^\\W", NULL }, " \x0a#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 14, 15, { "^\\W", NULL }, " \xc0\x8a#\xe0\x80\x8a#\xf0\x80\x80\x8a#\x0a#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf8\x0a#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xc3\x0a#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf1\x0a#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xf2\xbf\x0a#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \xf2\xbf\xbf\x0a#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xef\x0a#"},
|
||||||
|
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xef\xbf\x0a#"},
|
||||||
|
|
||||||
|
{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
|
||||||
};
|
};
|
||||||
|
|
||||||
#undef UDA
|
#undef UDA
|
||||||
|
@ -1889,17 +1935,18 @@ static struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cas
|
||||||
#undef CPI
|
#undef CPI
|
||||||
|
|
||||||
static int run_invalid_utf8_test(struct invalid_utf8_regression_test_case *current,
|
static int run_invalid_utf8_test(struct invalid_utf8_regression_test_case *current,
|
||||||
int pattern_index, int i, pcre2_match_data_8 *mdata)
|
int pattern_index, int i, pcre2_compile_context_8 *ccontext, pcre2_match_data_8 *mdata)
|
||||||
{
|
{
|
||||||
pcre2_code_8 *code;
|
pcre2_code_8 *code;
|
||||||
int result, errorcode;
|
int result, errorcode;
|
||||||
PCRE2_SIZE length, erroroffset;
|
PCRE2_SIZE length, erroroffset;
|
||||||
|
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_8(mdata);
|
||||||
|
|
||||||
if (current->pattern[i] == NULL)
|
if (current->pattern[i] == NULL)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
code = pcre2_compile_8((PCRE2_UCHAR8*)current->pattern[i], PCRE2_ZERO_TERMINATED,
|
code = pcre2_compile_8((PCRE2_UCHAR8*)current->pattern[i], PCRE2_ZERO_TERMINATED,
|
||||||
current->compile_options, &errorcode, &erroroffset, NULL);
|
current->compile_options, &errorcode, &erroroffset, ccontext);
|
||||||
|
|
||||||
if (!code) {
|
if (!code) {
|
||||||
printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
|
printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
|
||||||
|
@ -1918,8 +1965,7 @@ static int run_invalid_utf8_test(struct invalid_utf8_regression_test_case *curre
|
||||||
result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
|
result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
|
||||||
length, current->start_offset - current->skip_left, 0, mdata, NULL);
|
length, current->start_offset - current->skip_left, 0, mdata, NULL);
|
||||||
|
|
||||||
if (result != current->expected_result) {
|
if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
|
||||||
printf("Pattern[%d:0] match result %d differs from expected %d.\n", pattern_index, result, current->expected_result);
|
|
||||||
pcre2_code_free_8(code);
|
pcre2_code_free_8(code);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -1929,8 +1975,7 @@ static int run_invalid_utf8_test(struct invalid_utf8_regression_test_case *curre
|
||||||
result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
|
result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
|
||||||
length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
|
length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
|
||||||
|
|
||||||
if (result != current->expected_result) {
|
if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
|
||||||
printf("Pattern[%d:0] partial match result %d differs from expected %d.\n", pattern_index, result, current->expected_result);
|
|
||||||
pcre2_code_free_8(code);
|
pcre2_code_free_8(code);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -1943,12 +1988,15 @@ static int run_invalid_utf8_test(struct invalid_utf8_regression_test_case *curre
|
||||||
static int invalid_utf8_regression_tests(void)
|
static int invalid_utf8_regression_tests(void)
|
||||||
{
|
{
|
||||||
struct invalid_utf8_regression_test_case *current;
|
struct invalid_utf8_regression_test_case *current;
|
||||||
|
pcre2_compile_context_8 *ccontext;
|
||||||
pcre2_match_data_8 *mdata;
|
pcre2_match_data_8 *mdata;
|
||||||
int total = 0, successful = 0;
|
int total = 0, successful = 0;
|
||||||
int result;
|
int result;
|
||||||
|
|
||||||
printf("\nRunning invalid-utf8 JIT regression tests\n");
|
printf("\nRunning invalid-utf8 JIT regression tests\n");
|
||||||
|
|
||||||
|
ccontext = pcre2_compile_context_create_8(NULL);
|
||||||
|
pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
|
||||||
mdata = pcre2_match_data_create_8(4, NULL);
|
mdata = pcre2_match_data_create_8(4, NULL);
|
||||||
|
|
||||||
for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
|
for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
|
||||||
|
@ -1956,9 +2004,9 @@ static int invalid_utf8_regression_tests(void)
|
||||||
total++;
|
total++;
|
||||||
|
|
||||||
result = 1;
|
result = 1;
|
||||||
if (!run_invalid_utf8_test(current, total - 1, 0, mdata))
|
if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
|
||||||
result = 0;
|
result = 0;
|
||||||
if (!run_invalid_utf8_test(current, total - 1, 1, mdata))
|
if (!run_invalid_utf8_test(current, total - 1, 1, ccontext, mdata))
|
||||||
result = 0;
|
result = 0;
|
||||||
|
|
||||||
if (result) {
|
if (result) {
|
||||||
|
@ -1974,6 +2022,7 @@ static int invalid_utf8_regression_tests(void)
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
||||||
pcre2_match_data_free_8(mdata);
|
pcre2_match_data_free_8(mdata);
|
||||||
|
pcre2_compile_context_free_8(ccontext);
|
||||||
|
|
||||||
if (total == successful) {
|
if (total == successful) {
|
||||||
printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
|
printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
|
||||||
|
@ -2005,7 +2054,8 @@ struct invalid_utf16_regression_test_case {
|
||||||
int start_offset;
|
int start_offset;
|
||||||
int skip_left;
|
int skip_left;
|
||||||
int skip_right;
|
int skip_right;
|
||||||
int expected_result;
|
int match_start;
|
||||||
|
int match_end;
|
||||||
const PCRE2_UCHAR16 *pattern[2];
|
const PCRE2_UCHAR16 *pattern[2];
|
||||||
const PCRE2_UCHAR16 *input;
|
const PCRE2_UCHAR16 *input;
|
||||||
};
|
};
|
||||||
|
@ -2024,41 +2074,41 @@ static PCRE2_UCHAR16 test6[] = { 'a', 'A', 0xdc28, 0 };
|
||||||
static PCRE2_UCHAR16 test7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
|
static PCRE2_UCHAR16 test7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
|
||||||
|
|
||||||
static struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = {
|
static struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = {
|
||||||
{ UDA, CI, 0, 0, 0, 1, { allany, NULL }, test1 },
|
{ UDA, CI, 0, 0, 0, 0, 1, { allany, NULL }, test1 },
|
||||||
{ UDA, CI, 1, 0, 0, 1, { allany, NULL }, test1 },
|
{ UDA, CI, 1, 0, 0, 1, 2, { allany, NULL }, test1 },
|
||||||
{ UDA, CI, 2, 0, 0, 1, { allany, NULL }, test1 },
|
{ UDA, CI, 2, 0, 0, 2, 3, { allany, NULL }, test1 },
|
||||||
{ UDA, CI, 3, 0, 0, 1, { allany, NULL }, test1 },
|
{ UDA, CI, 3, 0, 0, 3, 4, { allany, NULL }, test1 },
|
||||||
{ UDA, CI, 0, 0, 0, 1, { allany, NULL }, test2 },
|
{ UDA, CI, 0, 0, 0, 0, 2, { allany, NULL }, test2 },
|
||||||
{ UDA, CI, 0, 0, 2, -1, { allany, NULL }, test2 },
|
{ UDA, CI, 0, 0, 2, -1, -1, { allany, NULL }, test2 },
|
||||||
{ UDA, CI, 1, 0, 0, -1, { allany, NULL }, test2 },
|
{ UDA, CI, 1, 0, 0, -1, -1, { allany, NULL }, test2 },
|
||||||
{ UDA, CI, 0, 0, 0, 1, { allany, NULL }, test3 },
|
{ UDA, CI, 0, 0, 0, 0, 2, { allany, NULL }, test3 },
|
||||||
{ UDA, CI, 0, 0, 2, -1, { allany, NULL }, test3 },
|
{ UDA, CI, 0, 0, 2, -1, -1, { allany, NULL }, test3 },
|
||||||
{ UDA, CI, 1, 0, 0, -1, { allany, NULL }, test3 },
|
{ UDA, CI, 1, 0, 0, -1, -1, { allany, NULL }, test3 },
|
||||||
|
|
||||||
{ UDA, CPI, 1, 0, 0, 1, { non_word_boundary, NULL }, test1 },
|
{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary, NULL }, test1 },
|
||||||
{ UDA, CPI, 2, 0, 0, 1, { non_word_boundary, NULL }, test1 },
|
{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary, NULL }, test1 },
|
||||||
{ UDA, CPI, 3, 0, 0, 1, { non_word_boundary, NULL }, test1 },
|
{ UDA, CPI, 3, 0, 0, 3, 3, { non_word_boundary, NULL }, test1 },
|
||||||
{ UDA, CPI, 4, 0, 0, 1, { non_word_boundary, NULL }, test1 },
|
{ UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary, NULL }, test1 },
|
||||||
{ UDA, CPI, 2, 0, 0, 1, { non_word_boundary, NULL }, test2 },
|
{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary, NULL }, test2 },
|
||||||
{ UDA, CPI, 2, 0, 0, 1, { non_word_boundary, NULL }, test3 },
|
{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary, NULL }, test3 },
|
||||||
{ UDA, CPI, 2, 1, 0, -1, { non_word_boundary, word_boundary }, test2 },
|
{ UDA, CPI, 2, 1, 0, -1, -1, { non_word_boundary, word_boundary }, test2 },
|
||||||
{ UDA, CPI, 2, 1, 0, -1, { non_word_boundary, word_boundary }, test3 },
|
{ UDA, CPI, 2, 1, 0, -1, -1, { non_word_boundary, word_boundary }, test3 },
|
||||||
{ UDA, CPI, 2, 0, 0, -1, { non_word_boundary, word_boundary }, test4 },
|
{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary, word_boundary }, test4 },
|
||||||
{ UDA, CPI, 2, 0, 0, -1, { non_word_boundary, word_boundary }, test5 },
|
{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary, word_boundary }, test5 },
|
||||||
|
|
||||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 2, { backreference, NULL }, test6 },
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference, NULL }, test6 },
|
||||||
{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, { backreference, NULL }, test6 },
|
{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference, NULL }, test6 },
|
||||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 2, { backreference, NULL }, test7 },
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { backreference, NULL }, test7 },
|
||||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, { backreference, NULL }, test7 },
|
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { backreference, NULL }, test7 },
|
||||||
|
|
||||||
{ UDA, CPI, 0, 0, 0, 1, { grapheme, NULL }, test6 },
|
{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme, NULL }, test6 },
|
||||||
{ UDA, CPI, 1, 0, 0, 1, { grapheme, NULL }, test6 },
|
{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme, NULL }, test6 },
|
||||||
{ UDA, CPI, 2, 0, 0, -1, { grapheme, NULL }, test6 },
|
{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme, NULL }, test6 },
|
||||||
{ UDA, CPI, 0, 0, 0, 1, { grapheme, NULL }, test7 },
|
{ UDA, CPI, 0, 0, 0, 0, 2, { grapheme, NULL }, test7 },
|
||||||
{ UDA, CPI, 2, 0, 0, 1, { grapheme, NULL }, test7 },
|
{ UDA, CPI, 2, 0, 0, 2, 4, { grapheme, NULL }, test7 },
|
||||||
{ UDA, CPI, 1, 0, 0, -1, { grapheme, NULL }, test7 },
|
{ UDA, CPI, 1, 0, 0, -1, -1, { grapheme, NULL }, test7 },
|
||||||
|
|
||||||
{ 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
|
{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
|
||||||
};
|
};
|
||||||
|
|
||||||
#undef UDA
|
#undef UDA
|
||||||
|
@ -2066,18 +2116,19 @@ static struct invalid_utf16_regression_test_case invalid_utf16_regression_test_c
|
||||||
#undef CPI
|
#undef CPI
|
||||||
|
|
||||||
static int run_invalid_utf16_test(struct invalid_utf16_regression_test_case *current,
|
static int run_invalid_utf16_test(struct invalid_utf16_regression_test_case *current,
|
||||||
int pattern_index, int i, pcre2_match_data_16 *mdata)
|
int pattern_index, int i, pcre2_compile_context_16 *ccontext, pcre2_match_data_16 *mdata)
|
||||||
{
|
{
|
||||||
pcre2_code_16 *code;
|
pcre2_code_16 *code;
|
||||||
int result, errorcode;
|
int result, errorcode;
|
||||||
PCRE2_SIZE length, erroroffset;
|
PCRE2_SIZE length, erroroffset;
|
||||||
const PCRE2_UCHAR16 *input;
|
const PCRE2_UCHAR16 *input;
|
||||||
|
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(mdata);
|
||||||
|
|
||||||
if (current->pattern[i] == NULL)
|
if (current->pattern[i] == NULL)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED,
|
code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED,
|
||||||
current->compile_options, &errorcode, &erroroffset, NULL);
|
current->compile_options, &errorcode, &erroroffset, ccontext);
|
||||||
|
|
||||||
if (!code) {
|
if (!code) {
|
||||||
printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
|
printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
|
||||||
|
@ -2102,8 +2153,7 @@ static int run_invalid_utf16_test(struct invalid_utf16_regression_test_case *cur
|
||||||
result = pcre2_jit_match_16(code, (current->input + current->skip_left),
|
result = pcre2_jit_match_16(code, (current->input + current->skip_left),
|
||||||
length, current->start_offset - current->skip_left, 0, mdata, NULL);
|
length, current->start_offset - current->skip_left, 0, mdata, NULL);
|
||||||
|
|
||||||
if (result != current->expected_result) {
|
if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
|
||||||
printf("Pattern[%d:0] match result %d differs from expected %d.\n", pattern_index, result, current->expected_result);
|
|
||||||
pcre2_code_free_16(code);
|
pcre2_code_free_16(code);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -2113,8 +2163,7 @@ static int run_invalid_utf16_test(struct invalid_utf16_regression_test_case *cur
|
||||||
result = pcre2_jit_match_16(code, (current->input + current->skip_left),
|
result = pcre2_jit_match_16(code, (current->input + current->skip_left),
|
||||||
length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
|
length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
|
||||||
|
|
||||||
if (result != current->expected_result) {
|
if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
|
||||||
printf("Pattern[%d:0] partial match result %d differs from expected %d.\n", pattern_index, result, current->expected_result);
|
|
||||||
pcre2_code_free_16(code);
|
pcre2_code_free_16(code);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -2127,12 +2176,15 @@ static int run_invalid_utf16_test(struct invalid_utf16_regression_test_case *cur
|
||||||
static int invalid_utf16_regression_tests(void)
|
static int invalid_utf16_regression_tests(void)
|
||||||
{
|
{
|
||||||
struct invalid_utf16_regression_test_case *current;
|
struct invalid_utf16_regression_test_case *current;
|
||||||
|
pcre2_compile_context_16 *ccontext;
|
||||||
pcre2_match_data_16 *mdata;
|
pcre2_match_data_16 *mdata;
|
||||||
int total = 0, successful = 0;
|
int total = 0, successful = 0;
|
||||||
int result;
|
int result;
|
||||||
|
|
||||||
printf("\nRunning invalid-utf16 JIT regression tests\n");
|
printf("\nRunning invalid-utf16 JIT regression tests\n");
|
||||||
|
|
||||||
|
ccontext = pcre2_compile_context_create_16(NULL);
|
||||||
|
pcre2_set_newline_16(ccontext, PCRE2_NEWLINE_ANY);
|
||||||
mdata = pcre2_match_data_create_16(4, NULL);
|
mdata = pcre2_match_data_create_16(4, NULL);
|
||||||
|
|
||||||
for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
|
for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
|
||||||
|
@ -2140,9 +2192,9 @@ static int invalid_utf16_regression_tests(void)
|
||||||
total++;
|
total++;
|
||||||
|
|
||||||
result = 1;
|
result = 1;
|
||||||
if (!run_invalid_utf16_test(current, total - 1, 0, mdata))
|
if (!run_invalid_utf16_test(current, total - 1, 0, ccontext, mdata))
|
||||||
result = 0;
|
result = 0;
|
||||||
if (!run_invalid_utf16_test(current, total - 1, 1, mdata))
|
if (!run_invalid_utf16_test(current, total - 1, 1, ccontext, mdata))
|
||||||
result = 0;
|
result = 0;
|
||||||
|
|
||||||
if (result) {
|
if (result) {
|
||||||
|
@ -2158,6 +2210,7 @@ static int invalid_utf16_regression_tests(void)
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
||||||
pcre2_match_data_free_16(mdata);
|
pcre2_match_data_free_16(mdata);
|
||||||
|
pcre2_compile_context_free_16(ccontext);
|
||||||
|
|
||||||
if (total == successful) {
|
if (total == successful) {
|
||||||
printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
|
printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
|
||||||
|
|
Loading…
Reference in New Issue