Rework word boundary in JIT.

This commit is contained in:
Zoltán Herczeg 2019-05-23 07:46:10 +00:00
parent 342c16ecd3
commit 2ad4329f83
2 changed files with 93 additions and 74 deletions

View File

@ -3898,9 +3898,9 @@ if (common->utf && negated)
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid) static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{ {
/* Goes one character back. TMP2 must contain the start of /* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE,
the subject buffer. Affects STR_PTR and TMP1. Does not modify TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer,
STR_PTR for invalid character sequences. */ and it is destroyed. Does not modify STR_PTR for invalid character sequences. */
DEFINE_COMPILER; DEFINE_COMPILER;
SLJIT_UNUSED_ARG(backtracks); SLJIT_UNUSED_ARG(backtracks);
@ -4440,7 +4440,7 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
static void do_utfpeakcharback(compiler_common *common) static void do_utfpeakcharback(compiler_common *common)
{ {
/* Peek a character back. */ /* Peek a character back. Does not modify STR_PTR. */
DEFINE_COMPILER; DEFINE_COMPILER;
struct sljit_jump *jump[2]; struct sljit_jump *jump[2];
@ -4477,7 +4477,7 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
static void do_utfpeakcharback_invalid(compiler_common *common) static void do_utfpeakcharback_invalid(compiler_common *common)
{ {
/* Peek a character back. */ /* Peek a character back. Does not modify STR_PTR. */
DEFINE_COMPILER; DEFINE_COMPILER;
sljit_s32 i; sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV); sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
@ -4705,7 +4705,7 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
static void do_utfpeakcharback_invalid(compiler_common *common) static void do_utfpeakcharback_invalid(compiler_common *common)
{ {
/* Peek a character back. */ /* Peek a character back. Does not modify STR_PTR. */
DEFINE_COMPILER; DEFINE_COMPILER;
struct sljit_jump *jump; struct sljit_jump *jump;
struct sljit_jump *exit_invalid[3]; struct sljit_jump *exit_invalid[3];
@ -4819,18 +4819,12 @@ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
// PH hacking /* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */
//fprintf(stderr, "~~A\n");
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0);
// OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0); sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
} }
@ -6770,7 +6764,11 @@ static void check_wordboundary(compiler_common *common)
DEFINE_COMPILER; DEFINE_COMPILER;
struct sljit_jump *skipread; struct sljit_jump *skipread;
jump_list *skipread_list = NULL; jump_list *skipread_list = NULL;
jump_list *invalid_utf = NULL; #ifdef SUPPORT_UNICODE
struct sljit_label *valid_utf;
jump_list *invalid_utf1 = NULL;
#endif /* SUPPORT_UNICODE */
jump_list *invalid_utf2 = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE #if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *jump; struct sljit_jump *jump;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */ #endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */
@ -6784,14 +6782,30 @@ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0); OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
if (common->mode == PCRE2_JIT_COMPLETE) #ifdef SUPPORT_UNICODE
peek_char_back(common, READ_CHAR_MAX, &invalid_utf); if (common->invalid_utf)
else
{ {
move_back(common, &invalid_utf, FALSE); peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);
if (common->mode != PCRE2_JIT_COMPLETE)
{
OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
move_back(common, NULL, TRUE);
check_start_used_ptr(common); check_start_used_ptr(common);
/* No need precise read since match fails anyway. */ OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
read_char(common, 0, READ_CHAR_MAX, &invalid_utf, READ_CHAR_UPDATE_STR_PTR); }
}
else
#endif /* SUPPORT_UNICODE */
{
if (common->mode == PCRE2_JIT_COMPLETE)
peek_char_back(common, READ_CHAR_MAX, NULL);
else
{
move_back(common, NULL, TRUE);
check_start_used_ptr(common);
read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
}
} }
/* Testing char type. */ /* Testing char type. */
@ -6835,10 +6849,13 @@ JUMPHERE(skipread);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list); check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf); peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);
/* Testing char type. This is a code duplication. */ /* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
valid_utf = LABEL();
if (common->use_ucp) if (common->use_ucp)
{ {
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
@ -6884,13 +6901,19 @@ sljit_emit_fast_return(compiler, TMP1, 0);
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
if (common->invalid_utf) if (common->invalid_utf)
{ {
SLJIT_ASSERT(invalid_utf != NULL); set_jumps(invalid_utf1, LABEL());
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);
set_jumps(invalid_utf, LABEL());
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
sljit_emit_fast_return(compiler, TMP1, 0); sljit_emit_fast_return(compiler, TMP1, 0);
return;
set_jumps(invalid_utf2, LABEL());
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
sljit_emit_fast_return(compiler, TMP1, 0);
} }
#endif /* SUPPORT_UNICODE */ #endif /* SUPPORT_UNICODE */
} }
@ -8224,9 +8247,7 @@ switch(type)
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
if (common->invalid_utf) if (common->invalid_utf)
{ {
OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_SIG_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0); add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
add_jump(compiler, backtracks, JUMP(SLJIT_SIG_LESS));
add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
return cc; return cc;
} }
#endif /* SUPPORT_UNICODE */ #endif /* SUPPORT_UNICODE */

View File

@ -1864,53 +1864,52 @@ static struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cas
{ UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" }, { UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" },
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" }, { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80#" }, { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80\xf4\xa0\x80\x80" },
{ UDA, CPI, 4, 1, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf#" }, { UDA, CPI, 4, 1, 1, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf" },
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" }, { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" },
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" }, { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" },
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" }, { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" }, { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf#" }, { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf\xf0\x8f\xbf\xbf" },
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80#" }, { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80\xf5\x80\x80\x80" },
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80#" }, { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80\xf4\x90\x80\x80" },
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff#" }, { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff\xf4\x8f\xbf\xff" },
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf#" }, { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf\xf4\x8f\xff\xbf" },
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80#" }, { UDA, CPI, 4, 0, 1, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80\xef\x80\x80" },
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80#" }, { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80\x80\x80\x80\x80" },
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf#" }, { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf\xe0\x9f\xbf#" },
{ UDA, CPI, 4, 2, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80#" }, { UDA, CPI, 4, 2, 2, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80\xe0\xa0\x80#" },
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80#" }, { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80\xf0\x80\x80#" },
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80#" }, { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80\xed\xa0\x80#" },
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" }, { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" },
{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" }, { UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" },
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" }, { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" },
{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" }, { UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" },
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf#" }, { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf\xc1\xbf##" },
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0#" }, { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0\xdf\xc0##" },
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80#" }, { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80\xe0\x80##" },
{ UDA, CPI, 4, 2, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80#" },
{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" }, { UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" },
{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" }, { UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" },
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf#" }, { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf\xe0\x9f\xbf" },
{ UDA, CPI, 3, 1, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf#" }, { UDA, CPI, 3, 1, 1, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf\xef\xbf\xbf" },
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80#" }, { UDA, CPI, 3, 0, 1, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80\xdf\x80" },
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff#" }, { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff\xef\xbf\xff" },
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf#" }, { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf\xef\xff\xbf" },
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf#" }, { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf\xed\xbf\xbf" },
{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" }, { UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" },
{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" }, { UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" },
{ UDA, CPI, 2, 1, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xbf#" }, { UDA, CPI, 2, 1, 1, -1, -1, { "\\B", "\\b" }, "\xdf\xbf\xdf\xbf" },
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf#" }, { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf\xc1\xbf" },
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80#" }, { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80\xe0\x80" },
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff#" }, { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff\xdf\xff" },
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf#" }, { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf\xff\xbf" },
{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" }, { UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" },
{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" }, { UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" },
{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80#" }, { UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80" },
{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80#" }, { UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\xb0\xb0" },
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" }, { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" }, { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" },
@ -2107,10 +2106,10 @@ static PCRE2_UCHAR16 nothashmark16[] = { '[', '^', '#', ']', 0 };
static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 }; static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 };
static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 }; static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 };
static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 }; static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 };
static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, '#', 0 }; static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, 0xd800, 0xdc00, 0 };
static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, '#', 0 }; static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, 0xdbff, 0xdfff, 0 };
static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, '#', 0 }; static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, 0xd800, 0xdbff, 0 };
static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, '#', 0 }; static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, 0xdc00, '#', 0 };
static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 }; static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 };
static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 }; static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 }; static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 };
@ -2125,10 +2124,10 @@ static struct invalid_utf16_regression_test_case invalid_utf16_regression_test_c
{ UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 }, { UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 },
{ UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 }, { UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 },
{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 }, { UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 },
{ UDA, CI, 0, 0, 2, -1, -1, { allany16, NULL }, test16_2 }, { UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_2 },
{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 }, { UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 },
{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 }, { UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 },
{ UDA, CI, 0, 0, 2, -1, -1, { allany16, NULL }, test16_3 }, { UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_3 },
{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_3 }, { UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_3 },
{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 }, { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 },
@ -2137,8 +2136,8 @@ static struct invalid_utf16_regression_test_case invalid_utf16_regression_test_c
{ UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 }, { UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 },
{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 }, { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 },
{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 }, { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 },
{ UDA, CPI, 2, 1, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 }, { UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 },
{ UDA, CPI, 2, 1, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 }, { UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 },
{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 }, { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 },
{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 }, { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 },
@ -2313,8 +2312,8 @@ static PCRE2_UCHAR32 backreference32[] = { '(', '.', ')', '\\', '1', 0 };
static PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 }; static PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 };
static PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 }; static PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 };
static PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 }; static PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 };
static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x10ffff, 0 }; static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x110000, 0x10ffff, 0 };
static PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0 }; static PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0xdfff, 0xd800, 0 };
static PCRE2_UCHAR32 test32_3[] = { 'a', 'A', 0x110000, 0 }; static PCRE2_UCHAR32 test32_3[] = { 'a', 'A', 0x110000, 0 };
static PCRE2_UCHAR32 test32_4[] = { '#', 0x10ffff, 0x110000, 0 }; static PCRE2_UCHAR32 test32_4[] = { '#', 0x10ffff, 0x110000, 0 };
static PCRE2_UCHAR32 test32_5[] = { ' ', 0x2028, '#', 0 }; static PCRE2_UCHAR32 test32_5[] = { ' ', 0x2028, '#', 0 };
@ -2329,11 +2328,10 @@ static struct invalid_utf32_regression_test_case invalid_utf32_regression_test_c
{ UDA, CI, 3, 0, 0, -1, -1, { allany32, NULL }, test32_2 }, { UDA, CI, 3, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 }, { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 },
{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 }, { UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_2 }, { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_2 },
{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 }, { UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
{ UDA, CPI, 4, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 }, { UDA, CPI, 6, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_3 }, { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_3 },
{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_3 }, { UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_3 },