Start working on invalid utf subject support in JIT.
This commit is contained in:
parent
bf3c7c68ec
commit
12c4a246b6
|
@ -164,6 +164,7 @@ D is inspected during pcre2_dfa_match() execution
|
|||
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
|
||||
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
|
||||
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
|
||||
#define PCRE2_JIT_INVALID_UTF 0x00000100u
|
||||
|
||||
/* These are for pcre2_match(), pcre2_dfa_match(), and pcre2_jit_match(). Note
|
||||
that PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK can also be passed to these
|
||||
|
|
|
@ -164,6 +164,7 @@ D is inspected during pcre2_dfa_match() execution
|
|||
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
|
||||
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
|
||||
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
|
||||
#define PCRE2_JIT_INVALID_UTF 0x00000100u
|
||||
|
||||
/* These are for pcre2_match(), pcre2_dfa_match(), and pcre2_jit_match(). Note
|
||||
that PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK can also be passed to these
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -93,6 +93,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
*/
|
||||
|
||||
static int regression_tests(void);
|
||||
static int invalid_utf8_regression_tests(void);
|
||||
static int invalid_utf16_regression_tests(void);
|
||||
|
||||
int main(void)
|
||||
{
|
||||
|
@ -108,7 +110,9 @@ int main(void)
|
|||
printf("JIT must be enabled to run pcre_jit_test\n");
|
||||
return 1;
|
||||
}
|
||||
return regression_tests();
|
||||
return regression_tests()
|
||||
|| invalid_utf8_regression_tests()
|
||||
|| invalid_utf16_regression_tests();
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------------------------- */
|
||||
|
@ -184,7 +188,7 @@ static struct regression_test_case regression_test_cases[] = {
|
|||
{ CM, A, 0, 0, "\\Ca", "CDA" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" },
|
||||
{ CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" },
|
||||
#endif
|
||||
#endif /* !NEVER_BACKSLASH_C */
|
||||
{ CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
|
||||
{ CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
|
||||
{ CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
|
||||
|
@ -262,6 +266,9 @@ static struct regression_test_case regression_test_cases[] = {
|
|||
{ MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
|
||||
{ U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" },
|
||||
{ U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" },
|
||||
{ CM, A, 0, 0, "ab|cd", "CD" },
|
||||
{ CM, A, 0, 0, "a1277|a1377|bX487", "bx487" },
|
||||
{ CM, A, 0, 0, "a1277|a1377|bx487", "bX487" },
|
||||
|
||||
/* Greedy and non-greedy ? operators. */
|
||||
{ MU, A, 0, 0, "(?:a)?a", "laab" },
|
||||
|
@ -1163,7 +1170,7 @@ static int regression_tests(void)
|
|||
#elif defined SUPPORT_PCRE2_32
|
||||
PCRE2_UCHAR32 cpu_info[128];
|
||||
#endif
|
||||
#if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
|
||||
#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
|
||||
int return_value;
|
||||
#endif
|
||||
|
||||
|
@ -1331,9 +1338,8 @@ static int regression_tests(void)
|
|||
ovector8_2[i] = -2;
|
||||
}
|
||||
if (re8) {
|
||||
(void)pcre2_set_match_limit_8(mcontext8, 10000000);
|
||||
return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, NULL);
|
||||
|
||||
if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
|
||||
printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
|
||||
|
@ -1376,9 +1382,8 @@ static int regression_tests(void)
|
|||
else
|
||||
length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
|
||||
|
||||
(void)pcre2_set_match_limit_16(mcontext16, 10000000);
|
||||
return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, NULL);
|
||||
|
||||
if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
|
||||
printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
|
||||
|
@ -1421,9 +1426,8 @@ static int regression_tests(void)
|
|||
else
|
||||
length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
|
||||
|
||||
(void)pcre2_set_match_limit_32(mcontext32, 10000000);
|
||||
return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, NULL);
|
||||
|
||||
if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
|
||||
printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
|
||||
|
@ -1451,7 +1455,7 @@ static int regression_tests(void)
|
|||
|
||||
is_successful = 1;
|
||||
if (!(current->start_offset & F_DIFF)) {
|
||||
#if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
|
||||
#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
|
||||
if (!(current->start_offset & F_FORCECONV)) {
|
||||
|
||||
/* All results must be the same. */
|
||||
|
@ -1500,8 +1504,8 @@ static int regression_tests(void)
|
|||
is_successful = 0;
|
||||
} else
|
||||
#endif
|
||||
if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
|
||||
if (return_value == PCRE_ERROR_PARTIAL) {
|
||||
if (return_value >= 0 || return_value == PCRE2_ERROR_PARTIAL) {
|
||||
if (return_value == PCRE2_ERROR_PARTIAL) {
|
||||
return_value = 2;
|
||||
} else {
|
||||
return_value *= 2;
|
||||
|
@ -1516,20 +1520,20 @@ static int regression_tests(void)
|
|||
return_value32[0] = return_value;
|
||||
#endif
|
||||
/* Transform back the results. */
|
||||
if (current->flags & PCRE_UTF8) {
|
||||
if (current->compile_options & PCRE2_UTF) {
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
for (i = 0; i < return_value; ++i) {
|
||||
if (ovector16_1[i] >= 0)
|
||||
if (ovector16_1[i] != PCRE2_UNSET)
|
||||
ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
|
||||
if (ovector16_2[i] >= 0)
|
||||
if (ovector16_2[i] != PCRE2_UNSET)
|
||||
ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
|
||||
}
|
||||
#endif
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
for (i = 0; i < return_value; ++i) {
|
||||
if (ovector32_1[i] >= 0)
|
||||
if (ovector32_1[i] != PCRE2_UNSET)
|
||||
ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
|
||||
if (ovector32_2[i] >= 0)
|
||||
if (ovector32_2[i] != PCRE2_UNSET)
|
||||
ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
|
||||
}
|
||||
#endif
|
||||
|
@ -1539,7 +1543,7 @@ static int regression_tests(void)
|
|||
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
|
||||
if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
|
||||
printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
|
||||
i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
|
||||
i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector16_1[i], (int)ovector16_2[i],
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
|
@ -1547,7 +1551,7 @@ static int regression_tests(void)
|
|||
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
|
||||
if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
|
||||
printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
|
||||
i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
|
||||
i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
|
@ -1555,7 +1559,7 @@ static int regression_tests(void)
|
|||
#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
|
||||
if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
|
||||
printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
|
||||
i, ovector16_1[i], ovector16_2[i], ovector32_1[i], ovector32_2[i],
|
||||
i, (int)ovector16_1[i], (int)ovector16_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
|
||||
total, current->pattern, current->input);
|
||||
is_successful = 0;
|
||||
}
|
||||
|
@ -1751,4 +1755,426 @@ static int regression_tests(void)
|
|||
}
|
||||
}
|
||||
|
||||
#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
|
||||
|
||||
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
|
||||
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
|
||||
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
|
||||
|
||||
struct invalid_utf8_regression_test_case {
|
||||
int compile_options;
|
||||
int jit_compile_options;
|
||||
int start_offset;
|
||||
int skip_left;
|
||||
int skip_right;
|
||||
int expected_result;
|
||||
const char *pattern[2];
|
||||
const char *input;
|
||||
};
|
||||
|
||||
static struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
|
||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xf0\x90\x80\x80" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xf4\x90\x80\x80" },
|
||||
{ UDA, CI, 0, 0, 1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xf0\x90\x80\x7f" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xf0\x90\x80\xc0" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xef\xbf\xbf#" },
|
||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xef\xbf\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xe0\xa0\x80#" },
|
||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xe0\xa0\x80" },
|
||||
{ UDA, CI, 0, 0, 2, -1, { ".", NULL }, "\xef\xbf\xbf#" },
|
||||
{ UDA, CI, 0, 0, 1, -1, { ".", NULL }, "\xef\xbf\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xef\xbf\x7f#" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xef\xbf\xc0" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xe0\x9f\xbf#" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xe0\x9f\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xed\x9f\xbf#" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xed\xa0\x80#" },
|
||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xee\x80\x80#" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xed\xbf\xbf#" },
|
||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xdf\xbf##" },
|
||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xdf\xbf#" },
|
||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xdf\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xc2\x80##" },
|
||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xc2\x80#" },
|
||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xc2\x80" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xe0\x80##" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xdf\xc0##" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xe0\x80" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xdf\xc0" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xc1\xbf##" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xc1\xbf" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\x80###" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\x80" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xf8###" },
|
||||
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xf8" },
|
||||
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\x7f" },
|
||||
|
||||
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80#" },
|
||||
{ UDA, CPI, 4, 1, 0, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf#" },
|
||||
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "#\xef\xbf\xbf#" },
|
||||
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "#\xe0\xa0\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf#" },
|
||||
{ UDA, CPI, 4, 2, 0, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "#\xf0\x80\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "#\xed\xa0\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "##\xdf\xbf#" },
|
||||
{ UDA, CPI, 4, 2, 0, 1, { "\\B", NULL }, "##\xdf\xbf#" },
|
||||
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "##\xc2\x80#" },
|
||||
{ UDA, CPI, 4, 2, 0, 1, { "\\B", NULL }, "##\xc2\x80#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "##\xc1\xbf#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "##\xdf\xc0#" },
|
||||
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "##\xe0\x80#" },
|
||||
{ UDA, CPI, 4, 2, 0, -1, { "\\B", "\\b" }, "##\xe0\x80#" },
|
||||
|
||||
{ UDA, CPI, 3, 0, 0, 1, { "\\B", NULL }, "\xef\xbf\xbf#" },
|
||||
{ UDA, CPI, 3, 0, 0, 1, { "\\B", NULL }, "\xe0\xa0\x80#" },
|
||||
{ UDA, CPI, 3, 0, 0, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf#" },
|
||||
{ UDA, CPI, 3, 1, 0, -1, { "\\B", "\\b" }, "\xef\xbf\xbf#" },
|
||||
{ UDA, CPI, 3, 0, 0, -1, { "\\B", "\\b" }, "\xdf\x80\x80#" },
|
||||
{ UDA, CPI, 3, 0, 0, -1, { "\\B", "\\b" }, "\xef\xbf\xff#" },
|
||||
{ UDA, CPI, 3, 0, 0, -1, { "\\B", "\\b" }, "\xef\xff\xbf#" },
|
||||
{ UDA, CPI, 3, 0, 0, -1, { "\\B", "\\b" }, "\xed\xbf\xbf#" },
|
||||
|
||||
{ UDA, CPI, 2, 0, 0, 1, { "\\B", NULL }, "\xdf\xbf#" },
|
||||
{ UDA, CPI, 2, 0, 0, 1, { "\\B", NULL }, "\xc2\x80#" },
|
||||
{ UDA, CPI, 2, 1, 0, -1, { "\\B", "\\b" }, "\xdf\xbf#" },
|
||||
{ UDA, CPI, 2, 0, 0, -1, { "\\B", "\\b" }, "\xc1\xbf#" },
|
||||
{ UDA, CPI, 2, 0, 0, -1, { "\\B", "\\b" }, "\xe0\x80#" },
|
||||
{ UDA, CPI, 2, 0, 0, -1, { "\\B", "\\b" }, "\xdf\xff#" },
|
||||
{ UDA, CPI, 2, 0, 0, -1, { "\\B", "\\b" }, "\xff\xbf#" },
|
||||
|
||||
{ UDA, CPI, 1, 0, 0, 1, { "\\B", NULL }, "\x7f#" },
|
||||
{ UDA, CPI, 1, 0, 0, 1, { "\\B", NULL }, "\x01#" },
|
||||
{ UDA, CPI, 1, 0, 0, -1, { "\\B", "\\b" }, "\x80#" },
|
||||
{ UDA, CPI, 1, 0, 0, -1, { "\\B", "\\b" }, "\x80#" },
|
||||
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, { "(.)\\1", NULL }, "a\xff" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 2, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 2, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 2, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
|
||||
|
||||
{ UDA, CPI, 0, 0, 0, 1, { "\\X", NULL }, "A" },
|
||||
{ UDA, CPI, 0, 0, 0, -1, { "\\X", NULL }, "\xff" },
|
||||
{ UDA, CPI, 0, 0, 0, 1, { "\\X", NULL }, "\xc3\xa1" },
|
||||
{ UDA, CPI, 0, 0, 1, -1, { "\\X", NULL }, "\xc3\xa1" },
|
||||
{ UDA, CPI, 0, 0, 0, -1, { "\\X", NULL }, "\xc3\x7f" },
|
||||
{ UDA, CPI, 0, 0, 0, 1, { "\\X", NULL }, "\xe1\xbd\xb8" },
|
||||
{ UDA, CPI, 0, 0, 1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" },
|
||||
{ UDA, CPI, 0, 0, 0, 1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
|
||||
{ UDA, CPI, 0, 0, 1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
|
||||
|
||||
{ 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
|
||||
};
|
||||
|
||||
#undef UDA
|
||||
#undef CI
|
||||
#undef CPI
|
||||
|
||||
static int run_invalid_utf8_test(struct invalid_utf8_regression_test_case *current,
|
||||
int pattern_index, int i, pcre2_match_data_8 *mdata)
|
||||
{
|
||||
pcre2_code_8 *code;
|
||||
int result, errorcode;
|
||||
PCRE2_SIZE length, erroroffset;
|
||||
|
||||
if (current->pattern[i] == NULL)
|
||||
return 1;
|
||||
|
||||
code = pcre2_compile_8((PCRE2_UCHAR8*)current->pattern[i], PCRE2_ZERO_TERMINATED,
|
||||
current->compile_options, &errorcode, &erroroffset, NULL);
|
||||
|
||||
if (!code) {
|
||||
printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (pcre2_jit_compile_8(code, current->jit_compile_options) != 0) {
|
||||
printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
|
||||
pcre2_code_free_8(code);
|
||||
return 0;
|
||||
}
|
||||
|
||||
length = (PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right);
|
||||
|
||||
if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
|
||||
result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
|
||||
length, current->start_offset - current->skip_left, 0, mdata, NULL);
|
||||
|
||||
if (result != current->expected_result) {
|
||||
printf("Pattern[%d:0] match result %d differs from expected %d.\n", pattern_index, result, current->expected_result);
|
||||
pcre2_code_free_8(code);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
|
||||
result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
|
||||
length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
|
||||
|
||||
if (result != current->expected_result) {
|
||||
printf("Pattern[%d:0] partial match result %d differs from expected %d.\n", pattern_index, result, current->expected_result);
|
||||
pcre2_code_free_8(code);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
pcre2_code_free_8(code);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int invalid_utf8_regression_tests(void)
|
||||
{
|
||||
struct invalid_utf8_regression_test_case *current;
|
||||
pcre2_match_data_8 *mdata;
|
||||
int total = 0, successful = 0;
|
||||
int result;
|
||||
|
||||
printf("\nRunning invalid-utf8 JIT regression tests\n");
|
||||
|
||||
mdata = pcre2_match_data_create_8(4, NULL);
|
||||
|
||||
for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
|
||||
/* printf("\nPattern: %s :\n", current->pattern); */
|
||||
total++;
|
||||
|
||||
result = 1;
|
||||
if (!run_invalid_utf8_test(current, total - 1, 0, mdata))
|
||||
result = 0;
|
||||
if (!run_invalid_utf8_test(current, total - 1, 1, mdata))
|
||||
result = 0;
|
||||
|
||||
if (result) {
|
||||
successful++;
|
||||
}
|
||||
|
||||
printf(".");
|
||||
if ((total % 60) == 0)
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
if ((total % 60) != 0)
|
||||
printf("\n");
|
||||
|
||||
pcre2_match_data_free_8(mdata);
|
||||
|
||||
if (total == successful) {
|
||||
printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
|
||||
return 0;
|
||||
} else {
|
||||
printf("\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_8 */
|
||||
|
||||
static int invalid_utf8_regression_tests(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_8 */
|
||||
|
||||
#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16
|
||||
|
||||
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
|
||||
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
|
||||
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
|
||||
|
||||
struct invalid_utf16_regression_test_case {
|
||||
int compile_options;
|
||||
int jit_compile_options;
|
||||
int start_offset;
|
||||
int skip_left;
|
||||
int skip_right;
|
||||
int expected_result;
|
||||
const PCRE2_UCHAR16 *pattern[2];
|
||||
const PCRE2_UCHAR16 *input;
|
||||
};
|
||||
|
||||
static PCRE2_UCHAR16 allany[] = { '.', 0 };
|
||||
static PCRE2_UCHAR16 non_word_boundary[] = { '\\', 'B', 0 };
|
||||
static PCRE2_UCHAR16 word_boundary[] = { '\\', 'b', 0 };
|
||||
static PCRE2_UCHAR16 backreference[] = { '(', '.', ')', '\\', '1', 0 };
|
||||
static PCRE2_UCHAR16 grapheme[] = { '\\', 'X', 0 };
|
||||
static PCRE2_UCHAR16 test1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 };
|
||||
static PCRE2_UCHAR16 test2[] = { 0xd800, 0xdc00, '#', 0 };
|
||||
static PCRE2_UCHAR16 test3[] = { 0xdbff, 0xdfff, '#', 0 };
|
||||
static PCRE2_UCHAR16 test4[] = { 0xd800, 0xdbff, '#', 0 };
|
||||
static PCRE2_UCHAR16 test5[] = { '#', 0xd800, '#', 0 };
|
||||
static PCRE2_UCHAR16 test6[] = { 'a', 'A', 0xdc28, 0 };
|
||||
static PCRE2_UCHAR16 test7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
|
||||
|
||||
static struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = {
|
||||
{ UDA, CI, 0, 0, 0, 1, { allany, NULL }, test1 },
|
||||
{ UDA, CI, 1, 0, 0, 1, { allany, NULL }, test1 },
|
||||
{ UDA, CI, 2, 0, 0, 1, { allany, NULL }, test1 },
|
||||
{ UDA, CI, 3, 0, 0, 1, { allany, NULL }, test1 },
|
||||
{ UDA, CI, 0, 0, 0, 1, { allany, NULL }, test2 },
|
||||
{ UDA, CI, 0, 0, 2, -1, { allany, NULL }, test2 },
|
||||
{ UDA, CI, 1, 0, 0, -1, { allany, NULL }, test2 },
|
||||
{ UDA, CI, 0, 0, 0, 1, { allany, NULL }, test3 },
|
||||
{ UDA, CI, 0, 0, 2, -1, { allany, NULL }, test3 },
|
||||
{ UDA, CI, 1, 0, 0, -1, { allany, NULL }, test3 },
|
||||
|
||||
{ UDA, CPI, 1, 0, 0, 1, { non_word_boundary, NULL }, test1 },
|
||||
{ UDA, CPI, 2, 0, 0, 1, { non_word_boundary, NULL }, test1 },
|
||||
{ UDA, CPI, 3, 0, 0, 1, { non_word_boundary, NULL }, test1 },
|
||||
{ UDA, CPI, 4, 0, 0, 1, { non_word_boundary, NULL }, test1 },
|
||||
{ UDA, CPI, 2, 0, 0, 1, { non_word_boundary, NULL }, test2 },
|
||||
{ UDA, CPI, 2, 0, 0, 1, { non_word_boundary, NULL }, test3 },
|
||||
{ UDA, CPI, 2, 1, 0, -1, { non_word_boundary, word_boundary }, test2 },
|
||||
{ UDA, CPI, 2, 1, 0, -1, { non_word_boundary, word_boundary }, test3 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, { non_word_boundary, word_boundary }, test4 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, { non_word_boundary, word_boundary }, test5 },
|
||||
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 2, { backreference, NULL }, test6 },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, { backreference, NULL }, test6 },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 2, { backreference, NULL }, test7 },
|
||||
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, { backreference, NULL }, test7 },
|
||||
|
||||
{ UDA, CPI, 0, 0, 0, 1, { grapheme, NULL }, test6 },
|
||||
{ UDA, CPI, 1, 0, 0, 1, { grapheme, NULL }, test6 },
|
||||
{ UDA, CPI, 2, 0, 0, -1, { grapheme, NULL }, test6 },
|
||||
{ UDA, CPI, 0, 0, 0, 1, { grapheme, NULL }, test7 },
|
||||
{ UDA, CPI, 2, 0, 0, 1, { grapheme, NULL }, test7 },
|
||||
{ UDA, CPI, 1, 0, 0, -1, { grapheme, NULL }, test7 },
|
||||
|
||||
{ 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
|
||||
};
|
||||
|
||||
#undef UDA
|
||||
#undef CI
|
||||
#undef CPI
|
||||
|
||||
static int run_invalid_utf16_test(struct invalid_utf16_regression_test_case *current,
|
||||
int pattern_index, int i, pcre2_match_data_16 *mdata)
|
||||
{
|
||||
pcre2_code_16 *code;
|
||||
int result, errorcode;
|
||||
PCRE2_SIZE length, erroroffset;
|
||||
const PCRE2_UCHAR16 *input;
|
||||
|
||||
if (current->pattern[i] == NULL)
|
||||
return 1;
|
||||
|
||||
code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED,
|
||||
current->compile_options, &errorcode, &erroroffset, NULL);
|
||||
|
||||
if (!code) {
|
||||
printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (pcre2_jit_compile_16(code, current->jit_compile_options) != 0) {
|
||||
printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
|
||||
pcre2_code_free_16(code);
|
||||
return 0;
|
||||
}
|
||||
|
||||
input = current->input;
|
||||
length = 0;
|
||||
|
||||
while (*input++ != 0)
|
||||
length++;
|
||||
|
||||
length -= current->skip_left + current->skip_right;
|
||||
|
||||
if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
|
||||
result = pcre2_jit_match_16(code, (current->input + current->skip_left),
|
||||
length, current->start_offset - current->skip_left, 0, mdata, NULL);
|
||||
|
||||
if (result != current->expected_result) {
|
||||
printf("Pattern[%d:0] match result %d differs from expected %d.\n", pattern_index, result, current->expected_result);
|
||||
pcre2_code_free_16(code);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
|
||||
result = pcre2_jit_match_16(code, (current->input + current->skip_left),
|
||||
length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
|
||||
|
||||
if (result != current->expected_result) {
|
||||
printf("Pattern[%d:0] partial match result %d differs from expected %d.\n", pattern_index, result, current->expected_result);
|
||||
pcre2_code_free_16(code);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
pcre2_code_free_16(code);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int invalid_utf16_regression_tests(void)
|
||||
{
|
||||
struct invalid_utf16_regression_test_case *current;
|
||||
pcre2_match_data_16 *mdata;
|
||||
int total = 0, successful = 0;
|
||||
int result;
|
||||
|
||||
printf("\nRunning invalid-utf16 JIT regression tests\n");
|
||||
|
||||
mdata = pcre2_match_data_create_16(4, NULL);
|
||||
|
||||
for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
|
||||
/* printf("\nPattern: %s :\n", current->pattern); */
|
||||
total++;
|
||||
|
||||
result = 1;
|
||||
if (!run_invalid_utf16_test(current, total - 1, 0, mdata))
|
||||
result = 0;
|
||||
if (!run_invalid_utf16_test(current, total - 1, 1, mdata))
|
||||
result = 0;
|
||||
|
||||
if (result) {
|
||||
successful++;
|
||||
}
|
||||
|
||||
printf(".");
|
||||
if ((total % 60) == 0)
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
if ((total % 60) != 0)
|
||||
printf("\n");
|
||||
|
||||
pcre2_match_data_free_16(mdata);
|
||||
|
||||
if (total == successful) {
|
||||
printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
|
||||
return 0;
|
||||
} else {
|
||||
printf("\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_16 */
|
||||
|
||||
static int invalid_utf16_regression_tests(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_16 */
|
||||
|
||||
/* End of pcre2_jit_test.c */
|
||||
|
|
Loading…
Reference in New Issue