Start working on invalid utf subject support in JIT.

This commit is contained in:
Zoltán Herczeg 2018-09-12 19:06:29 +00:00
parent bf3c7c68ec
commit 12c4a246b6
4 changed files with 1637 additions and 236 deletions

View File

@ -164,6 +164,7 @@ D is inspected during pcre2_dfa_match() execution
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
#define PCRE2_JIT_INVALID_UTF 0x00000100u
/* These are for pcre2_match(), pcre2_dfa_match(), and pcre2_jit_match(). Note
that PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK can also be passed to these

View File

@ -164,6 +164,7 @@ D is inspected during pcre2_dfa_match() execution
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
#define PCRE2_JIT_INVALID_UTF 0x00000100u
/* These are for pcre2_match(), pcre2_dfa_match(), and pcre2_jit_match(). Note
that PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK can also be passed to these

File diff suppressed because it is too large Load Diff

View File

@ -93,6 +93,8 @@ POSSIBILITY OF SUCH DAMAGE.
*/
static int regression_tests(void);
static int invalid_utf8_regression_tests(void);
static int invalid_utf16_regression_tests(void);
int main(void)
{
@ -108,7 +110,9 @@ int main(void)
printf("JIT must be enabled to run pcre_jit_test\n");
return 1;
}
return regression_tests();
return regression_tests()
|| invalid_utf8_regression_tests()
|| invalid_utf16_regression_tests();
}
/* --------------------------------------------------------------------------------------- */
@ -184,7 +188,7 @@ static struct regression_test_case regression_test_cases[] = {
{ CM, A, 0, 0, "\\Ca", "CDA" },
{ M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" },
{ CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" },
#endif
#endif /* !NEVER_BACKSLASH_C */
{ CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
{ CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
{ CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
@ -262,6 +266,9 @@ static struct regression_test_case regression_test_cases[] = {
{ MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
{ U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" },
{ U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" },
{ CM, A, 0, 0, "ab|cd", "CD" },
{ CM, A, 0, 0, "a1277|a1377|bX487", "bx487" },
{ CM, A, 0, 0, "a1277|a1377|bx487", "bX487" },
/* Greedy and non-greedy ? operators. */
{ MU, A, 0, 0, "(?:a)?a", "laab" },
@ -1163,7 +1170,7 @@ static int regression_tests(void)
#elif defined SUPPORT_PCRE2_32
PCRE2_UCHAR32 cpu_info[128];
#endif
#if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
int return_value;
#endif
@ -1331,9 +1338,8 @@ static int regression_tests(void)
ovector8_2[i] = -2;
}
if (re8) {
(void)pcre2_set_match_limit_8(mcontext8, 10000000);
return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, NULL);
if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
@ -1376,9 +1382,8 @@ static int regression_tests(void)
else
length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
(void)pcre2_set_match_limit_16(mcontext16, 10000000);
return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, NULL);
if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
@ -1421,9 +1426,8 @@ static int regression_tests(void)
else
length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
(void)pcre2_set_match_limit_32(mcontext32, 10000000);
return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, NULL);
if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
@ -1451,7 +1455,7 @@ static int regression_tests(void)
is_successful = 1;
if (!(current->start_offset & F_DIFF)) {
#if defined SUPPORT_UTF && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
if (!(current->start_offset & F_FORCECONV)) {
/* All results must be the same. */
@ -1500,8 +1504,8 @@ static int regression_tests(void)
is_successful = 0;
} else
#endif
if (return_value >= 0 || return_value == PCRE_ERROR_PARTIAL) {
if (return_value == PCRE_ERROR_PARTIAL) {
if (return_value >= 0 || return_value == PCRE2_ERROR_PARTIAL) {
if (return_value == PCRE2_ERROR_PARTIAL) {
return_value = 2;
} else {
return_value *= 2;
@ -1516,20 +1520,20 @@ static int regression_tests(void)
return_value32[0] = return_value;
#endif
/* Transform back the results. */
if (current->flags & PCRE_UTF8) {
if (current->compile_options & PCRE2_UTF) {
#ifdef SUPPORT_PCRE2_16
for (i = 0; i < return_value; ++i) {
if (ovector16_1[i] >= 0)
if (ovector16_1[i] != PCRE2_UNSET)
ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
if (ovector16_2[i] >= 0)
if (ovector16_2[i] != PCRE2_UNSET)
ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
}
#endif
#ifdef SUPPORT_PCRE2_32
for (i = 0; i < return_value; ++i) {
if (ovector32_1[i] >= 0)
if (ovector32_1[i] != PCRE2_UNSET)
ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
if (ovector32_2[i] >= 0)
if (ovector32_2[i] != PCRE2_UNSET)
ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
}
#endif
@ -1539,7 +1543,7 @@ static int regression_tests(void)
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector16_1[i], (int)ovector16_2[i],
total, current->pattern, current->input);
is_successful = 0;
}
@ -1547,7 +1551,7 @@ static int regression_tests(void)
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
i, ovector8_1[i], ovector8_2[i], ovector32_1[i], ovector32_2[i],
i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
total, current->pattern, current->input);
is_successful = 0;
}
@ -1555,7 +1559,7 @@ static int regression_tests(void)
#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
i, ovector16_1[i], ovector16_2[i], ovector32_1[i], ovector32_2[i],
i, (int)ovector16_1[i], (int)ovector16_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
total, current->pattern, current->input);
is_successful = 0;
}
@ -1751,4 +1755,426 @@ static int regression_tests(void)
}
}
#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
struct invalid_utf8_regression_test_case {
int compile_options;
int jit_compile_options;
int start_offset;
int skip_left;
int skip_right;
int expected_result;
const char *pattern[2];
const char *input;
};
static struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xf0\x90\x80\x80" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xf4\x90\x80\x80" },
{ UDA, CI, 0, 0, 1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xf0\x90\x80\x7f" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xf0\x90\x80\xc0" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" },
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xef\xbf\xbf#" },
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xef\xbf\xbf" },
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xe0\xa0\x80#" },
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xe0\xa0\x80" },
{ UDA, CI, 0, 0, 2, -1, { ".", NULL }, "\xef\xbf\xbf#" },
{ UDA, CI, 0, 0, 1, -1, { ".", NULL }, "\xef\xbf\xbf" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xef\xbf\x7f#" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xef\xbf\xc0" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xe0\x9f\xbf#" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xe0\x9f\xbf" },
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xed\x9f\xbf#" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xed\xa0\x80#" },
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xee\x80\x80#" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xed\xbf\xbf#" },
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xdf\xbf##" },
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xdf\xbf#" },
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xdf\xbf" },
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xc2\x80##" },
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xc2\x80#" },
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\xc2\x80" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xe0\x80##" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xdf\xc0##" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xe0\x80" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xdf\xc0" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xc1\xbf##" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xc1\xbf" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\x80###" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\x80" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xf8###" },
{ UDA, CI, 0, 0, 0, -1, { ".", NULL }, "\xf8" },
{ UDA, CI, 0, 0, 0, 1, { ".", NULL }, "\x7f" },
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80#" },
{ UDA, CPI, 4, 1, 0, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf#" },
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "#\xef\xbf\xbf#" },
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "#\xe0\xa0\x80#" },
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf#" },
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80#" },
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80#" },
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff#" },
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf#" },
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80#" },
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80#" },
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf#" },
{ UDA, CPI, 4, 2, 0, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80#" },
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "#\xf0\x80\x80#" },
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "#\xed\xa0\x80#" },
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "##\xdf\xbf#" },
{ UDA, CPI, 4, 2, 0, 1, { "\\B", NULL }, "##\xdf\xbf#" },
{ UDA, CPI, 4, 0, 0, 1, { "\\B", NULL }, "##\xc2\x80#" },
{ UDA, CPI, 4, 2, 0, 1, { "\\B", NULL }, "##\xc2\x80#" },
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "##\xc1\xbf#" },
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "##\xdf\xc0#" },
{ UDA, CPI, 4, 0, 0, -1, { "\\B", "\\b" }, "##\xe0\x80#" },
{ UDA, CPI, 4, 2, 0, -1, { "\\B", "\\b" }, "##\xe0\x80#" },
{ UDA, CPI, 3, 0, 0, 1, { "\\B", NULL }, "\xef\xbf\xbf#" },
{ UDA, CPI, 3, 0, 0, 1, { "\\B", NULL }, "\xe0\xa0\x80#" },
{ UDA, CPI, 3, 0, 0, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf#" },
{ UDA, CPI, 3, 1, 0, -1, { "\\B", "\\b" }, "\xef\xbf\xbf#" },
{ UDA, CPI, 3, 0, 0, -1, { "\\B", "\\b" }, "\xdf\x80\x80#" },
{ UDA, CPI, 3, 0, 0, -1, { "\\B", "\\b" }, "\xef\xbf\xff#" },
{ UDA, CPI, 3, 0, 0, -1, { "\\B", "\\b" }, "\xef\xff\xbf#" },
{ UDA, CPI, 3, 0, 0, -1, { "\\B", "\\b" }, "\xed\xbf\xbf#" },
{ UDA, CPI, 2, 0, 0, 1, { "\\B", NULL }, "\xdf\xbf#" },
{ UDA, CPI, 2, 0, 0, 1, { "\\B", NULL }, "\xc2\x80#" },
{ UDA, CPI, 2, 1, 0, -1, { "\\B", "\\b" }, "\xdf\xbf#" },
{ UDA, CPI, 2, 0, 0, -1, { "\\B", "\\b" }, "\xc1\xbf#" },
{ UDA, CPI, 2, 0, 0, -1, { "\\B", "\\b" }, "\xe0\x80#" },
{ UDA, CPI, 2, 0, 0, -1, { "\\B", "\\b" }, "\xdf\xff#" },
{ UDA, CPI, 2, 0, 0, -1, { "\\B", "\\b" }, "\xff\xbf#" },
{ UDA, CPI, 1, 0, 0, 1, { "\\B", NULL }, "\x7f#" },
{ UDA, CPI, 1, 0, 0, 1, { "\\B", NULL }, "\x01#" },
{ UDA, CPI, 1, 0, 0, -1, { "\\B", "\\b" }, "\x80#" },
{ UDA, CPI, 1, 0, 0, -1, { "\\B", "\\b" }, "\x80#" },
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, { "(.)\\1", NULL }, "a\xff" },
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 2, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" },
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 2, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 2, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
{ UDA, CPI, 0, 0, 0, 1, { "\\X", NULL }, "A" },
{ UDA, CPI, 0, 0, 0, -1, { "\\X", NULL }, "\xff" },
{ UDA, CPI, 0, 0, 0, 1, { "\\X", NULL }, "\xc3\xa1" },
{ UDA, CPI, 0, 0, 1, -1, { "\\X", NULL }, "\xc3\xa1" },
{ UDA, CPI, 0, 0, 0, -1, { "\\X", NULL }, "\xc3\x7f" },
{ UDA, CPI, 0, 0, 0, 1, { "\\X", NULL }, "\xe1\xbd\xb8" },
{ UDA, CPI, 0, 0, 1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" },
{ UDA, CPI, 0, 0, 0, 1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
{ UDA, CPI, 0, 0, 1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
{ 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
};
#undef UDA
#undef CI
#undef CPI
static int run_invalid_utf8_test(struct invalid_utf8_regression_test_case *current,
int pattern_index, int i, pcre2_match_data_8 *mdata)
{
pcre2_code_8 *code;
int result, errorcode;
PCRE2_SIZE length, erroroffset;
if (current->pattern[i] == NULL)
return 1;
code = pcre2_compile_8((PCRE2_UCHAR8*)current->pattern[i], PCRE2_ZERO_TERMINATED,
current->compile_options, &errorcode, &erroroffset, NULL);
if (!code) {
printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
return 0;
}
if (pcre2_jit_compile_8(code, current->jit_compile_options) != 0) {
printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
pcre2_code_free_8(code);
return 0;
}
length = (PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right);
if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
length, current->start_offset - current->skip_left, 0, mdata, NULL);
if (result != current->expected_result) {
printf("Pattern[%d:0] match result %d differs from expected %d.\n", pattern_index, result, current->expected_result);
pcre2_code_free_8(code);
return 0;
}
}
if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
if (result != current->expected_result) {
printf("Pattern[%d:0] partial match result %d differs from expected %d.\n", pattern_index, result, current->expected_result);
pcre2_code_free_8(code);
return 0;
}
}
pcre2_code_free_8(code);
return 1;
}
static int invalid_utf8_regression_tests(void)
{
struct invalid_utf8_regression_test_case *current;
pcre2_match_data_8 *mdata;
int total = 0, successful = 0;
int result;
printf("\nRunning invalid-utf8 JIT regression tests\n");
mdata = pcre2_match_data_create_8(4, NULL);
for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
/* printf("\nPattern: %s :\n", current->pattern); */
total++;
result = 1;
if (!run_invalid_utf8_test(current, total - 1, 0, mdata))
result = 0;
if (!run_invalid_utf8_test(current, total - 1, 1, mdata))
result = 0;
if (result) {
successful++;
}
printf(".");
if ((total % 60) == 0)
printf("\n");
}
if ((total % 60) != 0)
printf("\n");
pcre2_match_data_free_8(mdata);
if (total == successful) {
printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
return 0;
} else {
printf("\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
return 1;
}
}
#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_8 */
static int invalid_utf8_regression_tests(void)
{
return 0;
}
#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_8 */
#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
struct invalid_utf16_regression_test_case {
int compile_options;
int jit_compile_options;
int start_offset;
int skip_left;
int skip_right;
int expected_result;
const PCRE2_UCHAR16 *pattern[2];
const PCRE2_UCHAR16 *input;
};
static PCRE2_UCHAR16 allany[] = { '.', 0 };
static PCRE2_UCHAR16 non_word_boundary[] = { '\\', 'B', 0 };
static PCRE2_UCHAR16 word_boundary[] = { '\\', 'b', 0 };
static PCRE2_UCHAR16 backreference[] = { '(', '.', ')', '\\', '1', 0 };
static PCRE2_UCHAR16 grapheme[] = { '\\', 'X', 0 };
static PCRE2_UCHAR16 test1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 };
static PCRE2_UCHAR16 test2[] = { 0xd800, 0xdc00, '#', 0 };
static PCRE2_UCHAR16 test3[] = { 0xdbff, 0xdfff, '#', 0 };
static PCRE2_UCHAR16 test4[] = { 0xd800, 0xdbff, '#', 0 };
static PCRE2_UCHAR16 test5[] = { '#', 0xd800, '#', 0 };
static PCRE2_UCHAR16 test6[] = { 'a', 'A', 0xdc28, 0 };
static PCRE2_UCHAR16 test7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
static struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = {
{ UDA, CI, 0, 0, 0, 1, { allany, NULL }, test1 },
{ UDA, CI, 1, 0, 0, 1, { allany, NULL }, test1 },
{ UDA, CI, 2, 0, 0, 1, { allany, NULL }, test1 },
{ UDA, CI, 3, 0, 0, 1, { allany, NULL }, test1 },
{ UDA, CI, 0, 0, 0, 1, { allany, NULL }, test2 },
{ UDA, CI, 0, 0, 2, -1, { allany, NULL }, test2 },
{ UDA, CI, 1, 0, 0, -1, { allany, NULL }, test2 },
{ UDA, CI, 0, 0, 0, 1, { allany, NULL }, test3 },
{ UDA, CI, 0, 0, 2, -1, { allany, NULL }, test3 },
{ UDA, CI, 1, 0, 0, -1, { allany, NULL }, test3 },
{ UDA, CPI, 1, 0, 0, 1, { non_word_boundary, NULL }, test1 },
{ UDA, CPI, 2, 0, 0, 1, { non_word_boundary, NULL }, test1 },
{ UDA, CPI, 3, 0, 0, 1, { non_word_boundary, NULL }, test1 },
{ UDA, CPI, 4, 0, 0, 1, { non_word_boundary, NULL }, test1 },
{ UDA, CPI, 2, 0, 0, 1, { non_word_boundary, NULL }, test2 },
{ UDA, CPI, 2, 0, 0, 1, { non_word_boundary, NULL }, test3 },
{ UDA, CPI, 2, 1, 0, -1, { non_word_boundary, word_boundary }, test2 },
{ UDA, CPI, 2, 1, 0, -1, { non_word_boundary, word_boundary }, test3 },
{ UDA, CPI, 2, 0, 0, -1, { non_word_boundary, word_boundary }, test4 },
{ UDA, CPI, 2, 0, 0, -1, { non_word_boundary, word_boundary }, test5 },
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 2, { backreference, NULL }, test6 },
{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, { backreference, NULL }, test6 },
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 2, { backreference, NULL }, test7 },
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, { backreference, NULL }, test7 },
{ UDA, CPI, 0, 0, 0, 1, { grapheme, NULL }, test6 },
{ UDA, CPI, 1, 0, 0, 1, { grapheme, NULL }, test6 },
{ UDA, CPI, 2, 0, 0, -1, { grapheme, NULL }, test6 },
{ UDA, CPI, 0, 0, 0, 1, { grapheme, NULL }, test7 },
{ UDA, CPI, 2, 0, 0, 1, { grapheme, NULL }, test7 },
{ UDA, CPI, 1, 0, 0, -1, { grapheme, NULL }, test7 },
{ 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
};
#undef UDA
#undef CI
#undef CPI
static int run_invalid_utf16_test(struct invalid_utf16_regression_test_case *current,
int pattern_index, int i, pcre2_match_data_16 *mdata)
{
pcre2_code_16 *code;
int result, errorcode;
PCRE2_SIZE length, erroroffset;
const PCRE2_UCHAR16 *input;
if (current->pattern[i] == NULL)
return 1;
code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED,
current->compile_options, &errorcode, &erroroffset, NULL);
if (!code) {
printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
return 0;
}
if (pcre2_jit_compile_16(code, current->jit_compile_options) != 0) {
printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
pcre2_code_free_16(code);
return 0;
}
input = current->input;
length = 0;
while (*input++ != 0)
length++;
length -= current->skip_left + current->skip_right;
if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
result = pcre2_jit_match_16(code, (current->input + current->skip_left),
length, current->start_offset - current->skip_left, 0, mdata, NULL);
if (result != current->expected_result) {
printf("Pattern[%d:0] match result %d differs from expected %d.\n", pattern_index, result, current->expected_result);
pcre2_code_free_16(code);
return 0;
}
}
if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
result = pcre2_jit_match_16(code, (current->input + current->skip_left),
length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
if (result != current->expected_result) {
printf("Pattern[%d:0] partial match result %d differs from expected %d.\n", pattern_index, result, current->expected_result);
pcre2_code_free_16(code);
return 0;
}
}
pcre2_code_free_16(code);
return 1;
}
static int invalid_utf16_regression_tests(void)
{
struct invalid_utf16_regression_test_case *current;
pcre2_match_data_16 *mdata;
int total = 0, successful = 0;
int result;
printf("\nRunning invalid-utf16 JIT regression tests\n");
mdata = pcre2_match_data_create_16(4, NULL);
for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
/* printf("\nPattern: %s :\n", current->pattern); */
total++;
result = 1;
if (!run_invalid_utf16_test(current, total - 1, 0, mdata))
result = 0;
if (!run_invalid_utf16_test(current, total - 1, 1, mdata))
result = 0;
if (result) {
successful++;
}
printf(".");
if ((total % 60) == 0)
printf("\n");
}
if ((total % 60) != 0)
printf("\n");
pcre2_match_data_free_16(mdata);
if (total == successful) {
printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
return 0;
} else {
printf("\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
return 1;
}
}
#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_16 */
static int invalid_utf16_regression_tests(void)
{
return 0;
}
#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_16 */
/* End of pcre2_jit_test.c */