Implement PCRE2_LITERAL and REG_NOSPEC.

This commit is contained in:
Philip.Hazel 2017-06-15 16:41:44 +00:00
parent 95724543c3
commit c4fac10bad
18 changed files with 309 additions and 109 deletions

View File

@ -187,6 +187,8 @@ starting offset greater than zero.
40. Implement the subject_literal modifier in pcre2test, and allow jitstack on
pattern lines.
41. Implement PCRE2_LITERAL and use it to support REG_NOSPEC.
Version 10.23 14-February-2017
------------------------------

View File

@ -500,7 +500,7 @@ for bmode in "$test8" "$test16" "$test32"; do
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
if [ $? = 0 ] ; then
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -65,-62,-2,-1,0,100,101,191,192 >>testtry
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -65,-62,-2,-1,0,100,101,191,200 >>testtry
checkresult $? 2 "$opt"
fi
done

View File

@ -1,4 +1,4 @@
.TH PCRE2API 3 "01 June 2017" "PCRE2 10.30"
.TH PCRE2API 3 "15 June 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.sp
@ -1393,6 +1393,17 @@ continue over the newline. See also PCRE2_USE_OFFSET_LIMIT, which provides a
more general limiting facility. If PCRE2_FIRSTLINE is set with an offset limit,
a match must occur in the first line and also within the offset limit. In other
words, whichever limit comes first is used.
.sp
PCRE2_LITERAL
.sp
If this option is set, all meta-characters in the pattern are disabled, and the
pattern is treated as a literal string. A regular expression engine is not the
most efficient way of matching literal strings. If you are doing a lot of
literal matching and are worried about efficiency, you should consider using
other approaches. The only other options that are allowed with
PCRE2_LITERAL are: PCRE2_ANCHORED, PCRE2_ENDANCHORED, PCRE2_AUTO_CALLOUT,
PCRE2_CASELESS, PCRE2_FIRSTLINE, PCRE2_NO_START_OPTIMIZE, PCRE2_NO_UTF_CHECK,
PCRE2_UTF, and PCRE2_USE_OFFSET_LIMIT. Any other options cause an error.
.sp
PCRE2_MATCH_UNSET_BACKREF
.sp
@ -3508,6 +3519,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 01 June 2017
Last updated: 15 June 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi

View File

@ -1,4 +1,4 @@
.TH PCRE2POSIX 3 "05 June 2017" "PCRE2 10.30"
.TH PCRE2POSIX 3 "15 June 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "SYNOPSIS"
@ -93,6 +93,14 @@ compilation to the native function.
The PCRE2_MULTILINE option is set when the regular expression is passed for
compilation to the native function. Note that this does \fInot\fP mimic the
defined POSIX behaviour for REG_NEWLINE (see the following section).
.sp
REG_NOSPEC
.sp
The PCRE2_LITERAL option is set when the regular expression is passed for
compilation to the native function. This disables all meta-characters in the
pattern, causing it to be treated as a literal string. The only other options
that are allowed with REG_NOSPEC are REG_ICASE, REG_NOSUB, REG_PEND, and
REG_UTF. Note that REG_NOSPEC is not part of the POSIX standard.
.sp
REG_NOSUB
.sp
@ -292,6 +300,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 05 June 2017
Last updated: 15 June 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi

View File

@ -1,4 +1,4 @@
.TH PCRE2TEST 1 "12 June 2017" "PCRE 10.30"
.TH PCRE2TEST 1 "15 June 2017" "PCRE2 10.30"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@ -555,6 +555,7 @@ for a description of the effects of these options.
/x extended set PCRE2_EXTENDED
/xx extended_more set PCRE2_EXTENDED_MORE
firstline set PCRE2_FIRSTLINE
literal set PCRE2_LITERAL
match_unset_backref set PCRE2_MATCH_UNSET_BACKREF
/m multiline set PCRE2_MULTILINE
never_backslash_c set PCRE2_NEVER_BACKSLASH_C
@ -1834,6 +1835,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 12 June 2017
Last updated: 15 June 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi

View File

@ -138,6 +138,7 @@ D is inspected during pcre2_dfa_match() execution
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
#define PCRE2_LITERAL 0x02000000u /* C */
/* An additional compile options word is available in the compile context. */

View File

@ -138,6 +138,7 @@ D is inspected during pcre2_dfa_match() execution
#define PCRE2_ALT_VERBNAMES 0x00400000u /* C */
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
#define PCRE2_LITERAL 0x02000000u /* C */
/* An additional compile options word is available in the compile context. */

View File

@ -696,13 +696,18 @@ static int posix_substitutes[] = {
(PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED| \
PCRE2_EXTENDED_MORE|PCRE2_FIRSTLINE| \
PCRE2_EXTENDED_MORE|PCRE2_FIRSTLINE|PCRE2_LITERAL| \
PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
PCRE2_NEVER_UCP|PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE| \
PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
PCRE2_NO_UTF_CHECK|PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
PCRE2_UTF)
/* The only option bits that may accompany PCRE2_LITERAL. When PCRE2_LITERAL is
set, pcre2_compile() checks the options against this mask and gives ERR92 if
any other bit is present. */
#define PUBLIC_LITERAL_COMPILE_OPTIONS \
(PCRE2_ANCHORED|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_ENDANCHORED| \
PCRE2_FIRSTLINE|PCRE2_LITERAL|PCRE2_NO_START_OPTIMIZE| \
PCRE2_NO_UTF_CHECK|PCRE2_USE_OFFSET_LIMIT|PCRE2_UTF)
/* Compile time error code numbers. They are given names so that they can more
easily be tracked. When a new number is added, the tables called eint1 and
eint2 in pcre2posix.c may need to be updated, and a new error text must be
@ -718,7 +723,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
ERR91};
ERR91, ERR92};
/* This is a table of start-of-pattern options such as (*UTF) and settings such
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@ -2170,7 +2175,7 @@ the parsed pattern.
Arguments:
ptr current pattern pointer
pcalloutptr points to a pointer to previous callout, or NULL
options the compiling options
auto_callout TRUE if auto_callouts are enabled
parsed_pattern the parsed pattern pointer
cb compile block
@ -2178,7 +2183,7 @@ Returns: possibly updated parsed_pattern pointer.
*/
static uint32_t *
manage_callouts(PCRE2_SPTR ptr, uint32_t **pcalloutptr, uint32_t options,
manage_callouts(PCRE2_SPTR ptr, uint32_t **pcalloutptr, BOOL auto_callout,
uint32_t *parsed_pattern, compile_block *cb)
{
uint32_t *previous_callout = *pcalloutptr;
@ -2186,7 +2191,7 @@ uint32_t *previous_callout = *pcalloutptr;
if (previous_callout != NULL) previous_callout[2] = ptr - cb->start_pattern -
(PCRE2_SIZE)previous_callout[1];
if ((options & PCRE2_AUTO_CALLOUT) == 0) previous_callout = NULL; else
if (!auto_callout) previous_callout = NULL; else
{
if (previous_callout == NULL ||
previous_callout != parsed_pattern - 4 ||
@ -2288,15 +2293,44 @@ int i;
BOOL inescq = FALSE;
BOOL inverbname = FALSE;
BOOL utf = (options & PCRE2_UTF) != 0;
BOOL auto_callout = (options & PCRE2_AUTO_CALLOUT) != 0;
BOOL isdupname;
BOOL negate_class;
BOOL okquantifier = FALSE;
PCRE2_SPTR thisptr;
PCRE2_SPTR name;
PCRE2_SPTR ptrend = cb->end_pattern;
PCRE2_SPTR verbnamestart = NULL; /* Value avoids compiler warning */
named_group *ng;
nest_save *top_nest = NULL;
nest_save *end_nests = (nest_save *)(cb->start_workspace + cb->workspace_size);
nest_save *top_nest, *end_nests;
/* If the pattern is actually a literal string, process it separately to avoid
cluttering up the main loop. */
if ((options & PCRE2_LITERAL) != 0)
{
while (ptr < ptrend)
{
if (parsed_pattern >= parsed_pattern_end)
{
errorcode = ERR63; /* Internal error (parsed pattern overflow) */
goto FAILED;
}
thisptr = ptr;
GETCHARINCTEST(c, ptr);
if (auto_callout)
parsed_pattern = manage_callouts(thisptr, &previous_callout,
auto_callout, parsed_pattern, cb);
PARSED_LITERAL(c, parsed_pattern);
}
*parsed_pattern = META_END;
return 0;
}
/* Process a real regex which may contain meta-characters. */
top_nest = NULL;
end_nests = (nest_save *)(cb->start_workspace + cb->workspace_size);
/* The size of the nest_save structure might not be a factor of the size of the
workspace. Therefore we must round down end_nests so as to correctly avoid
@ -2311,8 +2345,6 @@ if ((options & PCRE2_EXTENDED_MORE) != 0) options |= PCRE2_EXTENDED;
/* Now scan the pattern */
*has_lookbehind = FALSE;
while (ptr < ptrend)
{
int prev_expect_cond_assert;
@ -2322,7 +2354,6 @@ while (ptr < ptrend)
uint32_t prev_meta_quantifier;
BOOL prev_okquantifier;
PCRE2_SPTR tempptr;
PCRE2_SPTR thisptr;
PCRE2_SIZE offset;
if (parsed_pattern >= parsed_pattern_end)
@ -2334,7 +2365,7 @@ while (ptr < ptrend)
if (nest_depth > cb->cx->parens_nest_limit)
{
errorcode = ERR19;
goto FAILED;
goto FAILED; /* Parentheses too deeply nested */
}
/* Get next input character, save its position for callout handling. */
@ -2361,8 +2392,8 @@ while (ptr < ptrend)
goto FAILED;
}
if (!inverbname && after_manual_callout-- <= 0)
parsed_pattern = manage_callouts(thisptr, &previous_callout, options,
parsed_pattern, cb);
parsed_pattern = manage_callouts(thisptr, &previous_callout,
auto_callout, parsed_pattern, cb);
PARSED_LITERAL(c, parsed_pattern);
meta_quantifier = 0;
}
@ -2507,7 +2538,7 @@ while (ptr < ptrend)
!read_repeat_counts(&tempptr, ptrend, NULL, NULL, &errorcode))))
{
if (after_manual_callout-- <= 0)
parsed_pattern = manage_callouts(thisptr, &previous_callout, options,
parsed_pattern = manage_callouts(thisptr, &previous_callout, auto_callout,
parsed_pattern, cb);
}
@ -4135,7 +4166,7 @@ if (inverbname && ptr >= ptrend)
/* Manage callout for the final item */
parsed_pattern = manage_callouts(ptr, &previous_callout, options,
parsed_pattern = manage_callouts(ptr, &previous_callout, auto_callout,
parsed_pattern, cb);
/* Terminate the parsed pattern, then return success if all groups are closed.
@ -8891,7 +8922,7 @@ pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options,
int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext)
{
BOOL utf; /* Set TRUE for UTF mode */
BOOL has_lookbehind; /* Set TRUE if a lookbehind is found */
BOOL has_lookbehind = FALSE; /* Set TRUE if a lookbehind is found */
BOOL zero_terminated; /* Set TRUE for zero-terminated pattern */
pcre2_real_code *re = NULL; /* What we will return */
compile_block cb; /* "Static" compile-time data */
@ -8961,6 +8992,13 @@ if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
return NULL;
}
if ((options & PCRE2_LITERAL) != 0 &&
(options & ~PUBLIC_LITERAL_COMPILE_OPTIONS) != 0)
{
*errorptr = ERR92;
return NULL;
}
/* A NULL compile context means "use a default context" */
if (ccontext == NULL)
@ -9039,10 +9077,11 @@ for (i = 0; i < 10; i++) cb.small_ref_offset[i] = PCRE2_UNSET;
/* --------------- Start looking at the pattern --------------- */
/* Check for global one-time option settings at the start of the pattern, and
remember the offset to the actual regex. With valgrind support, make the
terminator of a zero-terminated pattern inaccessible. This catches bugs that
would otherwise only show up for non-zero-terminated patterns. */
/* Unless PCRE2_LITERAL is set, check for global one-time option settings at
the start of the pattern, and remember the offset to the actual regex. With
valgrind support, make the terminator of a zero-terminated pattern
inaccessible. This catches bugs that would otherwise only show up for
non-zero-terminated patterns. */
#ifdef SUPPORT_VALGRIND
if (zero_terminated) VALGRIND_MAKE_MEM_NOACCESS(pattern + patlen, CU2BYTES(1));
@ -9051,19 +9090,21 @@ if (zero_terminated) VALGRIND_MAKE_MEM_NOACCESS(pattern + patlen, CU2BYTES(1));
ptr = pattern;
skipatstart = 0;
while (patlen - skipatstart >= 2 &&
if ((options & PCRE2_LITERAL) == 0)
{
while (patlen - skipatstart >= 2 &&
ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
ptr[skipatstart+1] == CHAR_ASTERISK)
{
for (i = 0; i < sizeof(pso_list)/sizeof(pso); i++)
{
uint32_t c, pp;
pso *p = pso_list + i;
if (patlen - skipatstart - 2 >= p->length &&
PRIV(strncmp_c8)(ptr+skipatstart+2, (char *)(p->name), p->length) == 0)
PRIV(strncmp_c8)(ptr + skipatstart + 2, (char *)(p->name),
p->length) == 0)
{
uint32_t c, pp;
skipatstart += p->length + 2;
switch(p->type)
{
@ -9118,6 +9159,7 @@ while (patlen - skipatstart >= 2 &&
}
if (i >= sizeof(pso_list)/sizeof(pso)) break; /* Out of pso loop */
}
}
/* End of pattern-start options; advance to start of real regex. */

View File

@ -177,6 +177,7 @@ static const unsigned char compile_error_texts[] =
/* 90 */
"internal error: bad code value in parsed_skip()\0"
"PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0"
"invalid option bits with PCRE2_LITERAL\0"
;
/* Match-time and UTF error texts are in the same format. */

View File

@ -142,6 +142,7 @@ static const int eint2[] = {
32, REG_INVARG, /* this version of PCRE2 does not have Unicode support */
37, REG_EESCAPE, /* PCRE2 does not support \L, \l, \N{name}, \U, or \u */
56, REG_INVARG, /* internal error: unknown newline setting */
92, REG_INVARG, /* invalid option bits with PCRE2_LITERAL */
};
/* Table of texts corresponding to POSIX error codes */
@ -242,6 +243,7 @@ patlen = ((cflags & REG_PEND) != 0)? (PCRE2_SIZE)(preg->re_endp - pattern) :
if ((cflags & REG_ICASE) != 0) options |= PCRE2_CASELESS;
if ((cflags & REG_NEWLINE) != 0) options |= PCRE2_MULTILINE;
if ((cflags & REG_DOTALL) != 0) options |= PCRE2_DOTALL;
if ((cflags & REG_NOSPEC) != 0) options |= PCRE2_LITERAL;
if ((cflags & REG_UTF) != 0) options |= PCRE2_UTF;
if ((cflags & REG_UCP) != 0) options |= PCRE2_UCP;
if ((cflags & REG_UNGREEDY) != 0) options |= PCRE2_UNGREEDY;
@ -263,7 +265,7 @@ if (preg->re_pcre2_code == NULL)
if (errorcode < (int)(sizeof(eint1)/sizeof(const int)))
return eint1[errorcode];
for (i = 0; i < sizeof(eint2)/(2*sizeof(const int)); i += 2)
for (i = 0; i < sizeof(eint2)/sizeof(const int); i += 2)
if (errorcode == eint2[i]) return eint2[i+1];
return REG_BADPAT;
}

View File

@ -63,6 +63,7 @@ extern "C" {
#define REG_UNGREEDY 0x0200 /* NOT defined by POSIX; maps to PCRE2_UNGREEDY */
#define REG_UCP 0x0400 /* NOT defined by POSIX; maps to PCRE2_UCP */
#define REG_PEND 0x0800 /* GNU feature: pass end pattern by re_endp */
#define REG_NOSPEC 0x1000 /* NOT defined by POSIX; maps to PCRE2_LITERAL */
/* This is not used by PCRE2, but by defining it we make it easier
to slot PCRE2 into existing programs that make POSIX calls. */

View File

@ -634,6 +634,7 @@ static modstruct modlist[] = {
{ "jitfast", MOD_PAT, MOD_CTL, CTL_JITFAST, PO(control) },
{ "jitstack", MOD_PNDP, MOD_INT, 0, PO(jitstack) },
{ "jitverify", MOD_PAT, MOD_CTL, CTL_JITVERIFY, PO(control) },
{ "literal", MOD_PAT, MOD_OPT, PCRE2_LITERAL, PO(options) },
{ "locale", MOD_PAT, MOD_STR, LOCALESIZE, PO(locale) },
{ "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) },
{ "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) },
@ -696,8 +697,8 @@ static modstruct modlist[] = {
/* Controls and options that are supported for use with the POSIX interface. */
#define POSIX_SUPPORTED_COMPILE_OPTIONS ( \
PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_MULTILINE|PCRE2_UCP|PCRE2_UTF| \
PCRE2_UNGREEDY)
PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_LITERAL|PCRE2_MULTILINE|PCRE2_UCP| \
PCRE2_UTF|PCRE2_UNGREEDY)
#define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0)
@ -4030,7 +4031,7 @@ static void
show_compile_options(uint32_t options, const char *before, const char *after)
{
if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
before,
((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
@ -4046,6 +4047,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%
((options & PCRE2_EXTENDED) != 0)? " extended" : "",
((options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
((options & PCRE2_LITERAL) != 0)? " literal" : "",
((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
@ -4905,6 +4907,7 @@ uint8_t *p = buffer;
unsigned int delimiter = *p++;
int errorcode;
void *use_pat_context;
uint32_t use_forbid_utf = forbid_utf;
PCRE2_SIZE patlen;
PCRE2_SIZE valgrind_access_length;
PCRE2_SIZE erroroffset;
@ -5263,6 +5266,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
if ((pat_patctl.options & PCRE2_LITERAL) != 0) cflags |= REG_NOSPEC;
if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
@ -5535,6 +5539,11 @@ NULL context. */
use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
NULL : PTR(pat_context);
/* If PCRE2_LITERAL is set, set use_forbid_utf to zero because PCRE2_NEVER_UTF
and PCRE2_NEVER_UCP are invalid with it. */
if ((pat_patctl.options & PCRE2_LITERAL) != 0) use_forbid_utf = 0;
/* Compile many times when timing. */
if (timeit > 0)
@ -5545,7 +5554,8 @@ if (timeit > 0)
{
clock_t start_time = clock();
PCRE2_COMPILE(compiled_code, pbuffer, patlen,
pat_patctl.options|forbid_utf, &errorcode, &erroroffset, use_pat_context);
pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
use_pat_context);
time_taken += clock() - start_time;
if (TEST(compiled_code, !=, NULL))
{ SUB1(pcre2_code_free, compiled_code); }
@ -5558,7 +5568,7 @@ if (timeit > 0)
/* A final compile that is used "for real". */
PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|forbid_utf,
PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|use_forbid_utf,
&errorcode, &erroroffset, use_pat_context);
/* Call the JIT compiler if requested. When timing, we must free and recompile
@ -5576,7 +5586,7 @@ if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0)
clock_t start_time;
SUB1(pcre2_code_free, compiled_code);
PCRE2_COMPILE(compiled_code, pbuffer, patlen,
pat_patctl.options|forbid_utf, &errorcode, &erroroffset,
pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
use_pat_context);
start_time = clock();
PCRE2_JIT_COMPILE(jitrc,compiled_code, pat_patctl.jit);

View File

@ -129,4 +129,9 @@
/ABC/use_length
ABC
/a\b(c/literal,posix
a\\b(c
/a\b(c/literal,posix,dotall
# End of testdata/testinput18

35
testdata/testinput2 vendored
View File

@ -5292,4 +5292,39 @@ a)"xI
# ----------------------------------------------------------------------
/a\b(c/literal
a\\b(c
/a\b(c/literal,caseless
a\\b(c
a\\B(c
/a\b(c/literal,firstline
XYYa\\b(c
\= Expect no match
X\na\\b(c
/a\b?c/literal,use_offset_limit
XXXXa\\b?c\=offset_limit=5
\= Expect no match
XXXXa\\b?c\=offset_limit=3
/a\b(c/literal,anchored,endanchored
a\\b(c
\= Expect no match
Xa\\b(c
a\\b(cX
Xa\\b(cX
//literal,extended
/a\b(c/literal,auto_callout,no_start_optimize
XXXXa\\b(c
/a\b(c/literal,auto_callout
XXXXa\\b(c
/(*CR)abc/literal
(*CR)abc
# End of testinput2

3
testdata/testinput5 vendored
View File

@ -2024,4 +2024,7 @@
# ----------------------------------------------------------------------
/Aሴ+B/literal,utf,no_utf_check
Aሴ+B
# End of testinput5

View File

@ -199,4 +199,11 @@ No match: POSIX code 17: match failed
ABC
0: ABC
/a\b(c/literal,posix
a\\b(c
0: a\b(c
/a\b(c/literal,posix,dotall
Failed: POSIX code 16: bad argument at offset 0
# End of testdata/testinput18

68
testdata/testoutput2 vendored
View File

@ -16015,6 +16015,72 @@ Failed: error 108 at offset 4: range out of order in character class
# ----------------------------------------------------------------------
/a\b(c/literal
a\\b(c
0: a\b(c
/a\b(c/literal,caseless
a\\b(c
0: a\b(c
a\\B(c
0: a\B(c
/a\b(c/literal,firstline
XYYa\\b(c
0: a\b(c
\= Expect no match
X\na\\b(c
No match
/a\b?c/literal,use_offset_limit
XXXXa\\b?c\=offset_limit=5
0: a\b?c
\= Expect no match
XXXXa\\b?c\=offset_limit=3
No match
/a\b(c/literal,anchored,endanchored
a\\b(c
0: a\b(c
\= Expect no match
Xa\\b(c
No match
a\\b(cX
No match
Xa\\b(cX
No match
//literal,extended
Failed: error 192 at offset 0: invalid option bits with PCRE2_LITERAL
/a\b(c/literal,auto_callout,no_start_optimize
XXXXa\\b(c
--->XXXXa\b(c
+0 ^ a
+0 ^ a
+0 ^ a
+0 ^ a
+0 ^ a
+1 ^^ \
+2 ^ ^ b
+3 ^ ^ (
+4 ^ ^
0: a\b(c
/a\b(c/literal,auto_callout
XXXXa\\b(c
--->XXXXa\b(c
+0 ^ a
+1 ^^ \
+2 ^ ^ b
+3 ^ ^ (
+4 ^ ^
0: a\b(c
/(*CR)abc/literal
(*CR)abc
0: (*CR)abc
# End of testinput2
Error -65: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
@ -16024,4 +16090,4 @@ Error 0: PCRE2_ERROR_BADDATA (unknown error number)
Error 100: no error
Error 101: \ at end of pattern
Error 191: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
Error 192: PCRE2_ERROR_BADDATA (unknown error number)
Error 200: PCRE2_ERROR_BADDATA (unknown error number)

View File

@ -4600,4 +4600,8 @@ No match
# ----------------------------------------------------------------------
/Aሴ+B/literal,utf,no_utf_check
Aሴ+B
0: A\x{1234}+B
# End of testinput5