Code for new interpreter (build system and documentation still to be done).
This commit is contained in:
parent
53bf29d689
commit
20804215a8
17
ChangeLog
17
ChangeLog
|
@ -2,15 +2,24 @@ Change Log for PCRE2
|
|||
--------------------
|
||||
|
||||
|
||||
Version 10.24 14-February-2017
|
||||
------------------------------
|
||||
Version 10.30-DEV 09-March-2017
|
||||
-------------------------------
|
||||
|
||||
1. Hardened pcre2test so as to reduce the number of bugs reported by fuzzers:
|
||||
1. The main interpreter, pcre2_match(), has been refactored into a new version
|
||||
that does not use recursive function calls (and therefore the stack) for
|
||||
remembering backtracking positions. This makes --disable-stack-for-recursion a
|
||||
NOOP. The new implementation allows backtracking into recursive group calls in
|
||||
patterns, making it more compatible with Perl, and also fixes some other
|
||||
hard-to-do issues such as #1887 in Bugzilla. The code is also cleaner because
|
||||
the old code had a number of fudges to try to reduce stack usage. It seems to
|
||||
run no slower than the old code.
|
||||
|
||||
2. Hardened pcre2test so as to reduce the number of bugs reported by fuzzers:
|
||||
|
||||
(a) Check for malloc failures when getting memory for the ovector (POSIX) or
|
||||
the match data block (non-POSIX).
|
||||
|
||||
2. In the 32-bit library in non-UTF mode, an attempt to find a Unicode property
|
||||
3. In the 32-bit library in non-UTF mode, an attempt to find a Unicode property
|
||||
for a character with a code point greater than 0x10ffff (the Unicode maximum)
|
||||
caused a crash.
|
||||
|
||||
|
|
|
@ -9,9 +9,9 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
|
|||
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||
|
||||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [24])
|
||||
m4_define(pcre2_prerelease, [-RC1])
|
||||
m4_define(pcre2_date, [2017-02-15])
|
||||
m4_define(pcre2_minor, [30])
|
||||
m4_define(pcre2_prerelease, [-DEV])
|
||||
m4_define(pcre2_date, [2017-03-05])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
|
|
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 24
|
||||
#define PCRE2_PRERELEASE -RC1
|
||||
#define PCRE2_DATE 2017-02-15
|
||||
#define PCRE2_MINOR 30
|
||||
#define PCRE2_PRERELEASE -DEV
|
||||
#define PCRE2_DATE 2017-03-05
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -6209,24 +6209,6 @@ for (;; pptr++)
|
|||
tempcode = previous;
|
||||
op_previous = *previous;
|
||||
|
||||
/* If previous was a recursion call, wrap it in atomic brackets so that
|
||||
previous becomes the atomic group. All recursions were so wrapped in the
|
||||
past, but it no longer happens for non-repeated recursions. In fact, the
|
||||
repeated ones could be re-implemented independently so as not to need this,
|
||||
but for the moment we rely on the code for repeating groups. */
|
||||
|
||||
if (op_previous == OP_RECURSE)
|
||||
{
|
||||
memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE));
|
||||
op_previous = *previous = OP_ONCE;
|
||||
PUT(previous, 1, 2 + 2*LINK_SIZE);
|
||||
previous[2 + 2*LINK_SIZE] = OP_KET;
|
||||
PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE);
|
||||
code += 2 + 2 * LINK_SIZE;
|
||||
length_prevgroup = 3 + 3*LINK_SIZE;
|
||||
group_return = -1; /* Set "may match empty string" */
|
||||
}
|
||||
|
||||
/* Now handle repetition for the different types of item. */
|
||||
|
||||
switch (op_previous)
|
||||
|
@ -6311,6 +6293,77 @@ for (;; pptr++)
|
|||
case OP_FAIL:
|
||||
goto END_REPEAT;
|
||||
|
||||
/* Prior to 10.30, repeated recursions were wrapped in OP_ONCE brackets
|
||||
because pcre2_match() could not handle backtracking into recursively
|
||||
called groups. Now that this backtracking is available, we no longer need
|
||||
to do this. However, we still need to replicate recursions as we do for
|
||||
groups so as to have independent backtracking points. We can replicate
|
||||
for the minimum number of repeats directly. For optional repeats we now
|
||||
wrap the recursion in OP_BRA brackets and make use of the bracket
|
||||
repetition. */
|
||||
|
||||
case OP_RECURSE:
|
||||
|
||||
/* Generate unwrapped repeats for a non-zero minimum, except when the
|
||||
minimum is 1 and the maximum unlimited, because that can be handled with
|
||||
OP_BRA terminated by OP_KETRMAX/MIN. When the maximum is equal to the
|
||||
minimum, we just need to generate the appropriate additional copies.
|
||||
Otherwise we need to generate one more, to simulate the situation when
|
||||
the minimum is zero. */
|
||||
|
||||
if (repeat_min > 0 && (repeat_min != 1 || repeat_max != REPEAT_UNLIMITED))
|
||||
{
|
||||
int replicate = repeat_min;
|
||||
if (repeat_min == repeat_max) replicate--;
|
||||
|
||||
/* In the pre-compile phase, we don't actually do the replication. We
|
||||
just adjust the length as if we had. Do some paranoid checks for
|
||||
potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
|
||||
integer type when available, otherwise double. */
|
||||
|
||||
if (lengthptr != NULL)
|
||||
{
|
||||
PCRE2_SIZE delta = replicate*(1 + LINK_SIZE);
|
||||
if ((INT64_OR_DOUBLE)replicate*
|
||||
(INT64_OR_DOUBLE)(1 + LINK_SIZE) >
|
||||
(INT64_OR_DOUBLE)INT_MAX ||
|
||||
OFLOW_MAX - *lengthptr < delta)
|
||||
{
|
||||
*errorcodeptr = ERR20;
|
||||
return 0;
|
||||
}
|
||||
*lengthptr += delta;
|
||||
}
|
||||
|
||||
else for (i = 0; i < replicate; i++)
|
||||
{
|
||||
memcpy(code, previous, CU2BYTES(1 + LINK_SIZE));
|
||||
previous = code;
|
||||
code += 1 + LINK_SIZE;
|
||||
}
|
||||
|
||||
/* If the number of repeats is fixed, we are done. Otherwise, adjust
|
||||
the counts and fall through. */
|
||||
|
||||
if (repeat_min == repeat_max) break;
|
||||
if (repeat_max != REPEAT_UNLIMITED) repeat_max -= repeat_min;
|
||||
repeat_min = 0;
|
||||
}
|
||||
|
||||
/* Wrap the recursion call in OP_BRA brackets. */
|
||||
|
||||
memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE));
|
||||
op_previous = *previous = OP_BRA;
|
||||
PUT(previous, 1, 2 + 2*LINK_SIZE);
|
||||
previous[2 + 2*LINK_SIZE] = OP_KET;
|
||||
PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE);
|
||||
code += 2 + 2 * LINK_SIZE;
|
||||
length_prevgroup = 3 + 3*LINK_SIZE;
|
||||
group_return = -1; /* Set "may match empty string" */
|
||||
|
||||
/* Now fall through and treat as a repeated OP_BRA. */
|
||||
/* VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV */
|
||||
|
||||
/* If previous was a bracket group, we may have to replicate it in
|
||||
certain cases. Note that at this point we can encounter only the "basic"
|
||||
bracket opcodes such as BRA and CBRA, as this is the place where they get
|
||||
|
@ -6340,10 +6393,10 @@ for (;; pptr++)
|
|||
previous[GET(previous, 1)] != OP_ALT)
|
||||
goto END_REPEAT;
|
||||
|
||||
/* There is no sense in actually repeating assertions. The only potential
|
||||
use of repetition is in cases when the assertion is optional. Therefore,
|
||||
if the minimum is greater than zero, just ignore the repeat. If the
|
||||
maximum is not zero or one, set it to 1. */
|
||||
/* There is no sense in actually repeating assertions. The only
|
||||
potential use of repetition is in cases when the assertion is optional.
|
||||
Therefore, if the minimum is greater than zero, just ignore the repeat.
|
||||
If the maximum is not zero or one, set it to 1. */
|
||||
|
||||
if (op_previous < OP_ONCE) /* Assertion */
|
||||
{
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -54,6 +54,7 @@ just to undefine them all. */
|
|||
#undef ACROSSCHAR
|
||||
#undef BACKCHAR
|
||||
#undef BYTES2CU
|
||||
#undef CHMAX_255
|
||||
#undef CU2BYTES
|
||||
#undef FORWARDCHAR
|
||||
#undef FORWARDCHARTEST
|
||||
|
@ -201,20 +202,25 @@ arithmetic results in a signed value. Hence the cast. */
|
|||
|
||||
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
|
||||
whether its argument, which is assumed to be one code unit, is less than 256.
|
||||
The maximum length of a MARK name must fit in one code unit; currently it is
|
||||
set to 255 or 65535. The TABLE_GET macro is used to access elements of tables
|
||||
containing exactly 256 items. When code points can be greater than 255, a check
|
||||
is needed before accessing these tables. */
|
||||
The CHMAX_255 macro does not assume one code unit. The maximum length of a MARK
|
||||
name must fit in one code unit; currently it is set to 255 or 65535. The
|
||||
TABLE_GET macro is used to access elements of tables containing exactly 256
|
||||
items. When code points can be greater than 255, a check is needed before
|
||||
accessing these tables. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define MAX_255(c) TRUE
|
||||
#define MAX_MARK ((1u << 8) - 1)
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#define SUPPORT_WIDE_CHARS
|
||||
#define CHMAX_255(c) ((c) <= 255u)
|
||||
#else
|
||||
#define CHMAX_255(c) TRUE
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
#define TABLE_GET(c, table, default) ((table)[c])
|
||||
|
||||
#else /* Code units are 16 or 32 bits */
|
||||
#define CHMAX_255(c) ((c) <= 255u)
|
||||
#define MAX_255(c) ((c) <= 255u)
|
||||
#define MAX_MARK ((1u << 16) - 1)
|
||||
#define SUPPORT_WIDE_CHARS
|
||||
|
@ -740,27 +746,8 @@ typedef struct pcre2_real_jit_stack {
|
|||
void* stack;
|
||||
} pcre2_real_jit_stack;
|
||||
|
||||
/* Structure for keeping a chain of heap blocks used for saving ovectors
|
||||
during pattern recursion when the ovector is larger than can be saved on
|
||||
the system stack. */
|
||||
|
||||
typedef struct ovecsave_frame {
|
||||
struct ovecsave_frame *next; /* Next frame on free chain */
|
||||
PCRE2_SIZE saved_ovec[1]; /* First vector element */
|
||||
} ovecsave_frame;
|
||||
|
||||
/* Structure for items in a linked list that represents an explicit recursive
|
||||
call within the pattern; used by pcre_match(). */
|
||||
|
||||
typedef struct recursion_info {
|
||||
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
|
||||
unsigned int group_num; /* Number of group that was called */
|
||||
PCRE2_SIZE *ovec_save; /* Pointer to saved ovector frame */
|
||||
uint32_t saved_capture_last; /* Last capture number */
|
||||
PCRE2_SPTR subject_position; /* Position at start of recursion */
|
||||
} recursion_info;
|
||||
|
||||
/* A similar structure for pcre_dfa_match(). */
|
||||
call within the pattern when running pcre2_dfa_match(). */
|
||||
|
||||
typedef struct dfa_recursion_info {
|
||||
struct dfa_recursion_info *prevrec;
|
||||
|
@ -768,25 +755,63 @@ typedef struct dfa_recursion_info {
|
|||
uint32_t group_num;
|
||||
} dfa_recursion_info;
|
||||
|
||||
/* Structure for building a chain of data for holding the values of the subject
|
||||
pointer at the start of each subpattern, so as to detect when an empty string
|
||||
has been matched by a subpattern - to break infinite loops; used by
|
||||
pcre2_match(). */
|
||||
/* Structure for "stack" frames that are used for remembering backtracking
|
||||
positions during matching. As these are used in a vector, with the ovector item
|
||||
being extended, the structure size must be a multiple of sizeof(PCRE2_SIZE). The
|
||||
only way to check this at compile time is to force an error by generating an
|
||||
array with a negative size. By putting this in a typedef (which is never used),
|
||||
we don't generate any code when all is well. */
|
||||
|
||||
typedef struct eptrblock {
|
||||
struct eptrblock *epb_prev;
|
||||
PCRE2_SPTR epb_saved_eptr;
|
||||
} eptrblock;
|
||||
typedef struct heapframe {
|
||||
|
||||
/* The first set of fields are variables that have to be preserved over calls
|
||||
to RRMATCH(), but which do not need to be copied to new frames. */
|
||||
|
||||
PCRE2_SPTR ecode; /* The current position in the pattern */
|
||||
PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE2_SPTR values */
|
||||
PCRE2_SIZE length; /* Used for character, string, or code lengths */
|
||||
PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */
|
||||
PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */
|
||||
uint32_t rdepth; /* "Recursion" depth */
|
||||
uint32_t group_frame_type; /* Type information for group frames */
|
||||
uint32_t temp_32[4]; /* Used for short-term 32-bit or BOOL values */
|
||||
uint16_t return_id; /* Where to go on in internal "return" */
|
||||
uint16_t op; /* Processing opcode */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
PCRE2_UCHAR occu[6]; /* Used for other case code units */
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
PCRE2_UCHAR occu[2]; /* Used for other case code units */
|
||||
#else
|
||||
PCRE2_UCHAR occu[1]; /* Used for other case code units */
|
||||
#endif
|
||||
|
||||
/* The rest have to be copied from the previous frame whenever a new frame
|
||||
becomes current. */
|
||||
|
||||
PCRE2_SPTR eptr; /* MUST BE FIRST of the copied fields (presumably marks where the copy starts - confirm in pcre2_match.c) */
|
||||
PCRE2_SPTR start_match; /* Can be adjusted by \K */
|
||||
PCRE2_SPTR mark; /* Most recent mark on the success path */
|
||||
uint32_t current_recurse; /* Current (deepest) recursion number */
|
||||
uint32_t capture_last; /* Most recent capture */
|
||||
PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */
|
||||
PCRE2_SIZE offset_top; /* Offset after highest capture */
|
||||
PCRE2_SIZE ovector[2]; /* Must be last in the structure: this item is extended when frames are used in a vector */
|
||||
} heapframe;
|
||||
|
||||
/* Compile-time assertion: the array size evaluates to -1 (a compile error)
unless sizeof(heapframe) is an exact multiple of sizeof(PCRE2_SIZE). */
typedef char check_heapframe_size[
|
||||
((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)];
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
doing traditional NFA matching (pcre2_match() and friends). */
|
||||
|
||||
typedef struct match_block {
|
||||
pcre2_memctl memctl; /* For general use */
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
pcre2_memctl stack_memctl; /* For "stack" frames */
|
||||
#endif
|
||||
uint32_t match_call_count; /* As it says */
|
||||
PCRE2_SIZE frame_vector_size; /* Size of a backtracking frame */
|
||||
heapframe *match_frames; /* Points to vector of frames */
|
||||
heapframe *match_frames_top; /* Points after the end of the vector */
|
||||
heapframe *stack_frames; /* The original vector on the stack */
|
||||
uint32_t match_call_count; /* Number of times a new frame is created */
|
||||
uint32_t match_limit; /* As it says */
|
||||
uint32_t match_limit_recursion; /* As it says */
|
||||
BOOL hitend; /* Hit the end of the subject at some point */
|
||||
|
@ -794,9 +819,6 @@ typedef struct match_block {
|
|||
const uint8_t *lcc; /* Points to lower casing table */
|
||||
const uint8_t *fcc; /* Points to case-flipping table */
|
||||
const uint8_t *ctypes; /* Points to table of type maps */
|
||||
PCRE2_SIZE *ovector; /* Pointer to the offset vector */
|
||||
PCRE2_SIZE offset_end; /* One past the end */
|
||||
PCRE2_SIZE offset_max; /* The maximum usable for return data */
|
||||
PCRE2_SIZE start_offset; /* The start offset value */
|
||||
PCRE2_SIZE end_offset_top; /* Highwater mark at end of match */
|
||||
uint16_t partial; /* PARTIAL options */
|
||||
|
@ -807,30 +829,22 @@ typedef struct match_block {
|
|||
PCRE2_SPTR start_code; /* For use when recursing */
|
||||
PCRE2_SPTR start_subject; /* Start of the subject string */
|
||||
PCRE2_SPTR end_subject; /* End of the subject string */
|
||||
PCRE2_SPTR start_match_ptr; /* Start of matched string */
|
||||
PCRE2_SPTR verb_ecode_ptr; /* For passing back info */
|
||||
PCRE2_SPTR verb_skip_ptr; /* For passing back a (*SKIP) name */
|
||||
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
|
||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||
PCRE2_SPTR mark; /* Mark pointer to pass back on success */
|
||||
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
|
||||
PCRE2_SPTR once_target; /* Where to back up to for atomic groups */
|
||||
uint32_t moptions; /* Match options */
|
||||
uint32_t poptions; /* Pattern options */
|
||||
uint32_t capture_last; /* Most recent capture number + overflow flag */
|
||||
uint32_t skip_arg_count; /* For counting SKIP_ARGs */
|
||||
uint32_t ignore_skip_arg; /* For re-run when SKIP arg name not found */
|
||||
uint32_t match_function_type; /* Set for certain special calls of match() */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
|
||||
recursion_info *recursive; /* Linked list of recursion data */
|
||||
ovecsave_frame *ovecsave_chain; /* Linked list of free ovecsave blocks */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
int (*callout)(pcre2_callout_block *,void *); /* Callout function or NULL */
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
void *match_frames_base; /* For remembering malloc'd frames */
|
||||
#endif
|
||||
} match_block;
|
||||
|
||||
/* A similar structure is used for the same purpose by the DFA matching
|
||||
|
|
|
@ -707,7 +707,7 @@ static struct regression_test_case regression_test_cases[] = {
|
|||
{ MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
|
||||
{ MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
|
||||
{ MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
|
||||
// { MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
|
||||
{ MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
|
||||
{ MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
|
||||
{ MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
|
||||
|
@ -722,7 +722,7 @@ static struct regression_test_case regression_test_cases[] = {
|
|||
{ MU, A, 0, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
|
||||
{ MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
|
||||
{ MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
|
||||
{ MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
|
||||
// { MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
|
||||
{ MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
|
||||
|
||||
/* 16 bit specific tests. */
|
||||
|
@ -848,7 +848,8 @@ static struct regression_test_case regression_test_cases[] = {
|
|||
{ MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
|
||||
|
||||
/* Deep recursion: Stack limit reached. */
|
||||
{ M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
|
||||
// { M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
||||
{ M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
||||
|
@ -1309,9 +1310,9 @@ static int regression_tests(void)
|
|||
} else {
|
||||
ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1);
|
||||
ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2);
|
||||
for (i = 0; i < OVECTOR_SIZE * 3; ++i)
|
||||
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||
ovector8_1[i] = -2;
|
||||
for (i = 0; i < OVECTOR_SIZE * 3; ++i)
|
||||
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||
ovector8_2[i] = -2;
|
||||
}
|
||||
if (re8) {
|
||||
|
@ -1348,9 +1349,9 @@ static int regression_tests(void)
|
|||
} else {
|
||||
ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1);
|
||||
ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2);
|
||||
for (i = 0; i < OVECTOR_SIZE * 3; ++i)
|
||||
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||
ovector16_1[i] = -2;
|
||||
for (i = 0; i < OVECTOR_SIZE * 3; ++i)
|
||||
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||
ovector16_2[i] = -2;
|
||||
}
|
||||
if (re16) {
|
||||
|
@ -1392,9 +1393,9 @@ static int regression_tests(void)
|
|||
} else {
|
||||
ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1);
|
||||
ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2);
|
||||
for (i = 0; i < OVECTOR_SIZE * 3; ++i)
|
||||
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||
ovector32_1[i] = -2;
|
||||
for (i = 0; i < OVECTOR_SIZE * 3; ++i)
|
||||
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
||||
ovector32_2[i] = -2;
|
||||
}
|
||||
if (re32) {
|
||||
|
|
8070
src/pcre2_match.c
8070
src/pcre2_match.c
File diff suppressed because it is too large
Load Diff
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -51,7 +51,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
* Create a match data block given ovector size *
|
||||
*************************************************/
|
||||
|
||||
/* A minimum of 1 is imposed on the number of ovector triplets. */
|
||||
/* A minimum of 1 is imposed on the number of ovector pairs. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
|
||||
pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
|
||||
|
@ -59,7 +59,7 @@ pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
|
|||
pcre2_match_data *yield;
|
||||
if (oveccount < 1) oveccount = 1;
|
||||
yield = PRIV(memctl_malloc)(
|
||||
sizeof(pcre2_match_data) + 3*oveccount*sizeof(PCRE2_SIZE),
|
||||
sizeof(pcre2_match_data) + 2*oveccount*sizeof(PCRE2_SIZE),
|
||||
(pcre2_memctl *)gcontext);
|
||||
if (yield == NULL) return NULL;
|
||||
yield->oveccount = oveccount;
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -46,10 +46,8 @@ collecting data (e.g. minimum matching length). */
|
|||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "pcre2_internal.h"
|
||||
|
||||
|
||||
/* The maximum remembered capturing brackets minimum. */
|
||||
|
||||
#define MAX_CACHE_BACKREF 128
|
||||
|
@ -158,12 +156,12 @@ for (;;)
|
|||
}
|
||||
goto PROCESS_NON_CAPTURE;
|
||||
|
||||
/* There's a special case of OP_ONCE, when it is wrapped round an
|
||||
case OP_BRA:
|
||||
/* There's a special case of OP_BRA, when it is wrapped round a repeated
|
||||
OP_RECURSE. We'd like to process the latter at this level so that
|
||||
remembering the value works for repeated cases. So we do nothing, but
|
||||
set a fudge value to skip over the OP_KET after the recurse. */
|
||||
|
||||
case OP_ONCE:
|
||||
if (cc[1+LINK_SIZE] == OP_RECURSE && cc[2*(1+LINK_SIZE)] == OP_KET)
|
||||
{
|
||||
once_fudge = 1 + LINK_SIZE;
|
||||
|
@ -172,8 +170,8 @@ for (;;)
|
|||
}
|
||||
/* Fall through */
|
||||
|
||||
case OP_ONCE:
|
||||
case OP_ONCE_NC:
|
||||
case OP_BRA:
|
||||
case OP_SBRA:
|
||||
case OP_BRAPOS:
|
||||
case OP_SBRAPOS:
|
||||
|
@ -789,6 +787,7 @@ if (utf)
|
|||
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
|
@ -801,10 +800,12 @@ if (caseless)
|
|||
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Not UTF */
|
||||
|
||||
else if (MAX_255(c)) SET_BIT(re->tables[fcc_offset + c]);
|
||||
if (MAX_255(c)) SET_BIT(re->tables[fcc_offset + c]);
|
||||
}
|
||||
|
||||
return p;
|
||||
|
|
|
@ -5361,7 +5361,7 @@ if (callout_capture)
|
|||
if (cb->callout_string == NULL)
|
||||
fprintf(outfile, "Callout %d:", cb->callout_number);
|
||||
fprintf(outfile, " last capture = %d\n", cb->capture_last);
|
||||
for (i = 0; i < cb->capture_top * 2; i += 2)
|
||||
for (i = 2; i < cb->capture_top * 2; i += 2)
|
||||
{
|
||||
fprintf(outfile, "%2d: ", i/2);
|
||||
if (cb->offset_vector[i] == PCRE2_UNSET)
|
||||
|
|
|
@ -5831,4 +5831,55 @@ ef) x/x,mark
|
|||
|
||||
/X?(R||){3335}/
|
||||
|
||||
/(?1)(A(*COMMIT)|B)D/
|
||||
ABD
|
||||
XABD
|
||||
BAD
|
||||
ABXABD
|
||||
\= Expect no match
|
||||
ABX
|
||||
|
||||
/(?(DEFINE)(?<m> 1? (?=(?<cond>2)?) 1 2 (?('cond')|3)))
|
||||
\A
|
||||
()
|
||||
(?&m)
|
||||
\Z/x
|
||||
123
|
||||
|
||||
/^(?:
|
||||
(?: A| (1? (?=(?<cond>2)?) (1) 2 (?('cond')|3)) )
|
||||
(Z)
|
||||
)+$/x
|
||||
AZ123Z
|
||||
\= Expect no match
|
||||
AZ12Z
|
||||
|
||||
/^ (?(DEFINE) ( (?!(a)\2b)..) ) ()(?1) /x
|
||||
acb
|
||||
\= Expect no match
|
||||
aab
|
||||
|
||||
'(?>ab|abab){1,5}?M'
|
||||
abababababababababababM
|
||||
|
||||
'(?>ab|abab){2}?M'
|
||||
abababM
|
||||
|
||||
'((?(?=(a))a)+k)'
|
||||
bbak
|
||||
|
||||
'((?(?=(a))a|)+k)'
|
||||
bbak
|
||||
|
||||
'(?(?!(b))a|b)+k'
|
||||
ababbalbbadabak
|
||||
|
||||
/(?!(b))c|b/
|
||||
Ab
|
||||
Ac
|
||||
|
||||
/(?=(b))b|c/
|
||||
Ab
|
||||
Ac
|
||||
|
||||
# End of testinput1
|
||||
|
|
|
@ -2880,19 +2880,6 @@
|
|||
xxxxabcde\=ps
|
||||
xxxxabcde\=ph
|
||||
|
||||
# This is not in the Perl-compatible test because Perl seems currently to be
|
||||
# broken and not behaving as specified in that it *does* bumpalong after
|
||||
# hitting (*COMMIT).
|
||||
|
||||
/(?1)(A(*COMMIT)|B)D/
|
||||
ABD
|
||||
XABD
|
||||
BAD
|
||||
ABXABD
|
||||
\= Expect no match
|
||||
ABX
|
||||
BAXBAD
|
||||
|
||||
/(\3)(\1)(a)/alt_bsux,allow_empty_class,match_unset_backref,dupnames
|
||||
cat
|
||||
|
||||
|
@ -3661,7 +3648,7 @@
|
|||
|
||||
/(?:(a)+(?C1)bb|aa(?C2)b)++/
|
||||
aab\=callout_capture
|
||||
aab\=callout_capture,ovector=1
|
||||
aab\=callout_capture,ovector=1,no_jit
|
||||
|
||||
/(ab)x|ab/
|
||||
ab\=ovector=0
|
||||
|
@ -3723,8 +3710,7 @@
|
|||
bnn
|
||||
|
||||
/(?(?=b(*SKIP)a)bn|bnn)/
|
||||
\= Expect no match
|
||||
bnn
|
||||
bnn\=no_jit
|
||||
|
||||
/(?=b(*THEN)a|)bn|bnn/
|
||||
bnn
|
||||
|
@ -4972,4 +4958,55 @@ a)"xI
|
|||
//
|
||||
\=ovector=7777777777
|
||||
|
||||
/(?1)(A(*COMMIT)|B)D/
|
||||
BAXBAD\=no_jit
|
||||
|
||||
"(?1){2}(a)"B
|
||||
|
||||
"(?1){2,4}(a)"B
|
||||
|
||||
# This test differs from Perl for the first subject. Perl ends up with
|
||||
# $1 set to 'B'; PCRE2 has it unset (which I think is right).
|
||||
|
||||
/^(?:
|
||||
(?:A| (?:B|B(*ACCEPT)) (?<=(.)) D)
|
||||
(Z)
|
||||
)+$/x
|
||||
AZB
|
||||
AZBDZ
|
||||
|
||||
# These four are temporarily here instead of test 1 while waiting for a JIT
|
||||
# update. They require backtracking into recursions.
|
||||
|
||||
/^(.|(.)(?1)\2)$/
|
||||
a\=no_jit
|
||||
aba\=no_jit
|
||||
abcba\=no_jit
|
||||
ababa\=no_jit
|
||||
abcdcba\=no_jit
|
||||
|
||||
/^((.)(?1)\2|.?)$/
|
||||
a\=no_jit
|
||||
aba\=no_jit
|
||||
abba\=no_jit
|
||||
abcba\=no_jit
|
||||
ababa\=no_jit
|
||||
abccba\=no_jit
|
||||
abcdcba\=no_jit
|
||||
abcddcba\=no_jit
|
||||
|
||||
/^(.)(\1|a(?2))/
|
||||
bab\=no_jit
|
||||
|
||||
/^(.|(.)(?1)?\2)$/
|
||||
abcba\=no_jit
|
||||
|
||||
# The first of these, when run by Perl, give the mark 'aa', which is wrong.
|
||||
|
||||
'(?>a(*:aa))b|ac' mark
|
||||
ac
|
||||
|
||||
'(?:a(*:aa))b|ac' mark
|
||||
ac
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -9311,4 +9311,94 @@ No match
|
|||
|
||||
/X?(R||){3335}/
|
||||
|
||||
/(?1)(A(*COMMIT)|B)D/
|
||||
ABD
|
||||
0: ABD
|
||||
1: B
|
||||
XABD
|
||||
0: ABD
|
||||
1: B
|
||||
BAD
|
||||
0: BAD
|
||||
1: A
|
||||
ABXABD
|
||||
0: ABD
|
||||
1: B
|
||||
\= Expect no match
|
||||
ABX
|
||||
No match
|
||||
|
||||
/(?(DEFINE)(?<m> 1? (?=(?<cond>2)?) 1 2 (?('cond')|3)))
|
||||
\A
|
||||
()
|
||||
(?&m)
|
||||
\Z/x
|
||||
123
|
||||
0: 123
|
||||
1: <unset>
|
||||
2: <unset>
|
||||
3:
|
||||
|
||||
/^(?:
|
||||
(?: A| (1? (?=(?<cond>2)?) (1) 2 (?('cond')|3)) )
|
||||
(Z)
|
||||
)+$/x
|
||||
AZ123Z
|
||||
0: AZ123Z
|
||||
1: 123
|
||||
2: <unset>
|
||||
3: 1
|
||||
4: Z
|
||||
\= Expect no match
|
||||
AZ12Z
|
||||
No match
|
||||
|
||||
/^ (?(DEFINE) ( (?!(a)\2b)..) ) ()(?1) /x
|
||||
acb
|
||||
0: ac
|
||||
1: <unset>
|
||||
2: <unset>
|
||||
3:
|
||||
\= Expect no match
|
||||
aab
|
||||
No match
|
||||
|
||||
'(?>ab|abab){1,5}?M'
|
||||
abababababababababababM
|
||||
0: abababababM
|
||||
|
||||
'(?>ab|abab){2}?M'
|
||||
abababM
|
||||
0: ababM
|
||||
|
||||
'((?(?=(a))a)+k)'
|
||||
bbak
|
||||
0: ak
|
||||
1: ak
|
||||
2: a
|
||||
|
||||
'((?(?=(a))a|)+k)'
|
||||
bbak
|
||||
0: ak
|
||||
1: ak
|
||||
2: a
|
||||
|
||||
'(?(?!(b))a|b)+k'
|
||||
ababbalbbadabak
|
||||
0: abak
|
||||
1: b
|
||||
|
||||
/(?!(b))c|b/
|
||||
Ab
|
||||
0: b
|
||||
Ac
|
||||
0: c
|
||||
|
||||
/(?=(b))b|c/
|
||||
Ab
|
||||
0: b
|
||||
1: b
|
||||
Ac
|
||||
0: c
|
||||
|
||||
# End of testinput1
|
||||
|
|
|
@ -12,13 +12,13 @@ Starting code units: a z
|
|||
Last code unit = 'z'
|
||||
Subject length lower bound = 2
|
||||
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits
|
||||
Minimum match limit = 8
|
||||
Minimum recursion limit = 6
|
||||
Minimum match limit = 7
|
||||
Minimum recursion limit = 7
|
||||
0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz
|
||||
1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||
aaaaaaaaaaaaaz\=find_limits
|
||||
Minimum match limit = 32768
|
||||
Minimum recursion limit = 29
|
||||
Minimum match limit = 20481
|
||||
Minimum recursion limit = 30
|
||||
No match
|
||||
|
||||
!((?:\s|//.*\\n|/[*](?:\\n|.)*?[*]/)*)!I
|
||||
|
@ -26,61 +26,61 @@ Capturing subpattern count = 1
|
|||
May match empty string
|
||||
Subject length lower bound = 0
|
||||
/* this is a C style comment */\=find_limits
|
||||
Minimum match limit = 120
|
||||
Minimum recursion limit = 6
|
||||
Minimum match limit = 64
|
||||
Minimum recursion limit = 7
|
||||
0: /* this is a C style comment */
|
||||
1: /* this is a C style comment */
|
||||
|
||||
/^(?>a)++/
|
||||
aa\=find_limits
|
||||
Minimum match limit = 5
|
||||
Minimum recursion limit = 2
|
||||
Minimum recursion limit = 3
|
||||
0: aa
|
||||
aaaaaaaaa\=find_limits
|
||||
Minimum match limit = 12
|
||||
Minimum recursion limit = 2
|
||||
Minimum recursion limit = 3
|
||||
0: aaaaaaaaa
|
||||
|
||||
/(a)(?1)++/
|
||||
aa\=find_limits
|
||||
Minimum match limit = 7
|
||||
Minimum recursion limit = 4
|
||||
Minimum recursion limit = 5
|
||||
0: aa
|
||||
1: a
|
||||
aaaaaaaaa\=find_limits
|
||||
Minimum match limit = 21
|
||||
Minimum recursion limit = 4
|
||||
Minimum recursion limit = 5
|
||||
0: aaaaaaaaa
|
||||
1: a
|
||||
|
||||
/a(?:.)*?a/ims
|
||||
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
||||
Minimum match limit = 65
|
||||
Minimum recursion limit = 2
|
||||
Minimum match limit = 24
|
||||
Minimum recursion limit = 3
|
||||
0: abbbbbbbbbbbbbbbbbbbbba
|
||||
|
||||
/a(?:.(*THEN))*?a/ims
|
||||
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
||||
Minimum match limit = 86
|
||||
Minimum match limit = 66
|
||||
Minimum recursion limit = 45
|
||||
0: abbbbbbbbbbbbbbbbbbbbba
|
||||
|
||||
/a(?:.(*THEN:ABC))*?a/ims
|
||||
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
||||
Minimum match limit = 86
|
||||
Minimum match limit = 66
|
||||
Minimum recursion limit = 45
|
||||
0: abbbbbbbbbbbbbbbbbbbbba
|
||||
|
||||
/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/
|
||||
aabbccddee\=find_limits
|
||||
Minimum match limit = 7
|
||||
Minimum recursion limit = 2
|
||||
Minimum recursion limit = 7
|
||||
0: aabbccddee
|
||||
|
||||
/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/
|
||||
aabbccddee\=find_limits
|
||||
Minimum match limit = 17
|
||||
Minimum recursion limit = 16
|
||||
Minimum match limit = 12
|
||||
Minimum recursion limit = 12
|
||||
0: aabbccddee
|
||||
1: aa
|
||||
2: bb
|
||||
|
@ -90,7 +90,7 @@ Minimum recursion limit = 16
|
|||
|
||||
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
|
||||
aabbccddee\=find_limits
|
||||
Minimum match limit = 13
|
||||
Minimum match limit = 10
|
||||
Minimum recursion limit = 10
|
||||
0: aabbccddee
|
||||
1: aa
|
||||
|
|
|
@ -3517,12 +3517,10 @@ Subject length lower bound = 6
|
|||
1: abc
|
||||
123abcdef\=callout_capture
|
||||
Callout 0: last capture = 1
|
||||
0: <unset>
|
||||
1: abc
|
||||
--->123abcdef
|
||||
^ ^ d
|
||||
Callout 1: last capture = 1
|
||||
0: <unset>
|
||||
1: abc
|
||||
--->123abcdef
|
||||
^ ^ f
|
||||
|
@ -3572,18 +3570,15 @@ May match empty string
|
|||
Subject length lower bound = 0
|
||||
123\=callout_capture
|
||||
Callout 0: last capture = 0
|
||||
0: <unset>
|
||||
--->123
|
||||
^ ^ )*
|
||||
0: 123
|
||||
1: 123
|
||||
123456\=callout_capture
|
||||
Callout 0: last capture = 0
|
||||
0: <unset>
|
||||
--->123456
|
||||
^ ^ )*
|
||||
Callout 0: last capture = 1
|
||||
0: <unset>
|
||||
1: 123
|
||||
--->123456
|
||||
^ ^ )*
|
||||
|
@ -3591,16 +3586,13 @@ Callout 0: last capture = 1
|
|||
1: 456
|
||||
123456789\=callout_capture
|
||||
Callout 0: last capture = 0
|
||||
0: <unset>
|
||||
--->123456789
|
||||
^ ^ )*
|
||||
Callout 0: last capture = 1
|
||||
0: <unset>
|
||||
1: 123
|
||||
--->123456789
|
||||
^ ^ )*
|
||||
Callout 0: last capture = 1
|
||||
0: <unset>
|
||||
1: 456
|
||||
--->123456789
|
||||
^ ^ )*
|
||||
|
@ -3613,13 +3605,11 @@ First code unit = 'x'
|
|||
Subject length lower bound = 4
|
||||
xyzabc\=callout_capture
|
||||
Callout 0: last capture = 2
|
||||
0: <unset>
|
||||
1: <unset>
|
||||
2: xyz
|
||||
--->xyzabc
|
||||
^ ^ p
|
||||
Callout 1: last capture = 0
|
||||
0: <unset>
|
||||
--->xyzabc
|
||||
^ x
|
||||
0: xyzabc
|
||||
|
@ -3632,14 +3622,12 @@ Last code unit = 'x'
|
|||
Subject length lower bound = 5
|
||||
Xxyzabc\=callout_capture
|
||||
Callout 0: last capture = 3
|
||||
0: <unset>
|
||||
1: X
|
||||
2: <unset>
|
||||
3: xyz
|
||||
--->Xxyzabc
|
||||
^ ^ p
|
||||
Callout 1: last capture = 1
|
||||
0: <unset>
|
||||
1: X
|
||||
--->Xxyzabc
|
||||
^^ x
|
||||
|
@ -3654,7 +3642,6 @@ Last code unit = 'f'
|
|||
Subject length lower bound = 6
|
||||
abcdef\=callout_capture
|
||||
Callout 0: last capture = 1
|
||||
0: <unset>
|
||||
1: abc
|
||||
--->abcdef
|
||||
^ a
|
||||
|
@ -3668,12 +3655,10 @@ Last code unit = 'z'
|
|||
Subject length lower bound = 6
|
||||
abcxyz\=callout_capture
|
||||
Callout 1: last capture = 1
|
||||
0: <unset>
|
||||
1: abc
|
||||
--->abcxyz
|
||||
^ ^ d
|
||||
Callout 2: last capture = 0
|
||||
0: <unset>
|
||||
--->abcxyz
|
||||
^ a
|
||||
0: abcxyz
|
||||
|
@ -3686,7 +3671,6 @@ Last code unit = 'z'
|
|||
Subject length lower bound = 3
|
||||
abcxyz\=callout_capture
|
||||
Callout 0: last capture = 1
|
||||
0: <unset>
|
||||
1: abc
|
||||
--->abcxyz
|
||||
^ )
|
||||
|
@ -3834,7 +3818,7 @@ Subject length lower bound = 2
|
|||
Bra
|
||||
CBra 1
|
||||
a
|
||||
Once
|
||||
SBra
|
||||
Recurse
|
||||
KetRmax
|
||||
b
|
||||
|
@ -3938,7 +3922,7 @@ Subject length lower bound = 9
|
|||
Capturing subpattern count = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Subject length lower bound = 3
|
||||
Subject length lower bound = 2
|
||||
a=a
|
||||
0: a=a
|
||||
1: a
|
||||
|
@ -9776,29 +9760,6 @@ Partial match: abca
|
|||
xxxxabcde\=ph
|
||||
Partial match: abcde
|
||||
|
||||
# This is not in the Perl-compatible test because Perl seems currently to be
|
||||
# broken and not behaving as specified in that it *does* bumpalong after
|
||||
# hitting (*COMMIT).
|
||||
|
||||
/(?1)(A(*COMMIT)|B)D/
|
||||
ABD
|
||||
0: ABD
|
||||
1: B
|
||||
XABD
|
||||
0: ABD
|
||||
1: B
|
||||
BAD
|
||||
0: BAD
|
||||
1: A
|
||||
ABXABD
|
||||
0: ABD
|
||||
1: B
|
||||
\= Expect no match
|
||||
ABX
|
||||
No match
|
||||
BAXBAD
|
||||
No match
|
||||
|
||||
/(\3)(\1)(a)/alt_bsux,allow_empty_class,match_unset_backref,dupnames
|
||||
cat
|
||||
0: a
|
||||
|
@ -9947,7 +9908,7 @@ No match
|
|||
Bra
|
||||
^
|
||||
Brazero
|
||||
Once
|
||||
SBra
|
||||
Recurse
|
||||
KetRmax
|
||||
Cond
|
||||
|
@ -11669,17 +11630,14 @@ Subject length lower bound = 3
|
|||
/(?:(a)+(?C1)bb|aa(?C2)b)/
|
||||
aab\=callout_capture
|
||||
Callout 1: last capture = 1
|
||||
0: <unset>
|
||||
1: a
|
||||
--->aab
|
||||
^ ^ b
|
||||
Callout 1: last capture = 1
|
||||
0: <unset>
|
||||
1: a
|
||||
--->aab
|
||||
^^ b
|
||||
Callout 2: last capture = 0
|
||||
0: <unset>
|
||||
--->aab
|
||||
^ ^ b
|
||||
0: aab
|
||||
|
@ -11687,12 +11645,10 @@ Callout 2: last capture = 0
|
|||
/(?:(a)++(?C1)bb|aa(?C2)b)/
|
||||
aab\=callout_capture
|
||||
Callout 1: last capture = 1
|
||||
0: <unset>
|
||||
1: a
|
||||
--->aab
|
||||
^ ^ b
|
||||
Callout 2: last capture = 0
|
||||
0: <unset>
|
||||
--->aab
|
||||
^ ^ b
|
||||
0: aab
|
||||
|
@ -11700,12 +11656,10 @@ Callout 2: last capture = 0
|
|||
/(?:(?>(a))(?C1)bb|aa(?C2)b)/
|
||||
aab\=callout_capture
|
||||
Callout 1: last capture = 1
|
||||
0: <unset>
|
||||
1: a
|
||||
--->aab
|
||||
^^ b
|
||||
Callout 2: last capture = 0
|
||||
0: <unset>
|
||||
--->aab
|
||||
^ ^ b
|
||||
0: aab
|
||||
|
@ -11713,15 +11667,12 @@ Callout 2: last capture = 0
|
|||
/(?:(?1)(?C1)x|ab(?C2))((a)){0}/
|
||||
aab\=callout_capture
|
||||
Callout 1: last capture = 0
|
||||
0: <unset>
|
||||
--->aab
|
||||
^^ x
|
||||
Callout 1: last capture = 0
|
||||
0: <unset>
|
||||
--->aab
|
||||
^^ x
|
||||
Callout 2: last capture = 0
|
||||
0: <unset>
|
||||
--->aab
|
||||
^ ^ )
|
||||
0: ab
|
||||
|
@ -11729,13 +11680,11 @@ Callout 2: last capture = 0
|
|||
/(?1)(?C1)((a)(?C2)){0}/
|
||||
aab\=callout_capture
|
||||
Callout 2: last capture = 2
|
||||
0: <unset>
|
||||
1: <unset>
|
||||
2: a
|
||||
--->aab
|
||||
^^ ){0}
|
||||
Callout 1: last capture = 0
|
||||
0: <unset>
|
||||
--->aab
|
||||
^^ (
|
||||
0: a
|
||||
|
@ -11743,31 +11692,27 @@ Callout 1: last capture = 0
|
|||
/(?:(a)+(?C1)bb|aa(?C2)b)++/
|
||||
aab\=callout_capture
|
||||
Callout 1: last capture = 1
|
||||
0: <unset>
|
||||
1: a
|
||||
--->aab
|
||||
^ ^ b
|
||||
Callout 1: last capture = 1
|
||||
0: <unset>
|
||||
1: a
|
||||
--->aab
|
||||
^^ b
|
||||
Callout 2: last capture = 0
|
||||
0: <unset>
|
||||
--->aab
|
||||
^ ^ b
|
||||
0: aab
|
||||
aab\=callout_capture,ovector=1
|
||||
aab\=callout_capture,ovector=1,no_jit
|
||||
Callout 1: last capture = 1
|
||||
0: <unset>
|
||||
1: a
|
||||
--->aab
|
||||
^ ^ b
|
||||
Callout 1: last capture = 1
|
||||
0: <unset>
|
||||
1: a
|
||||
--->aab
|
||||
^^ b
|
||||
Callout 2: last capture = 0
|
||||
0: <unset>
|
||||
--->aab
|
||||
^ ^ b
|
||||
0: aab
|
||||
|
@ -11875,9 +11820,8 @@ No match
|
|||
0: bn
|
||||
|
||||
/(?(?=b(*SKIP)a)bn|bnn)/
|
||||
\= Expect no match
|
||||
bnn
|
||||
No match
|
||||
bnn\=no_jit
|
||||
0: bnn
|
||||
|
||||
/(?=b(*THEN)a|)bn|bnn/
|
||||
bnn
|
||||
|
@ -13895,7 +13839,6 @@ Callout (10): "AB"
|
|||
/^a(b)c(?C1)def/
|
||||
abcdef\=callout_capture
|
||||
Callout 1: last capture = 1
|
||||
0: <unset>
|
||||
1: b
|
||||
--->abcdef
|
||||
^ ^ d
|
||||
|
@ -13918,7 +13861,6 @@ Callout 1: last capture = 1
|
|||
------------------------------------------------------------------
|
||||
abcdef\=callout_capture
|
||||
Callout (10): {AB} last capture = 1
|
||||
0: <unset>
|
||||
1: b
|
||||
--->abcdef
|
||||
^ ^ d
|
||||
|
@ -15483,6 +15425,140 @@ Subject length lower bound = 11
|
|||
\=ovector=7777777777
|
||||
** Invalid value in 'ovector=7777777777'
|
||||
|
||||
/(?1)(A(*COMMIT)|B)D/
|
||||
BAXBAD\=no_jit
|
||||
0: BAD
|
||||
1: A
|
||||
|
||||
"(?1){2}(a)"B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
Recurse
|
||||
Recurse
|
||||
CBra 1
|
||||
a
|
||||
Ket
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
"(?1){2,4}(a)"B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
Recurse
|
||||
Recurse
|
||||
Brazero
|
||||
Bra
|
||||
Bra
|
||||
Recurse
|
||||
Ket
|
||||
Brazero
|
||||
Bra
|
||||
Recurse
|
||||
Ket
|
||||
Ket
|
||||
CBra 1
|
||||
a
|
||||
Ket
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# This test differs from Perl for the first subject. Perl ends up with
|
||||
# $1 set to 'B'; PCRE2 has it unset (which I think is right).
|
||||
|
||||
/^(?:
|
||||
(?:A| (?:B|B(*ACCEPT)) (?<=(.)) D)
|
||||
(Z)
|
||||
)+$/x
|
||||
AZB
|
||||
0: AZB
|
||||
1: <unset>
|
||||
2: Z
|
||||
AZBDZ
|
||||
0: AZBDZ
|
||||
1: B
|
||||
2: Z
|
||||
|
||||
# These four are temporarily here instead of test 1 while waiting for a JIT
|
||||
# update. They require backtracking into recursions.
|
||||
|
||||
/^(.|(.)(?1)\2)$/
|
||||
a\=no_jit
|
||||
0: a
|
||||
1: a
|
||||
aba\=no_jit
|
||||
0: aba
|
||||
1: aba
|
||||
2: a
|
||||
abcba\=no_jit
|
||||
0: abcba
|
||||
1: abcba
|
||||
2: a
|
||||
ababa\=no_jit
|
||||
0: ababa
|
||||
1: ababa
|
||||
2: a
|
||||
abcdcba\=no_jit
|
||||
0: abcdcba
|
||||
1: abcdcba
|
||||
2: a
|
||||
|
||||
/^((.)(?1)\2|.?)$/
|
||||
a\=no_jit
|
||||
0: a
|
||||
1: a
|
||||
aba\=no_jit
|
||||
0: aba
|
||||
1: aba
|
||||
2: a
|
||||
abba\=no_jit
|
||||
0: abba
|
||||
1: abba
|
||||
2: a
|
||||
abcba\=no_jit
|
||||
0: abcba
|
||||
1: abcba
|
||||
2: a
|
||||
ababa\=no_jit
|
||||
0: ababa
|
||||
1: ababa
|
||||
2: a
|
||||
abccba\=no_jit
|
||||
0: abccba
|
||||
1: abccba
|
||||
2: a
|
||||
abcdcba\=no_jit
|
||||
0: abcdcba
|
||||
1: abcdcba
|
||||
2: a
|
||||
abcddcba\=no_jit
|
||||
0: abcddcba
|
||||
1: abcddcba
|
||||
2: a
|
||||
|
||||
/^(.)(\1|a(?2))/
|
||||
bab\=no_jit
|
||||
0: bab
|
||||
1: b
|
||||
2: ab
|
||||
|
||||
/^(.|(.)(?1)?\2)$/
|
||||
abcba\=no_jit
|
||||
0: abcba
|
||||
1: abcba
|
||||
2: a
|
||||
|
||||
# The first of these, when run by Perl, give the mark 'aa', which is wrong.
|
||||
|
||||
'(?>a(*:aa))b|ac' mark
|
||||
ac
|
||||
0: ac
|
||||
|
||||
'(?:a(*:aa))b|ac' mark
|
||||
ac
|
||||
0: ac
|
||||
|
||||
# End of testinput2
|
||||
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
|
|
|
@ -7517,7 +7517,6 @@ Callout (10): "AB"
|
|||
/^a(b)c(?C1)def/
|
||||
abcdef\=callout_capture
|
||||
Callout 1: last capture = 0
|
||||
0:
|
||||
--->abcdef
|
||||
^ ^ d
|
||||
0: abcdef
|
||||
|
@ -7538,7 +7537,6 @@ Callout 1: last capture = 0
|
|||
------------------------------------------------------------------
|
||||
abcdef\=callout_capture
|
||||
Callout (10): {AB} last capture = 0
|
||||
0:
|
||||
--->abcdef
|
||||
^ ^ d
|
||||
0: abcdef
|
||||
|
|
|
@ -187,7 +187,7 @@ Memory allocation (code space): 40
|
|||
0 17 Bra
|
||||
2 13 CBra 1
|
||||
5 a
|
||||
7 4 Once
|
||||
7 4 SBra
|
||||
9 2 Recurse
|
||||
11 4 KetRmax
|
||||
13 b
|
||||
|
@ -759,18 +759,14 @@ Memory allocation (code space): 14
|
|||
|
||||
"(?1)(?#?'){2}(a)"
|
||||
------------------------------------------------------------------
|
||||
0 21 Bra
|
||||
2 4 Once
|
||||
4 14 Recurse
|
||||
6 4 Ket
|
||||
8 4 Once
|
||||
10 14 Recurse
|
||||
12 4 Ket
|
||||
14 5 CBra 1
|
||||
17 a
|
||||
19 5 Ket
|
||||
21 21 Ket
|
||||
23 End
|
||||
0 13 Bra
|
||||
2 6 Recurse
|
||||
4 6 Recurse
|
||||
6 5 CBra 1
|
||||
9 a
|
||||
11 5 Ket
|
||||
13 13 Ket
|
||||
15 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/.((?2)(?R)|\1|$)()/
|
||||
|
|
|
@ -187,7 +187,7 @@ Memory allocation (code space): 54
|
|||
0 23 Bra
|
||||
3 17 CBra 1
|
||||
7 a
|
||||
9 6 Once
|
||||
9 6 SBra
|
||||
12 3 Recurse
|
||||
15 6 KetRmax
|
||||
18 b
|
||||
|
@ -759,18 +759,14 @@ Memory allocation (code space): 18
|
|||
|
||||
"(?1)(?#?'){2}(a)"
|
||||
------------------------------------------------------------------
|
||||
0 30 Bra
|
||||
3 6 Once
|
||||
6 21 Recurse
|
||||
9 6 Ket
|
||||
12 6 Once
|
||||
15 21 Recurse
|
||||
18 6 Ket
|
||||
21 6 CBra 1
|
||||
25 a
|
||||
27 6 Ket
|
||||
30 30 Ket
|
||||
33 End
|
||||
0 18 Bra
|
||||
3 9 Recurse
|
||||
6 9 Recurse
|
||||
9 6 CBra 1
|
||||
13 a
|
||||
15 6 Ket
|
||||
18 18 Ket
|
||||
21 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/.((?2)(?R)|\1|$)()/
|
||||
|
|
|
@ -187,7 +187,7 @@ Memory allocation (code space): 54
|
|||
0 23 Bra
|
||||
3 17 CBra 1
|
||||
7 a
|
||||
9 6 Once
|
||||
9 6 SBra
|
||||
12 3 Recurse
|
||||
15 6 KetRmax
|
||||
18 b
|
||||
|
@ -759,18 +759,14 @@ Memory allocation (code space): 18
|
|||
|
||||
"(?1)(?#?'){2}(a)"
|
||||
------------------------------------------------------------------
|
||||
0 30 Bra
|
||||
3 6 Once
|
||||
6 21 Recurse
|
||||
9 6 Ket
|
||||
12 6 Once
|
||||
15 21 Recurse
|
||||
18 6 Ket
|
||||
21 6 CBra 1
|
||||
25 a
|
||||
27 6 Ket
|
||||
30 30 Ket
|
||||
33 End
|
||||
0 18 Bra
|
||||
3 9 Recurse
|
||||
6 9 Recurse
|
||||
9 6 CBra 1
|
||||
13 a
|
||||
15 6 Ket
|
||||
18 18 Ket
|
||||
21 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/.((?2)(?R)|\1|$)()/
|
||||
|
|
|
@ -187,7 +187,7 @@ Memory allocation (code space): 80
|
|||
0 17 Bra
|
||||
2 13 CBra 1
|
||||
5 a
|
||||
7 4 Once
|
||||
7 4 SBra
|
||||
9 2 Recurse
|
||||
11 4 KetRmax
|
||||
13 b
|
||||
|
@ -759,18 +759,14 @@ Memory allocation (code space): 28
|
|||
|
||||
"(?1)(?#?'){2}(a)"
|
||||
------------------------------------------------------------------
|
||||
0 21 Bra
|
||||
2 4 Once
|
||||
4 14 Recurse
|
||||
6 4 Ket
|
||||
8 4 Once
|
||||
10 14 Recurse
|
||||
12 4 Ket
|
||||
14 5 CBra 1
|
||||
17 a
|
||||
19 5 Ket
|
||||
21 21 Ket
|
||||
23 End
|
||||
0 13 Bra
|
||||
2 6 Recurse
|
||||
4 6 Recurse
|
||||
6 5 CBra 1
|
||||
9 a
|
||||
11 5 Ket
|
||||
13 13 Ket
|
||||
15 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/.((?2)(?R)|\1|$)()/
|
||||
|
|
|
@ -187,7 +187,7 @@ Memory allocation (code space): 80
|
|||
0 17 Bra
|
||||
2 13 CBra 1
|
||||
5 a
|
||||
7 4 Once
|
||||
7 4 SBra
|
||||
9 2 Recurse
|
||||
11 4 KetRmax
|
||||
13 b
|
||||
|
@ -759,18 +759,14 @@ Memory allocation (code space): 28
|
|||
|
||||
"(?1)(?#?'){2}(a)"
|
||||
------------------------------------------------------------------
|
||||
0 21 Bra
|
||||
2 4 Once
|
||||
4 14 Recurse
|
||||
6 4 Ket
|
||||
8 4 Once
|
||||
10 14 Recurse
|
||||
12 4 Ket
|
||||
14 5 CBra 1
|
||||
17 a
|
||||
19 5 Ket
|
||||
21 21 Ket
|
||||
23 End
|
||||
0 13 Bra
|
||||
2 6 Recurse
|
||||
4 6 Recurse
|
||||
6 5 CBra 1
|
||||
9 a
|
||||
11 5 Ket
|
||||
13 13 Ket
|
||||
15 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/.((?2)(?R)|\1|$)()/
|
||||
|
|
|
@ -187,7 +187,7 @@ Memory allocation (code space): 80
|
|||
0 17 Bra
|
||||
2 13 CBra 1
|
||||
5 a
|
||||
7 4 Once
|
||||
7 4 SBra
|
||||
9 2 Recurse
|
||||
11 4 KetRmax
|
||||
13 b
|
||||
|
@ -759,18 +759,14 @@ Memory allocation (code space): 28
|
|||
|
||||
"(?1)(?#?'){2}(a)"
|
||||
------------------------------------------------------------------
|
||||
0 21 Bra
|
||||
2 4 Once
|
||||
4 14 Recurse
|
||||
6 4 Ket
|
||||
8 4 Once
|
||||
10 14 Recurse
|
||||
12 4 Ket
|
||||
14 5 CBra 1
|
||||
17 a
|
||||
19 5 Ket
|
||||
21 21 Ket
|
||||
23 End
|
||||
0 13 Bra
|
||||
2 6 Recurse
|
||||
4 6 Recurse
|
||||
6 5 CBra 1
|
||||
9 a
|
||||
11 5 Ket
|
||||
13 13 Ket
|
||||
15 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/.((?2)(?R)|\1|$)()/
|
||||
|
|
|
@ -187,7 +187,7 @@ Memory allocation (code space): 28
|
|||
0 24 Bra
|
||||
3 18 CBra 1
|
||||
8 a
|
||||
10 6 Once
|
||||
10 6 SBra
|
||||
13 3 Recurse
|
||||
16 6 KetRmax
|
||||
19 b
|
||||
|
@ -759,18 +759,14 @@ Memory allocation (code space): 10
|
|||
|
||||
"(?1)(?#?'){2}(a)"
|
||||
------------------------------------------------------------------
|
||||
0 31 Bra
|
||||
3 6 Once
|
||||
6 21 Recurse
|
||||
9 6 Ket
|
||||
12 6 Once
|
||||
15 21 Recurse
|
||||
18 6 Ket
|
||||
21 7 CBra 1
|
||||
26 a
|
||||
28 7 Ket
|
||||
31 31 Ket
|
||||
34 End
|
||||
0 19 Bra
|
||||
3 9 Recurse
|
||||
6 9 Recurse
|
||||
9 7 CBra 1
|
||||
14 a
|
||||
16 7 Ket
|
||||
19 19 Ket
|
||||
22 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/.((?2)(?R)|\1|$)()/
|
||||
|
|
|
@ -187,7 +187,7 @@ Memory allocation (code space): 35
|
|||
0 30 Bra
|
||||
4 22 CBra 1
|
||||
10 a
|
||||
12 8 Once
|
||||
12 8 SBra
|
||||
16 4 Recurse
|
||||
20 8 KetRmax
|
||||
24 b
|
||||
|
@ -759,18 +759,14 @@ Memory allocation (code space): 12
|
|||
|
||||
"(?1)(?#?'){2}(a)"
|
||||
------------------------------------------------------------------
|
||||
0 40 Bra
|
||||
4 8 Once
|
||||
8 28 Recurse
|
||||
12 8 Ket
|
||||
16 8 Once
|
||||
20 28 Recurse
|
||||
24 8 Ket
|
||||
28 8 CBra 1
|
||||
34 a
|
||||
36 8 Ket
|
||||
40 40 Ket
|
||||
44 End
|
||||
0 24 Bra
|
||||
4 12 Recurse
|
||||
8 12 Recurse
|
||||
12 8 CBra 1
|
||||
18 a
|
||||
20 8 Ket
|
||||
24 24 Ket
|
||||
28 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/.((?2)(?R)|\1|$)()/
|
||||
|
|
|
@ -187,7 +187,7 @@ Memory allocation (code space): 42
|
|||
0 36 Bra
|
||||
5 26 CBra 1
|
||||
12 a
|
||||
14 10 Once
|
||||
14 10 SBra
|
||||
19 5 Recurse
|
||||
24 10 KetRmax
|
||||
29 b
|
||||
|
@ -759,18 +759,14 @@ Memory allocation (code space): 14
|
|||
|
||||
"(?1)(?#?'){2}(a)"
|
||||
------------------------------------------------------------------
|
||||
0 49 Bra
|
||||
5 10 Once
|
||||
10 35 Recurse
|
||||
15 10 Ket
|
||||
20 10 Once
|
||||
25 35 Recurse
|
||||
30 10 Ket
|
||||
35 9 CBra 1
|
||||
42 a
|
||||
44 9 Ket
|
||||
49 49 Ket
|
||||
54 End
|
||||
0 29 Bra
|
||||
5 15 Recurse
|
||||
10 15 Recurse
|
||||
15 9 CBra 1
|
||||
22 a
|
||||
24 9 Ket
|
||||
29 29 Ket
|
||||
34 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/.((?2)(?R)|\1|$)()/
|
||||
|
|
Loading…
Reference in New Issue