Code for pcre2_substitute(), and tests.

This commit is contained in:
Philip.Hazel 2014-11-11 10:19:23 +00:00
parent 1bbc86fe67
commit f1c6ee730a
16 changed files with 520 additions and 171 deletions

View File

@ -1,4 +1,4 @@
.TH PCRE2TEST 1 "02 November 2014" "PCRE 10.00" .TH PCRE2TEST 1 "09 November 2014" "PCRE 10.00"
.SH NAME .SH NAME
pcre2test - a program for testing Perl-compatible regular expressions. pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS .SH SYNOPSIS
@ -447,7 +447,6 @@ about the pattern:
posix use the POSIX API posix use the POSIX API
stackguard=<number> test the stackguard feature stackguard=<number> test the stackguard feature
tables=[0|1|2] select internal tables tables=[0|1|2] select internal tables
use_length use the pattern's length
.sp .sp
The effects of these modifiers are described in the following sections. The effects of these modifiers are described in the following sections.
FIXME: Give more examples. FIXME: Give more examples.
@ -497,15 +496,10 @@ pairs. For example:
/ab 32 59/hex /ab 32 59/hex
.sp .sp
This feature is provided as a way of creating patterns that contain binary zero This feature is provided as a way of creating patterns that contain binary zero
characters. When \fBhex\fP is set, it implies \fBuse_length\fP. characters. By default, \fBpcre2test\fP passes patterns as zero-terminated
. strings to \fBpcre2_compile()\fP, giving the length as PCRE2_ZERO_TERMINATED.
. However, for patterns specified in hexadecimal, the length of the pattern is
.SS "Using the pattern's length" passed.
.rs
.sp
By default, \fBpcre2test\fP passes patterns as zero-terminated strings to
\fBpcre2_compile()\fP, giving the length as -1. If \fBuse_length\fP is set, the
length of the pattern is passed. This is implied if \fBhex\fP is set.
. .
. .
.SS "JIT compilation" .SS "JIT compilation"
@ -726,6 +720,7 @@ pattern.
ovector=<n> set size of output vector ovector=<n> set size of output vector
recursion_limit=<n> set a recursion limit recursion_limit=<n> set a recursion limit
startchar show startchar when relevant startchar show startchar when relevant
zero_terminate pass the subject as zero-terminated
.sp .sp
The effects of these modifiers are described in the following sections. The effects of these modifiers are described in the following sections.
FIXME: Give more examples. FIXME: Give more examples.
@ -931,6 +926,19 @@ create a match block with a zero-length ovector; there is always one pair of
offsets.) offsets.)
. .
. .
.SS "Passing the subject as zero-terminated"
.rs
.sp
By default, the subject string is passed to a native API matching function with
its correct length. In order to test the facility for passing a zero-terminated
string, the \fBzero_terminate\fP modifier is provided. It causes the length to
be passed as PCRE2_ZERO_TERMINATED. (When matching via the POSIX interface,
this modifier has no effect, as there is no facility for passing a length.)
.P
When testing \fBpcre2_substitute()\fP, this modifier also has the effect of
passing the replacement string as zero-terminated.
.
.
.SH "THE ALTERNATIVE MATCHING FUNCTION" .SH "THE ALTERNATIVE MATCHING FUNCTION"
.rs .rs
.sp .sp
@ -1192,6 +1200,6 @@ Cambridge CB2 3QH, England.
.rs .rs
.sp .sp
.nf .nf
Last updated: 02 November 2014 Last updated: 09 November 2014
Copyright (c) 1997-2014 University of Cambridge. Copyright (c) 1997-2014 University of Cambridge.
.fi .fi

View File

@ -206,24 +206,25 @@ context functions. */
#define PCRE2_ERROR_BADMODE (-32) #define PCRE2_ERROR_BADMODE (-32)
#define PCRE2_ERROR_BADOFFSET (-33) #define PCRE2_ERROR_BADOFFSET (-33)
#define PCRE2_ERROR_BADOPTION (-34) #define PCRE2_ERROR_BADOPTION (-34)
#define PCRE2_ERROR_BADUTFOFFSET (-35) #define PCRE2_ERROR_BADREPLACEMENT (-35)
#define PCRE2_ERROR_CALLOUT (-36) /* Never used by PCRE2 itself */ #define PCRE2_ERROR_BADUTFOFFSET (-36)
#define PCRE2_ERROR_DFA_BADRESTART (-37) #define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
#define PCRE2_ERROR_DFA_RECURSE (-38) #define PCRE2_ERROR_DFA_BADRESTART (-38)
#define PCRE2_ERROR_DFA_UCOND (-39) #define PCRE2_ERROR_DFA_RECURSE (-39)
#define PCRE2_ERROR_DFA_UITEM (-40) #define PCRE2_ERROR_DFA_UCOND (-40)
#define PCRE2_ERROR_DFA_WSSIZE (-41) #define PCRE2_ERROR_DFA_UITEM (-41)
#define PCRE2_ERROR_INTERNAL (-42) #define PCRE2_ERROR_DFA_WSSIZE (-42)
#define PCRE2_ERROR_JIT_BADOPTION (-43) #define PCRE2_ERROR_INTERNAL (-43)
#define PCRE2_ERROR_JIT_STACKLIMIT (-44) #define PCRE2_ERROR_JIT_BADOPTION (-44)
#define PCRE2_ERROR_MATCHLIMIT (-45) #define PCRE2_ERROR_JIT_STACKLIMIT (-45)
#define PCRE2_ERROR_NOMEMORY (-46) #define PCRE2_ERROR_MATCHLIMIT (-46)
#define PCRE2_ERROR_NOSUBSTRING (-47) #define PCRE2_ERROR_NOMEMORY (-47)
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-48) #define PCRE2_ERROR_NOSUBSTRING (-48)
#define PCRE2_ERROR_NULL (-49) #define PCRE2_ERROR_NOUNIQUESUBSTRING (-49)
#define PCRE2_ERROR_RECURSELOOP (-50) #define PCRE2_ERROR_NULL (-50)
#define PCRE2_ERROR_RECURSIONLIMIT (-51) #define PCRE2_ERROR_RECURSELOOP (-51)
#define PCRE2_ERROR_UNSET (-52) #define PCRE2_ERROR_RECURSIONLIMIT (-52)
#define PCRE2_ERROR_UNSET (-53)
/* Request types for pcre2_pattern_info() */ /* Request types for pcre2_pattern_info() */

View File

@ -206,24 +206,25 @@ static const char match_error_texts[] =
"bad offset value\0" "bad offset value\0"
"bad option value\0" "bad option value\0"
/* 35 */ /* 35 */
"invalid replacement string\0"
"bad offset into UTF string\0" "bad offset into UTF string\0"
"callout error code\0" /* Never returned by PCRE2 itself */ "callout error code\0" /* Never returned by PCRE2 itself */
"invalid data in workspace for DFA restart\0" "invalid data in workspace for DFA restart\0"
"too much recursion for DFA matching\0" "too much recursion for DFA matching\0"
"backreference condition or recursion test not supported for DFA matching\0"
/* 40 */ /* 40 */
"backreference condition or recursion test not supported for DFA matching\0"
"item unsupported for DFA matching\0" "item unsupported for DFA matching\0"
"workspace size exceeded in DFA matching\0" "workspace size exceeded in DFA matching\0"
"internal error - pattern overwritten?\0" "internal error - pattern overwritten?\0"
"bad JIT option\0" "bad JIT option\0"
"JIT stack limit reached\0"
/* 45 */ /* 45 */
"JIT stack limit reached\0"
"match limit exceeded\0" "match limit exceeded\0"
"no more memory\0" "no more memory\0"
"unknown or unset substring\0" "unknown or unset substring\0"
"non-unique substring name\0" "non-unique substring name\0"
"NULL argument passed\0"
/* 50 */ /* 50 */
"NULL argument passed\0"
"nested recursion at the same subject position\0" "nested recursion at the same subject position\0"
"recursion limit exceeded\0" "recursion limit exceeded\0"
"requested value is not set\0" "requested value is not set\0"

View File

@ -51,7 +51,7 @@ POSSIBILITY OF SUCH DAMAGE.
*************************************************/ *************************************************/
/* This function applies a compiled re to a subject string and creates a new /* This function applies a compiled re to a subject string and creates a new
string with substitutione. The first 7 arguments are the same as for string with substitutions. The first 7 arguments are the same as for
pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED. pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
Arguments: Arguments:
@ -69,6 +69,7 @@ Arguments:
Returns: > 0 number of substitutions made Returns: > 0 number of substitutions made
< 0 an error code, including PCRE2_ERROR_NOMATCH if no match < 0 an error code, including PCRE2_ERROR_NOMATCH if no match
PCRE2_ERROR_BADREPLACEMENT means invalid use of $
*/ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
@ -86,6 +87,11 @@ BOOL global = FALSE;
PCRE2_SIZE buff_offset, lengthleft, endlength; PCRE2_SIZE buff_offset, lengthleft, endlength;
PCRE2_SIZE *ovector; PCRE2_SIZE *ovector;
/* Partial matching is not valid. */
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
return PCRE2_ERROR_BADOPTION;
/* If no match data block is provided, create one. */ /* If no match data block is provided, create one. */
if (match_data == NULL) if (match_data == NULL)
@ -129,11 +135,16 @@ do
rc = pcre2_match(code, subject, length, start_offset, options|goptions, rc = pcre2_match(code, subject, length, start_offset, options|goptions,
match_data, mcontext); match_data, mcontext);
/* Any error other than no match returns the error code. No match when not
doing the special after-empty-match global rematch, or when at the end of the
subject, breaks the global loop. Otherwise, advance the starting point and
try again. */
if (rc < 0) if (rc < 0)
{ {
if (goptions == 0 || rc != PCRE2_ERROR_NOMATCH || start_offset >= length) if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
break; if (goptions == 0 || start_offset >= length) break;
start_offset++; start_offset++;
if ((code->overall_options & PCRE2_UTF) != 0) if ((code->overall_options & PCRE2_UTF) != 0)
{ {
@ -149,6 +160,8 @@ do
goptions = 0; goptions = 0;
continue; continue;
} }
/* Handle a successful match. */
subs++; subs++;
if (rc == 0) rc = ovector_count; if (rc == 0) rc = ovector_count;
@ -161,29 +174,34 @@ do
for (i = 0; i < rlength; i++) for (i = 0; i < rlength; i++)
{ {
if (replacement[i] == CHAR_DOLLAR_SIGN && i != rlength - 1) if (replacement[i] == CHAR_DOLLAR_SIGN)
{ {
int group = -1; int group, n;
int n = 0; BOOL inparens;
BOOL inparens = FALSE; PCRE2_SIZE sublength;
PCRE2_SIZE j = i + 1; PCRE2_UCHAR next;
PCRE2_SIZE sublength; PCRE2_UCHAR name[33];
PCRE2_UCHAR next = replacement[j];
PCRE2_UCHAR name[33]; if (++i == rlength) goto BAD;
if ((next = replacement[i]) == CHAR_DOLLAR_SIGN) goto LITERAL;
group = -1;
n = 0;
inparens = FALSE;
if (next == CHAR_LEFT_CURLY_BRACKET) if (next == CHAR_LEFT_CURLY_BRACKET)
{ {
if (j == rlength - 1) goto LITERAL; if (++i == rlength) goto BAD;
next = replacement[i];
inparens = TRUE; inparens = TRUE;
next = replacement[++j];
} }
if (next >= CHAR_0 && next <= CHAR_9) if (next >= CHAR_0 && next <= CHAR_9)
{ {
group = next - CHAR_0; group = next - CHAR_0;
while (j < rlength - 1) while (i < rlength - 1)
{ {
next = replacement[++j]; next = replacement[++i];
if (next < CHAR_0 || next > CHAR_9) break; if (next < CHAR_0 || next > CHAR_9) break;
group = group * 10 + next - CHAR_0; group = group * 10 + next - CHAR_0;
} }
@ -194,31 +212,31 @@ do
while (MAX_255(next) && (ctypes[next] & ctype_word) != 0) while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
{ {
name[n++] = next; name[n++] = next;
if (n > 32) goto LITERAL; if (n > 32) goto BAD;
if (j == rlength - 1) break; if (i == rlength) break;
next = replacement[++j]; next = replacement[++i];
} }
if (n == 0) goto BAD;
name[n] = 0; name[n] = 0;
} }
if (inparens) if (inparens)
{ {
if (j == rlength || next != CHAR_RIGHT_CURLY_BRACKET) goto LITERAL; if (i == rlength || next != CHAR_RIGHT_CURLY_BRACKET) goto BAD;
} }
else j--; /* Last code unit of name/number */ else i--; /* Last code unit of name/number */
/* Have found a syntactically correct group number or name. */ /* Have found a syntactically correct group number or name. */
i = j; /* Where to continue from */ sublength = lengthleft;
if (group < 0) if (group < 0)
rc = pcre2_substring_copy_byname(match_data, name, rc = pcre2_substring_copy_byname(match_data, name,
buffer + buff_offset, &sublength); buffer + buff_offset, &sublength);
else else
rc = pcre2_substring_copy_bynumber(match_data, group, rc = pcre2_substring_copy_bynumber(match_data, group,
buffer + buff_offset, &sublength); buffer + buff_offset, &sublength);
if (rc < 0) goto EXIT; if (rc < 0) goto EXIT;
buff_offset += sublength; buff_offset += sublength;
lengthleft -= sublength; lengthleft -= sublength;
} }
@ -242,20 +260,16 @@ do
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART; PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
} while (global); /* Repeat "do" loop */ } while (global); /* Repeat "do" loop */
/* No match is a "normal" end; copy the rest of the subject and return the /* Copy the rest of the subject and return the number of substitutions. */
number of substitutions. */
if (rc == PCRE2_ERROR_NOMATCH) rc = subs;
{ endlength = length - start_offset;
rc = subs; if (endlength + 1 > lengthleft) goto NOROOM;
endlength = length - start_offset; memcpy(buffer + buff_offset, subject + start_offset,
if (endlength + 1 >= lengthleft) goto NOROOM; endlength*(PCRE2_CODE_UNIT_WIDTH/8));
memcpy(buffer + buff_offset, subject + start_offset, buff_offset += endlength;
endlength*(PCRE2_CODE_UNIT_WIDTH/8)); buffer[buff_offset] = 0;
buff_offset += endlength; *blength = buff_offset;
buffer[buff_offset] = 0;
*blength = buff_offset;
}
EXIT: EXIT:
if (match_data_created) pcre2_match_data_free(match_data); if (match_data_created) pcre2_match_data_free(match_data);
@ -264,6 +278,10 @@ return rc;
NOROOM: NOROOM:
rc = PCRE2_ERROR_NOMEMORY; rc = PCRE2_ERROR_NOMEMORY;
goto EXIT; goto EXIT;
BAD:
rc = PCRE2_ERROR_BADREPLACEMENT;
goto EXIT;
} }
/* End of pcre2_substitute.c */ /* End of pcre2_substitute.c */

View File

@ -165,6 +165,7 @@ void vms_setsymbol( char *, char *, int );
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */ #define DEFAULT_OVECCOUNT 15 /* Default ovector count */
#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */ #define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
#define LOOPREPEAT 500000 /* Default loop count for timing */ #define LOOPREPEAT 500000 /* Default loop count for timing */
#define REPLACE_BUFFSIZE 400 /* For replacement strings */
#define VERSION_SIZE 64 /* Size of buffer for the version strings */ #define VERSION_SIZE 64 /* Size of buffer for the version strings */
/* Execution modes */ /* Execution modes */
@ -345,9 +346,9 @@ either on a pattern or a data line, so they must all be distinct. */
#define CTL_JITVERIFY 0x00010000u #define CTL_JITVERIFY 0x00010000u
#define CTL_MARK 0x00020000u #define CTL_MARK 0x00020000u
#define CTL_MEMORY 0x00040000u #define CTL_MEMORY 0x00040000u
#define CTL_PATLEN 0x00080000u #define CTL_POSIX 0x00080000u
#define CTL_POSIX 0x00100000u #define CTL_STARTCHAR 0x00100000u
#define CTL_STARTCHAR 0x00200000u #define CTL_ZERO_TERMINATE 0x00200000u
#define CTL_BSR_SET 0x80000000u /* This is informational */ #define CTL_BSR_SET 0x80000000u /* This is informational */
#define CTL_NL_SET 0x40000000u /* This is informational */ #define CTL_NL_SET 0x40000000u /* This is informational */
@ -376,6 +377,7 @@ typedef struct patctl { /* Structure for pattern modifiers. */
uint32_t stackguard_test; uint32_t stackguard_test;
uint32_t tables_id; uint32_t tables_id;
uint8_t locale[32]; uint8_t locale[32];
uint8_t replacement[REPLACE_BUFFSIZE];
} patctl; } patctl;
#define MAXCPYGET 10 #define MAXCPYGET 10
@ -485,13 +487,14 @@ static modstruct modlist[] = {
{ "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) }, { "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
{ "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) }, { "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
{ "recursion_limit", MOD_CTM, MOD_INT, 0, MO(recursion_limit) }, { "recursion_limit", MOD_CTM, MOD_INT, 0, MO(recursion_limit) },
{ "replace", MOD_PAT, MOD_STR, 0, PO(replacement) },
{ "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) }, { "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
{ "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) }, { "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
{ "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) }, { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
{ "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) }, { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
{ "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) }, { "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
{ "use_length", MOD_PAT, MOD_CTL, CTL_PATLEN, PO(control) }, { "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) },
{ "utf", MOD_PATP, MOD_OPT, PCRE2_UTF, PO(options) } { "zero_terminate", MOD_DAT, MOD_CTL, CTL_ZERO_TERMINATE, DO(control) }
}; };
#define MODLISTCOUNT sizeof(modlist)/sizeof(modstruct) #define MODLISTCOUNT sizeof(modlist)/sizeof(modstruct)
@ -945,6 +948,17 @@ are supported. */
else \ else \
pcre2_set_recursion_limit_32(G(a,32),b) pcre2_set_recursion_limit_32(G(a,32),b)
/* Run-time dispatch to pcre2_substitute_8(), pcre2_substitute_16() or
pcre2_substitute_32() according to test_mode, storing the function's return
value in a. Arguments c and i are cast to the PCRE2_SPTR type of the selected
width and k to the matching PCRE2_UCHAR pointer type; b, g and h are passed
through G() with the width as second argument (G is defined elsewhere in the
file - presumably it pastes the width suffix onto the name). The expansion is a
bare if/else chain without a trailing semicolon, so it must be used as a
complete statement and is not safe as the unbraced body of another if/else. */
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
if (test_mode == PCRE8_MODE) \
a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
(PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \
else if (test_mode == PCRE16_MODE) \
a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
(PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \
else \
a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
(PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
if (test_mode == PCRE8_MODE) \ if (test_mode == PCRE8_MODE) \
a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \ a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \
@ -1298,6 +1312,16 @@ the three different cases. */
else \ else \
G(pcre2_set_recursion_limit_,BITTWO)(G(a,BITTWO),b) G(pcre2_set_recursion_limit_,BITTWO)(G(a,BITTWO),b)
/* Two-way dispatch to the pcre2_substitute_ function for either the BITONE or
the BITTWO width, chosen by comparing test_mode with the mode constant built
from BITONE; the return value is stored in a. Function names, pointer types
and the b, g, h arguments are all formed with G() and the relevant width
(BITONE, BITTWO and G are defined elsewhere in the file). As with the other
dispatch macros, the expansion is a bare if/else without a trailing semicolon,
so it must be used as a complete statement. */
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
a = G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
G(g,BITONE),G(h,BITONE),(G(PCRE2_SPTR,BITONE))i,j, \
(G(PCRE2_UCHAR,BITONE) *)k,l); \
else \
a = G(pcre2_substitute_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
G(g,BITTWO),G(h,BITTWO),(G(PCRE2_SPTR,BITTWO))i,j, \
(G(PCRE2_UCHAR,BITTWO) *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \
a = G(pcre2_substring_copy_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\ a = G(pcre2_substring_copy_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\
@ -1466,6 +1490,9 @@ the three different cases. */
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b) #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b) #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b)
/* 8-bit-only form: calls pcre2_substitute_8() unconditionally (no test_mode
check) and stores its return value in a. c and i are cast to PCRE2_SPTR8 and k
to PCRE2_UCHAR8 *; b, g and h go through G() with 8 as the second argument. */
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
(PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e) a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
@ -1544,6 +1571,9 @@ the three different cases. */
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b) #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b) #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b)
/* 16-bit-only form: calls pcre2_substitute_16() unconditionally (no test_mode
check) and stores its return value in a. c and i are cast to PCRE2_SPTR16 and
k to PCRE2_UCHAR16 *; b, g and h go through G() with 16 as the second
argument. */
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
(PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e) a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
@ -1622,6 +1652,9 @@ the three different cases. */
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b) #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b) #define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b)
/* 32-bit-only form: calls pcre2_substitute_32() unconditionally (no test_mode
check) and stores its return value in a. c and i are cast to PCRE2_SPTR32 and
k to PCRE2_UCHAR32 *; b, g and h go through G() with 32 as the second
argument. */
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
(PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \ #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e) a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \ #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
@ -3199,9 +3232,9 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
((controls & CTL_JITVERIFY) != 0)? " jitverify" : "", ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
((controls & CTL_MARK) != 0)? " mark" : "", ((controls & CTL_MARK) != 0)? " mark" : "",
((controls & CTL_MEMORY) != 0)? " memory" : "", ((controls & CTL_MEMORY) != 0)? " memory" : "",
((controls & CTL_PATLEN) != 0)? " use_length" : "",
((controls & CTL_POSIX) != 0)? " posix" : "", ((controls & CTL_POSIX) != 0)? " posix" : "",
((controls & CTL_STARTCHAR) != 0)? " startchar" : ""); ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
} }
@ -3672,6 +3705,7 @@ patlen = p - buffer - 2;
/* Look for modifiers and options after the final delimiter. */ /* Look for modifiers and options after the final delimiter. */
if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP; if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
utf = (pat_patctl.options & PCRE2_UTF) != 0;
/* Assume full JIT compile for jitverify and/or jitfast if nothing else was /* Assume full JIT compile for jitverify and/or jitfast if nothing else was
specified. */ specified. */
@ -3679,7 +3713,6 @@ specified. */
if (pat_patctl.jit == 0 && if (pat_patctl.jit == 0 &&
(pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0) (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
pat_patctl.jit = 7; pat_patctl.jit = 7;
utf = (pat_patctl.options & PCRE2_UTF) != 0;
/* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
in callouts. Convert to binary if required. */ in callouts. Convert to binary if required. */
@ -3786,6 +3819,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
/* Check for features that the POSIX interface does not support. */ /* Check for features that the POSIX interface does not support. */
if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale"); if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
if (pat_patctl.tables_id != 0) prmsg(&msg, "tables"); if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard"); if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
if (timeit > 0) prmsg(&msg, "timing"); if (timeit > 0) prmsg(&msg, "timing");
@ -3863,11 +3897,11 @@ switch(errorcode)
break; break;
} }
/* The pattern in now in pbuffer[8|16|32], with the length in patlen. By /* The pattern is now in pbuffer[8|16|32], with the length in patlen. By
default, however, we pass a zero-terminated pattern. The length is passed only default, however, we pass a zero-terminated pattern. The length is passed only
if we had a hex pattern or if use_length was set. */ if we had a hex pattern. */
if ((pat_patctl.control & (CTL_PATLEN|CTL_HEXPAT)) == 0) patlen = -1; if ((pat_patctl.control & CTL_HEXPAT) == 0) patlen = PCRE2_ZERO_TERMINATED;
/* Compile many times when timing. */ /* Compile many times when timing. */
@ -4491,22 +4525,6 @@ SET(*q, 0);
len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */ len = CASTVAR(uint8_t *, q) - dbuffer; /* Length in bytes */
ulen = len/code_unit_size; /* Length in code units */ ulen = len/code_unit_size; /* Length in code units */
/* If we have explicit valgrind support, mark the data from after its end to
the end of the buffer as unaddressable, so that a read over the end of the
buffer will be seen by valgrind, even if it doesn't cause a crash. If we're not
building with valgrind support, at least move the data to the end of the buffer
so that it might at least cause a crash. If we are using the POSIX interface,
we must include the terminating zero. */
pp = dbuffer;
c = code_unit_size * ((pat_patctl.control & CTL_POSIX) != 0)? 1:0;
#ifdef SUPPORT_VALGRIND
VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + c, dbuffer_size - (len + c));
#else
pp = memmove(pp + dbuffer_size - len - c, pp, len + c);
#endif
/* If the string was terminated by \= we must now interpret modifiers. */ /* If the string was terminated by \= we must now interpret modifiers. */
if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl)) if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
@ -4522,10 +4540,27 @@ if (c - (c & -c) != 0)
return PR_OK; return PR_OK;
} }
/* Now run the pattern match: len contains the byte length, ulen contains the /* If we have explicit valgrind support, mark the data from after its end to
code unit length, and pp points to the subject string. POSIX matching is only the end of the buffer as unaddressable, so that a read over the end of the
possible in 8-bit mode, and it does not support timing or other fancy features. buffer will be seen by valgrind, even if it doesn't cause a crash. If we're not
Some were checked at compile time, but we need to check the match-time settings building with valgrind support, at least move the data to the end of the buffer
so that it might at least cause a crash. If we are using the POSIX interface,
or testing zero-termination, we must include the terminating zero. */
pp = dbuffer;
c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
(dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
#ifdef SUPPORT_VALGRIND
VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + c, dbuffer_size - (len + c));
#else
pp = memmove(pp + dbuffer_size - len - c, pp, len + c);
#endif
/* We now have len containing the byte length, ulen containing the code unit
length, and pp pointing to the subject string. POSIX matching is only possible
in 8-bit mode, and it does not support timing or other fancy features. Some
were checked at compile time, but we need to check the match-time settings
here. */ here. */
#ifdef SUPPORT_PCRE2_8 #ifdef SUPPORT_PCRE2_8
@ -4621,6 +4656,11 @@ if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
dat_datctl.control &= ~CTL_ALLUSEDTEXT; dat_datctl.control &= ~CTL_ALLUSEDTEXT;
} }
/* Handle passing the subject as zero-terminated. */
if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
ulen = PCRE2_ZERO_TERMINATED;
/* Enable display of malloc/free if wanted. */ /* Enable display of malloc/free if wanted. */
show_memory = (dat_datctl.control & CTL_MEMORY) != 0; show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
@ -4676,9 +4716,134 @@ else
PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL); PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
} }
/* Loop for global matching */ /* If a replacement string is provided, call pcre2_substitute() instead of one
of the matching functions. First we have to convert the replacement string to
the appropriate width. */
for (gmatched = 0;; gmatched++) if (pat_patctl.replacement[0] != 0)
{
int rc;
uint8_t *pr;
uint8_t rbuffer[REPLACE_BUFFSIZE];
uint8_t nbuffer[REPLACE_BUFFSIZE];
uint32_t goption;
PCRE2_SIZE rlen;
PCRE2_SIZE nsize;
#ifdef SUPPORT_PCRE2_8
uint8_t *r8 = NULL;
#endif
#ifdef SUPPORT_PCRE2_16
uint16_t *r16 = NULL;
#endif
#ifdef SUPPORT_PCRE2_32
uint32_t *r32 = NULL;
#endif
goption = ((pat_patctl.control & CTL_GLOBAL) == 0)? 0 :
PCRE2_SUBSTITUTE_GLOBAL;
SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */
pr = pat_patctl.replacement;
/* If the replacement starts with '[<number>]' we interpret that as a length
value for the replacement buffer. */
nsize = REPLACE_BUFFSIZE/code_unit_size;
if (*pr == '[')
{
PCRE2_SIZE n = 0;
while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
if (*pr++ != ']')
{
fprintf(outfile, "Bad buffer size in replacement string\n");
return PR_OK;
}
if (n > nsize)
{
fprintf(outfile, "Replacement buffer setting (%ld) is too large "
"(max %ld)\n", n, nsize);
return PR_OK;
}
nsize = n;
}
/* Now copy the replacement string to a buffer of the appropriate width. */
while ((c = *pr++) != 0)
{
if (utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
/* At present no escape processing is provided for replacements. */
#ifdef SUPPORT_PCRE2_8
if (test_mode == PCRE8_MODE)
{
if (utf)
{
r8 += ord2utf8(c, r8);
}
else
{
*r8++ = c;
}
}
#endif
#ifdef SUPPORT_PCRE2_16
if (test_mode == PCRE16_MODE)
{
if (utf)
{
if (c >= 0x10000u)
{
c-= 0x10000u;
*r16++ = 0xD800 | (c >> 10);
*r16++ = 0xDC00 | (c & 0x3ff);
}
else
*r16++ = c;
}
else
{
*r16++ = c;
}
}
#endif
#ifdef SUPPORT_PCRE2_32
if (test_mode == PCRE32_MODE)
{
*r32++ = c;
}
#endif
}
SET(*r, 0);
if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
rlen = PCRE2_ZERO_TERMINATED;
else
rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
PCRE2_SUBSTITUTE(rc, compiled_code, pp, ulen, dat_datctl.offset,
dat_datctl.options|goption, match_data, dat_context,
rbuffer, rlen, nbuffer, &nsize);
if (rc < 0)
{
fprintf(outfile, "Failed: error %d: ", rc);
PCRE2_GET_ERROR_MESSAGE(nsize, rc, pbuffer);
PCHARSV(CASTVAR(void *, pbuffer), 0, nsize, FALSE, outfile);
}
else
{
fprintf(outfile, "%2d: ", rc);
PCHARSV(nbuffer, 0, nsize, utf, outfile);
}
fprintf(outfile, "\n");
} /* End of substitution handling */
/* When a replacement string is not provided, run a loop for global matching
with one of the basic matching functions. */
else for (gmatched = 0;; gmatched++)
{ {
PCRE2_SIZE j; PCRE2_SIZE j;
int capcount; int capcount;
@ -4689,7 +4854,7 @@ for (gmatched = 0;; gmatched++)
/* Fill the ovector with junk to detect elements that do not get set /* Fill the ovector with junk to detect elements that do not get set
when they should be. */ when they should be. */
for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET; for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET;
/* When matching is via pcre2_match(), we will detect the use of JIT via the /* When matching is via pcre2_match(), we will detect the use of JIT via the
@ -4787,7 +4952,7 @@ for (gmatched = 0;; gmatched++)
{ {
PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */ PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */
} }
/* Run a single DFA or NFA match. */ /* Run a single DFA or NFA match. */
if ((dat_datctl.control & CTL_DFA) != 0) if ((dat_datctl.control & CTL_DFA) != 0)
@ -4888,7 +5053,7 @@ for (gmatched = 0;; gmatched++)
fprintf(outfile, "Start of matched string is beyond its end - " fprintf(outfile, "Start of matched string is beyond its end - "
"displaying from end to start.\n"); "displaying from end to start.\n");
} }
fprintf(outfile, "%2d: ", i/2); fprintf(outfile, "%2d: ", i/2);
/* Check for an unset group */ /* Check for an unset group */
@ -4900,15 +5065,15 @@ for (gmatched = 0;; gmatched++)
} }
/* Check for silly offsets, in particular, values that have not been /* Check for silly offsets, in particular, values that have not been
set when they should have been. */ set when they should have been. */
if (start > ulen || end > ulen) if (start > ulen || end > ulen)
{ {
fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n", fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
start, end); start, end);
continue; continue;
} }
/* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with /* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
JIT, it is disabled above, with a comment.) When the match is done by the JIT, it is disabled above, with a comment.) When the match is done by the
interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is

14
testdata/grepoutput vendored
View File

@ -384,15 +384,15 @@ aaaaa2
010203040506 010203040506
RC=0 RC=0
======== STDERR ======== ======== STDERR ========
pcre2grep: pcre2_match() gave error -45 while matching this text: pcre2grep: pcre2_match() gave error -46 while matching this text:
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
pcre2grep: pcre2_match() gave error -45 while matching this text: pcre2grep: pcre2_match() gave error -46 while matching this text:
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
pcre2grep: Error -44, -45 or -51 means that a resource limit was exceeded. pcre2grep: Error -45, -46 or -52 means that a resource limit was exceeded.
pcre2grep: Check your regex for nested unlimited loops. pcre2grep: Check your regex for nested unlimited loops.
---------------------------- Test 38 ------------------------------ ---------------------------- Test 38 ------------------------------
This line contains a binary zero here >< for testing. This line contains a binary zero here >< for testing.
@ -510,23 +510,23 @@ In the middle of a line, PATTERN appears.
Check up on PATTERN near the end. Check up on PATTERN near the end.
RC=0 RC=0
---------------------------- Test 62 ----------------------------- ---------------------------- Test 62 -----------------------------
pcre2grep: pcre2_match() gave error -45 while matching text that starts: pcre2grep: pcre2_match() gave error -46 while matching text that starts:
This is a file of miscellaneous text that is used as test data for checking This is a file of miscellaneous text that is used as test data for checking
that the pcregrep command is working correctly. The file must be more than 24K that the pcregrep command is working correctly. The file must be more than 24K
long so that it needs more than a single read long so that it needs more than a single read
pcre2grep: Error -44, -45 or -51 means that a resource limit was exceeded. pcre2grep: Error -45, -46 or -52 means that a resource limit was exceeded.
pcre2grep: Check your regex for nested unlimited loops. pcre2grep: Check your regex for nested unlimited loops.
RC=1 RC=1
---------------------------- Test 63 ----------------------------- ---------------------------- Test 63 -----------------------------
pcre2grep: pcre2_match() gave error -51 while matching text that starts: pcre2grep: pcre2_match() gave error -52 while matching text that starts:
This is a file of miscellaneous text that is used as test data for checking This is a file of miscellaneous text that is used as test data for checking
that the pcregrep command is working correctly. The file must be more than 24K that the pcregrep command is working correctly. The file must be more than 24K
long so that it needs more than a single read long so that it needs more than a single read
pcre2grep: Error -44, -45 or -51 means that a resource limit was exceeded. pcre2grep: Error -45, -46 or -52 means that a resource limit was exceeded.
pcre2grep: Check your regex for nested unlimited loops. pcre2grep: Check your regex for nested unlimited loops.
RC=1 RC=1
---------------------------- Test 64 ------------------------------ ---------------------------- Test 64 ------------------------------

61
testdata/testinput2 vendored
View File

@ -4008,4 +4008,65 @@ a random value. /Ix
/(((((a)))))/parens_nest_limit=2 /(((((a)))))/parens_nest_limit=2
# Tests for pcre2_substitute()
/abc/replace=XYZ
123123
123abc123
123abc123abc123
123123\=zero_terminate
123abc123\=zero_terminate
123abc123abc123\=zero_terminate
/abc/g,replace=XYZ
123abc123
123abc123abc123
/abc/replace=X$$Z
123abc123
/abc/g,replace=X$$Z
123abc123abc123
/a(b)c(d)e/replace=X$1Y${2}Z
"abcde"
/a(b)c(d)e/replace=X$1Y${2}Z,global
"abcde-abcde"
/a(?<ONE>b)c(?<TWO>d)e/replace=X$ONE+${TWO}Z
"abcde"
/a(?<ONE>b)c(?<TWO>d)e/g,replace=X$ONE+${TWO}Z
"abcde-abcde-"
/abc/replace=a$++
123abc
/abc/replace=a$bad
123abc
/abc/replace=a${A234567890123456789_123456789012}z
123abc
/abc/replace=a${A23456789012345678901234567890123}z
123abc
/abc/replace=a${bcd
123abc
/abc/replace=a${b+d}z
123abc
/abc/replace=[10]XYZ
123abc123
/abc/replace=[9]XYZ
123abc123
/abc/replace=xyz
1abc2\=partial_hard
# End of substitute tests
# End of testinput2 # End of testinput2

5
testdata/testinput5 vendored
View File

@ -1627,5 +1627,10 @@
/\x{100}\x{200}\K\x{300}/utf,startchar /\x{100}\x{200}\K\x{300}/utf,startchar
\x{100}\x{200}\x{300} \x{100}\x{200}\x{300}
# Test UTF characters in a substitution
/ábc/utf,replace=XሴZ
123ábc123
# End of testinput5 # End of testinput5

View File

@ -888,7 +888,7 @@ Subject length lower bound = 3
a\x{123}aa\=offset=1 a\x{123}aa\=offset=1
0: aa 0: aa
a\x{123}aa\=offset=2 a\x{123}aa\=offset=2
Error -35 (bad UTF-8 offset) Error -36 (bad UTF-8 offset)
a\x{123}aa\=offset=3 a\x{123}aa\=offset=3
0: aa 0: aa
a\x{123}aa\=offset=4 a\x{123}aa\=offset=4

View File

@ -851,9 +851,9 @@ Subject length lower bound = 1
/a/utf /a/utf
\x{10000}\=offset=1 \x{10000}\=offset=1
Error -35 (bad UTF-16 offset) Error -36 (bad UTF-16 offset)
\x{10000}ab\=offset=1 \x{10000}ab\=offset=1
Error -35 (bad UTF-16 offset) Error -36 (bad UTF-16 offset)
\x{10000}ab\=offset=2 \x{10000}ab\=offset=2
0: a 0: a
\x{10000}ab\=offset=3 \x{10000}ab\=offset=3

24
testdata/testoutput14 vendored
View File

@ -114,11 +114,11 @@ Subject length lower bound = 3
aaaaaaaaaaaaaz aaaaaaaaaaaaaz
No match No match
aaaaaaaaaaaaaz\=match_limit=3000 aaaaaaaaaaaaaz\=match_limit=3000
Failed: error -45: match limit exceeded Failed: error -46: match limit exceeded
/(a+)*zz/ /(a+)*zz/
aaaaaaaaaaaaaz\=recursion_limit=10 aaaaaaaaaaaaaz\=recursion_limit=10
Failed: error -51: recursion limit exceeded Failed: error -52: recursion limit exceeded
/(*LIMIT_MATCH=3000)(a+)*zz/I /(*LIMIT_MATCH=3000)(a+)*zz/I
Capturing subpattern count = 1 Capturing subpattern count = 1
@ -127,9 +127,9 @@ Starting code units: a z
Last code unit = 'z' Last code unit = 'z'
Subject length lower bound = 2 Subject length lower bound = 2
aaaaaaaaaaaaaz aaaaaaaaaaaaaz
Failed: error -45: match limit exceeded Failed: error -46: match limit exceeded
aaaaaaaaaaaaaz\=match_limit=60000 aaaaaaaaaaaaaz\=match_limit=60000
Failed: error -45: match limit exceeded Failed: error -46: match limit exceeded
/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I /(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
Capturing subpattern count = 1 Capturing subpattern count = 1
@ -138,7 +138,7 @@ Starting code units: a z
Last code unit = 'z' Last code unit = 'z'
Subject length lower bound = 2 Subject length lower bound = 2
aaaaaaaaaaaaaz aaaaaaaaaaaaaz
Failed: error -45: match limit exceeded Failed: error -46: match limit exceeded
/(*LIMIT_MATCH=60000)(a+)*zz/I /(*LIMIT_MATCH=60000)(a+)*zz/I
Capturing subpattern count = 1 Capturing subpattern count = 1
@ -149,7 +149,7 @@ Subject length lower bound = 2
aaaaaaaaaaaaaz aaaaaaaaaaaaaz
No match No match
aaaaaaaaaaaaaz\=match_limit=3000 aaaaaaaaaaaaaz\=match_limit=3000
Failed: error -45: match limit exceeded Failed: error -46: match limit exceeded
/(*LIMIT_RECURSION=10)(a+)*zz/I /(*LIMIT_RECURSION=10)(a+)*zz/I
Capturing subpattern count = 1 Capturing subpattern count = 1
@ -158,9 +158,9 @@ Starting code units: a z
Last code unit = 'z' Last code unit = 'z'
Subject length lower bound = 2 Subject length lower bound = 2
aaaaaaaaaaaaaz aaaaaaaaaaaaaz
Failed: error -51: recursion limit exceeded Failed: error -52: recursion limit exceeded
aaaaaaaaaaaaaz\=recursion_limit=1000 aaaaaaaaaaaaaz\=recursion_limit=1000
Failed: error -51: recursion limit exceeded Failed: error -52: recursion limit exceeded
/(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I /(*LIMIT_RECURSION=10)(*LIMIT_RECURSION=1000)(a+)*zz/I
Capturing subpattern count = 1 Capturing subpattern count = 1
@ -180,21 +180,21 @@ Subject length lower bound = 2
aaaaaaaaaaaaaz aaaaaaaaaaaaaz
No match No match
aaaaaaaaaaaaaz\=recursion_limit=10 aaaaaaaaaaaaaz\=recursion_limit=10
Failed: error -51: recursion limit exceeded Failed: error -52: recursion limit exceeded
# These three have infinitely nested recursions. # These three have infinitely nested recursions.
/((?2))((?1))/ /((?2))((?1))/
abc abc
Failed: error -50: nested recursion at the same subject position Failed: error -51: nested recursion at the same subject position
/((?(R2)a+|(?1)b))/ /((?(R2)a+|(?1)b))/
aaaabcde aaaabcde
Failed: error -50: nested recursion at the same subject position Failed: error -51: nested recursion at the same subject position
/(?(R)a*(?1)|((?R))b)/ /(?(R)a*(?1)|((?R))b)/
aaaabcde aaaabcde
Failed: error -50: nested recursion at the same subject position Failed: error -51: nested recursion at the same subject position
# The allusedtext modifier does not work with JIT, which does not maintain # The allusedtext modifier does not work with JIT, which does not maintain
# the leftchar/rightchar data. # the leftchar/rightchar data.

30
testdata/testoutput16 vendored
View File

@ -15,7 +15,7 @@ JIT compilation was not successful
/(?(R)a*(?1)|((?R))b)/ /(?(R)a*(?1)|((?R))b)/
aaaabcde aaaabcde
Failed: error -44: JIT stack limit reached Failed: error -45: JIT stack limit reached
/abcd/I /abcd/I
Capturing subpattern count = 0 Capturing subpattern count = 0
@ -64,13 +64,13 @@ No match
abcd abcd
0: abcd (JIT) 0: abcd (JIT)
ab\=ps ab\=ps
Failed: error -43: bad JIT option Failed: error -44: bad JIT option
ab\=ph ab\=ph
Failed: error -43: bad JIT option Failed: error -44: bad JIT option
xyz xyz
No match (JIT) No match (JIT)
xyz\=ps xyz\=ps
Failed: error -43: bad JIT option Failed: error -44: bad JIT option
/abcd/jit=2 /abcd/jit=2
abcd abcd
@ -84,13 +84,13 @@ No match
/abcd/jit=2,jitfast /abcd/jit=2,jitfast
abcd abcd
Failed: error -43: bad JIT option Failed: error -44: bad JIT option
ab\=ps ab\=ps
Partial match: ab (JIT) Partial match: ab (JIT)
ab\=ph ab\=ph
Failed: error -43: bad JIT option Failed: error -44: bad JIT option
xyz xyz
Failed: error -43: bad JIT option Failed: error -44: bad JIT option
/abcd/jit=3 /abcd/jit=3
abcd abcd
@ -256,7 +256,7 @@ Minimum match limit = 6
aaaaaaaaaaaaaz aaaaaaaaaaaaaz
No match (JIT) No match (JIT)
aaaaaaaaaaaaaz\=match_limit=3000 aaaaaaaaaaaaaz\=match_limit=3000
Failed: error -45: match limit exceeded Failed: error -46: match limit exceeded
/(*LIMIT_MATCH=3000)(a+)*zz/I /(*LIMIT_MATCH=3000)(a+)*zz/I
Capturing subpattern count = 1 Capturing subpattern count = 1
@ -266,9 +266,9 @@ Last code unit = 'z'
Subject length lower bound = 2 Subject length lower bound = 2
JIT compilation was successful JIT compilation was successful
aaaaaaaaaaaaaz aaaaaaaaaaaaaz
Failed: error -45: match limit exceeded Failed: error -46: match limit exceeded
aaaaaaaaaaaaaz\=match_limit=60000 aaaaaaaaaaaaaz\=match_limit=60000
Failed: error -45: match limit exceeded Failed: error -46: match limit exceeded
/(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I /(*LIMIT_MATCH=60000)(*LIMIT_MATCH=3000)(a+)*zz/I
Capturing subpattern count = 1 Capturing subpattern count = 1
@ -278,7 +278,7 @@ Last code unit = 'z'
Subject length lower bound = 2 Subject length lower bound = 2
JIT compilation was successful JIT compilation was successful
aaaaaaaaaaaaaz aaaaaaaaaaaaaz
Failed: error -45: match limit exceeded Failed: error -46: match limit exceeded
/(*LIMIT_MATCH=60000)(a+)*zz/I /(*LIMIT_MATCH=60000)(a+)*zz/I
Capturing subpattern count = 1 Capturing subpattern count = 1
@ -290,21 +290,21 @@ JIT compilation was successful
aaaaaaaaaaaaaz aaaaaaaaaaaaaz
No match (JIT) No match (JIT)
aaaaaaaaaaaaaz\=match_limit=3000 aaaaaaaaaaaaaz\=match_limit=3000
Failed: error -45: match limit exceeded Failed: error -46: match limit exceeded
# These three have infinitely nested recursions. # These three have infinitely nested recursions.
/((?2))((?1))/ /((?2))((?1))/
abc abc
Failed: error -44: JIT stack limit reached Failed: error -45: JIT stack limit reached
/((?(R2)a+|(?1)b))/ /((?(R2)a+|(?1)b))/
aaaabcde aaaabcde
Failed: error -44: JIT stack limit reached Failed: error -45: JIT stack limit reached
/(?(R)a*(?1)|((?R))b)/ /(?(R)a*(?1)|((?R))b)/
aaaabcde aaaabcde
Failed: error -44: JIT stack limit reached Failed: error -45: JIT stack limit reached
# Invalid options disable JIT when called via pcre2_match(), causing the # Invalid options disable JIT when called via pcre2_match(), causing the
# match to happen via the interpreter, but for fast JIT invalid options are # match to happen via the interpreter, but for fast JIT invalid options are

102
testdata/testoutput2 vendored
View File

@ -993,7 +993,7 @@ Subject length lower bound = 4
0: abcd 0: abcd
1: a 1: a
2: d 2: d
Copy substring 5 failed (-47): unknown or unset substring Copy substring 5 failed (-48): unknown or unset substring
/(.{20})/I /(.{20})/I
Capturing subpattern count = 1 Capturing subpattern count = 1
@ -1047,9 +1047,9 @@ Subject length lower bound = 4
2: <unset> 2: <unset>
3: f 3: f
1G a (1) 1G a (1)
Get substring 2 failed (-47): unknown or unset substring Get substring 2 failed (-48): unknown or unset substring
3G f (1) 3G f (1)
Get substring 4 failed (-47): unknown or unset substring Get substring 4 failed (-48): unknown or unset substring
0L adef 0L adef
1L a 1L a
2L 2L
@ -1062,7 +1062,7 @@ Get substring 4 failed (-47): unknown or unset substring
1G bc (2) 1G bc (2)
2G bc (2) 2G bc (2)
3G f (1) 3G f (1)
Get substring 4 failed (-47): unknown or unset substring Get substring 4 failed (-48): unknown or unset substring
0L bcdef 0L bcdef
1L bc 1L bc
2L bc 2L bc
@ -4363,7 +4363,7 @@ Subject length lower bound = 8
1: cd 1: cd
2: gh 2: gh
Number not found for group 'three' Number not found for group 'three'
Copy substring 'three' failed (-47): unknown or unset substring Copy substring 'three' failed (-48): unknown or unset substring
/(?P<Tes>)(?P<Test>)/IB /(?P<Tes>)(?P<Test>)/IB
------------------------------------------------------------------ ------------------------------------------------------------------
@ -5731,7 +5731,7 @@ No match
1: a1 1: a1
2: a1 2: a1
Number not found for group 'Z' Number not found for group 'Z'
Copy substring 'Z' failed (-47): unknown or unset substring Copy substring 'Z' failed (-48): unknown or unset substring
C a1 (2) A (non-unique) C a1 (2) A (non-unique)
/(?|(?<a>)(?<b>)(?<a>)|(?<a>)(?<b>)(?<a>))/I,dupnames /(?|(?<a>)(?<b>)(?<a>)|(?<a>)(?<b>)(?<a>))/I,dupnames
@ -5772,7 +5772,7 @@ Subject length lower bound = 2
C a (1) A (non-unique) C a (1) A (non-unique)
cd\=copy=A cd\=copy=A
0: cd 0: cd
Copy substring 'A' failed (-47): unknown or unset substring Copy substring 'A' failed (-48): unknown or unset substring
/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames /^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames
Capturing subpattern count = 4 Capturing subpattern count = 4
@ -5817,7 +5817,7 @@ No match
1: a1 1: a1
2: a1 2: a1
Number not found for group 'Z' Number not found for group 'Z'
Get substring 'Z' failed (-47): unknown or unset substring Get substring 'Z' failed (-48): unknown or unset substring
G a1 (2) A (non-unique) G a1 (2) A (non-unique)
/^(?P<A>a)(?P<A>b)/I,dupnames /^(?P<A>a)(?P<A>b)/I,dupnames
@ -5848,7 +5848,7 @@ Subject length lower bound = 2
G a (1) A (non-unique) G a (1) A (non-unique)
cd\=get=A cd\=get=A
0: cd 0: cd
Get substring 'A' failed (-47): unknown or unset substring Get substring 'A' failed (-48): unknown or unset substring
/^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames /^(?P<A>a)(?P<A>b)|cd(?P<A>ef)(?P<A>gh)/I,dupnames
Capturing subpattern count = 4 Capturing subpattern count = 4
@ -13607,4 +13607,88 @@ Subject length lower bound = 0
/(((((a)))))/parens_nest_limit=2 /(((((a)))))/parens_nest_limit=2
Failed: error 119 at offset 3: parentheses are too deeply nested Failed: error 119 at offset 3: parentheses are too deeply nested
# Tests for pcre2_substitute()
/abc/replace=XYZ
123123
0: 123123
123abc123
1: 123XYZ123
123abc123abc123
1: 123XYZ123abc123
123123\=zero_terminate
0: 123123
123abc123\=zero_terminate
1: 123XYZ123
123abc123abc123\=zero_terminate
1: 123XYZ123abc123
/abc/g,replace=XYZ
123abc123
1: 123XYZ123
123abc123abc123
2: 123XYZ123XYZ123
/abc/replace=X$$Z
123abc123
1: 123X$Z123
/abc/g,replace=X$$Z
123abc123abc123
2: 123X$Z123X$Z123
/a(b)c(d)e/replace=X$1Y${2}Z
"abcde"
1: "XbYdZ"
/a(b)c(d)e/replace=X$1Y${2}Z,global
"abcde-abcde"
2: "XbYdZ-XbYdZ"
/a(?<ONE>b)c(?<TWO>d)e/replace=X$ONE+${TWO}Z
"abcde"
1: "Xb+dZ"
/a(?<ONE>b)c(?<TWO>d)e/g,replace=X$ONE+${TWO}Z
"abcde-abcde-"
2: "Xb+dZ-Xb+dZ-"
/abc/replace=a$++
123abc
Failed: error -35: invalid replacement string
/abc/replace=a$bad
123abc
Failed: error -48: unknown or unset substring
/abc/replace=a${A234567890123456789_123456789012}z
123abc
Failed: error -48: unknown or unset substring
/abc/replace=a${A23456789012345678901234567890123}z
123abc
Failed: error -35: invalid replacement string
/abc/replace=a${bcd
123abc
Failed: error -35: invalid replacement string
/abc/replace=a${b+d}z
123abc
Failed: error -35: invalid replacement string
/abc/replace=[10]XYZ
123abc123
1: 123XYZ123
/abc/replace=[9]XYZ
123abc123
Failed: error -47: no more memory
/abc/replace=xyz
1abc2\=partial_hard
Failed: error -34: bad option value
# End of substitute tests
# End of testinput2 # End of testinput2

View File

@ -3995,5 +3995,11 @@ Subject length lower bound = 1
\x{100}\x{200}\x{300} \x{100}\x{200}\x{300}
0: \x{100}\x{200}\x{300} 0: \x{100}\x{200}\x{300}
^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^
# Test UTF characters in a substitution
/ábc/utf,replace=XሴZ
123ábc123
1: 123X\x{1234}Z123
# End of testinput5 # End of testinput5

24
testdata/testoutput6 vendored
View File

@ -6133,7 +6133,7 @@ No match
/^(?(2)a|(1)(2))+$/ /^(?(2)a|(1)(2))+$/
123a 123a
Failed: error -39: backreference condition or recursion test not supported for DFA matching Failed: error -40: backreference condition or recursion test not supported for DFA matching
/(?<=a|bbbb)c/ /(?<=a|bbbb)c/
ac ac
@ -7087,7 +7087,7 @@ Partial match: dogs
/abc\K123/ /abc\K123/
xyzabc123pqr xyzabc123pqr
Failed: error -40: item unsupported for DFA matching Failed: error -41: item unsupported for DFA matching
/(?<=abc)123/ /(?<=abc)123/
xyzabc123pqr xyzabc123pqr
@ -7205,29 +7205,29 @@ No match
/^(?!a(*SKIP)b)/ /^(?!a(*SKIP)b)/
ac ac
Failed: error -40: item unsupported for DFA matching Failed: error -41: item unsupported for DFA matching
/^(?=a(*SKIP)b|ac)/ /^(?=a(*SKIP)b|ac)/
** Failers ** Failers
No match No match
ac ac
Failed: error -40: item unsupported for DFA matching Failed: error -41: item unsupported for DFA matching
/^(?=a(*THEN)b|ac)/ /^(?=a(*THEN)b|ac)/
ac ac
Failed: error -40: item unsupported for DFA matching Failed: error -41: item unsupported for DFA matching
/^(?=a(*PRUNE)b)/ /^(?=a(*PRUNE)b)/
ab ab
Failed: error -40: item unsupported for DFA matching Failed: error -41: item unsupported for DFA matching
** Failers ** Failers
No match No match
ac ac
Failed: error -40: item unsupported for DFA matching Failed: error -41: item unsupported for DFA matching
/^(?(?!a(*SKIP)b))/ /^(?(?!a(*SKIP)b))/
ac ac
Failed: error -40: item unsupported for DFA matching Failed: error -41: item unsupported for DFA matching
/(?<=abc)def/ /(?<=abc)def/
abc\=ph abc\=ph
@ -7424,7 +7424,7 @@ No match
/((?2))((?1))/ /((?2))((?1))/
abc abc
Failed: error -50: nested recursion at the same subject position Failed: error -51: nested recursion at the same subject position
/(?(R)a+|(?R)b)/ /(?(R)a+|(?R)b)/
aaaabcde aaaabcde
@ -7440,11 +7440,11 @@ Failed: error -50: nested recursion at the same subject position
/((?(R2)a+|(?1)b))/ /((?(R2)a+|(?1)b))/
aaaabcde aaaabcde
Failed: error -39: backreference condition or recursion test not supported for DFA matching Failed: error -40: backreference condition or recursion test not supported for DFA matching
/(?(R)a*(?1)|((?R))b)/ /(?(R)a*(?1)|((?R))b)/
aaaabcde aaaabcde
Failed: error -50: nested recursion at the same subject position Failed: error -51: nested recursion at the same subject position
/(a+)/no_auto_possess /(a+)/no_auto_possess
aaaa\=ovector=3 aaaa\=ovector=3
@ -7593,7 +7593,7 @@ Partial match: \x0d\x0d\x0d
/abcdef/ /abcdef/
abc\=dfa_restart abc\=dfa_restart
Failed: error -37: invalid data in workspace for DFA restart Failed: error -38: invalid data in workspace for DFA restart
/<H((?(?!<H|F>)(.)|(?R))++)*F>/ /<H((?(?!<H|F>)(.)|(?R))++)*F>/
text <H more text <H texting more hexA0-"\xA0" hex above 7F-"\xBC" F> text xxxxx <H text F> text F> text2 <H text sample F> more text. text <H more text <H texting more hexA0-"\xA0" hex above 7F-"\xBC" F> text xxxxx <H text F> text F> text2 <H text sample F> more text.

View File

@ -1218,7 +1218,7 @@ Partial match: the cat
/ab\Cde/utf /ab\Cde/utf
abXde abXde
Failed: error -40: item unsupported for DFA matching Failed: error -41: item unsupported for DFA matching
/(?<=ab\Cde)X/utf /(?<=ab\Cde)X/utf
Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion Failed: error 136 at offset 10: \C is not allowed in a lookbehind assertion