Add "allvector" to pcre2test.
This commit is contained in:
parent
142c667bbc
commit
3fce7c75e9
10
ChangeLog
10
ChangeLog
|
@ -2,9 +2,17 @@ Change Log for PCRE2
|
|||
--------------------
|
||||
|
||||
|
||||
Version 10.32-RC1 10-September-2018
|
||||
Version 10.33-RC1 15-September-2018
|
||||
-----------------------------------
|
||||
|
||||
1. Added "allvector" to pcre2test to make it easy to check the part of the
|
||||
ovector that shouldn't be changed, in particular after substitute and failed or
|
||||
partial matches.
|
||||
|
||||
|
||||
Version 10.32 10-September-2018
|
||||
-------------------------------
|
||||
|
||||
1. When matching using the REG_STARTEND feature of the POSIX API with a
|
||||
non-zero starting offset, unset capturing groups with lower numbers than a
|
||||
group that did capture something were not being correctly returned as "unset"
|
||||
|
|
|
@ -9,9 +9,9 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
|
|||
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||
|
||||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [32])
|
||||
m4_define(pcre2_prerelease, [])
|
||||
m4_define(pcre2_date, [2018-09-10])
|
||||
m4_define(pcre2_minor, [33])
|
||||
m4_define(pcre2_prerelease, [-RC1])
|
||||
m4_define(pcre2_date, [2018-09-14])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2TEST 1 "21 July 2018" "PCRE 10.32"
|
||||
.TH PCRE2TEST 1 "15 September 2018" "PCRE 10.33"
|
||||
.SH NAME
|
||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -1003,6 +1003,7 @@ process.
|
|||
aftertext show text after match
|
||||
allaftertext show text after captures
|
||||
allcaptures show all captures
|
||||
allvector show the entire ovector
|
||||
allusedtext show all consulted text
|
||||
altglobal alternative global matching
|
||||
/g global global matching
|
||||
|
@ -1154,6 +1155,7 @@ pattern.
|
|||
aftertext show text after match
|
||||
allaftertext show text after captures
|
||||
allcaptures show all captures
|
||||
allvector show the entire ovector
|
||||
allusedtext show all consulted text (non-JIT only)
|
||||
altglobal alternative global matching
|
||||
callout_capture show captures at callout time
|
||||
|
@ -1248,7 +1250,25 @@ captured parentheses be output after a match. By default, only those up to the
|
|||
highest one actually used in the match are output (corresponding to the return
|
||||
code from \fBpcre2_match()\fP). Groups that did not take part in the match
|
||||
are output as "<unset>". This modifier is not relevant for DFA matching (which
|
||||
does no capturing); it is ignored, with a warning message, if present.
|
||||
does no capturing) and does not apply when \fBreplace\fP is specified; it is
|
||||
ignored, with a warning message, if present.
|
||||
.
|
||||
.
|
||||
.SS "Showing the entire ovector, for all outcomes"
|
||||
.rs
|
||||
.sp
|
||||
The \fBallvector\fP modifier requests that the entire ovector be shown,
|
||||
whatever the outcome of the match. Compare \fBallcaptures\fP, which shows only
|
||||
up to the maximum number of capture groups for the pattern, and then only for a
|
||||
successful complete non-DFA match. This modifier, which acts after any match
|
||||
result, and also for DFA matching, provides a means of checking that there are
|
||||
no unexpected modifications to ovector fields. Before each match attempt, the
|
||||
ovector is filled with a special value, and if this is found in both elements
|
||||
of a capturing pair, "<unchanged>" is output. After a successful match, this
|
||||
applies to all groups after the maximum capture group for the pattern. In other
|
||||
cases it applies to the entire ovector. After a partial match, the first two
|
||||
elements are the only ones that should be set. After a DFA match, the amount of
|
||||
ovector that is used depends on the number of matches that were found.
|
||||
.
|
||||
.
|
||||
.SS "Testing callouts"
|
||||
|
@ -1982,6 +2002,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 21 July 2018
|
||||
Last updated: 15 September 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
.fi
|
||||
|
|
151
src/pcre2test.c
151
src/pcre2test.c
|
@ -491,6 +491,7 @@ so many of them that they are split into two fields. */
|
|||
#define CTL2_SUBJECT_LITERAL 0x00000010u
|
||||
#define CTL2_CALLOUT_NO_WHERE 0x00000020u
|
||||
#define CTL2_CALLOUT_EXTRA 0x00000040u
|
||||
#define CTL2_ALLVECTOR 0x00000080u
|
||||
|
||||
#define CTL2_NL_SET 0x40000000u /* Informational */
|
||||
#define CTL2_BSR_SET 0x80000000u /* Informational */
|
||||
|
@ -513,7 +514,8 @@ different things in the two cases. */
|
|||
#define CTL2_ALLPD (CTL2_SUBSTITUTE_EXTENDED|\
|
||||
CTL2_SUBSTITUTE_OVERFLOW_LENGTH|\
|
||||
CTL2_SUBSTITUTE_UNKNOWN_UNSET|\
|
||||
CTL2_SUBSTITUTE_UNSET_EMPTY)
|
||||
CTL2_SUBSTITUTE_UNSET_EMPTY|\
|
||||
CTL2_ALLVECTOR)
|
||||
|
||||
/* Structures for holding modifier information for patterns and subject strings
|
||||
(data). Fields containing modifiers that can be set either for a pattern or a
|
||||
|
@ -592,6 +594,7 @@ static modstruct modlist[] = {
|
|||
{ "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) },
|
||||
{ "allow_surrogate_escapes", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
|
||||
{ "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
|
||||
{ "allvector", MOD_PND, MOD_CTL, CTL2_ALLVECTOR, PO(control2) },
|
||||
{ "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
|
||||
{ "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) },
|
||||
{ "alt_verbnames", MOD_PAT, MOD_OPT, PCRE2_ALT_VERBNAMES, PO(options) },
|
||||
|
@ -888,6 +891,7 @@ static uint32_t forbid_utf = 0;
|
|||
static uint32_t maxlookbehind;
|
||||
static uint32_t max_oveccount;
|
||||
static uint32_t callout_count;
|
||||
static uint32_t maxcapcount;
|
||||
|
||||
static uint16_t local_newline_default = 0;
|
||||
|
||||
|
@ -4018,12 +4022,13 @@ Returns: nothing
|
|||
static void
|
||||
show_controls(uint32_t controls, uint32_t controls2, const char *before)
|
||||
{
|
||||
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
before,
|
||||
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
|
||||
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
|
||||
((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
|
||||
((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
|
||||
((controls2 & CTL2_ALLVECTOR) != 0)? " allvector" : "",
|
||||
((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
|
||||
((controls & CTL_BINCODE) != 0)? " bincode" : "",
|
||||
((controls2 & CTL2_BSR_SET) != 0)? " bsr" : "",
|
||||
|
@ -5717,6 +5722,11 @@ if (forbid_utf != 0)
|
|||
if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
|
||||
return PR_ABEND;
|
||||
|
||||
/* Remember the number of captures. */
|
||||
|
||||
if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
|
||||
return PR_ABEND;
|
||||
|
||||
/* If an explicit newline modifier was given, set the information flag in the
|
||||
pattern so that it is preserved over push/pop. */
|
||||
|
||||
|
@ -6317,6 +6327,42 @@ return TRUE;
|
|||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Show an entire ovector *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called after partial matching or match failure, when the
|
||||
"allvector" modifier is set. It is a means of checking the contents of the
|
||||
entire ovector, to ensure no modification of fields that should be unchanged.
|
||||
|
||||
Arguments:
|
||||
ovector points to the ovector
|
||||
oveccount number of pairs
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
show_ovector(PCRE2_SIZE *ovector, uint32_t oveccount)
|
||||
{
|
||||
uint32_t i;
|
||||
for (i = 0; i < 2*oveccount; i += 2)
|
||||
{
|
||||
PCRE2_SIZE start = ovector[i];
|
||||
PCRE2_SIZE end = ovector[i+1];
|
||||
|
||||
fprintf(outfile, "%2d: ", i/2);
|
||||
if (start == PCRE2_UNSET && end == PCRE2_UNSET)
|
||||
fprintf(outfile, "<unset>\n");
|
||||
else if (start == JUNK_OFFSET && end == JUNK_OFFSET)
|
||||
fprintf(outfile, "<unchanged>\n");
|
||||
else
|
||||
fprintf(outfile, "%ld %ld\n", (unsigned long int)start,
|
||||
(unsigned long int)end);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Process a data line *
|
||||
*************************************************/
|
||||
|
@ -6342,7 +6388,10 @@ size_t needlen;
|
|||
void *use_dat_context;
|
||||
BOOL utf;
|
||||
BOOL subject_literal;
|
||||
|
||||
PCRE2_SIZE *ovector;
|
||||
PCRE2_SIZE ovecsave[3];
|
||||
uint32_t oveccount;
|
||||
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
uint8_t *q8 = NULL;
|
||||
|
@ -6722,11 +6771,23 @@ for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
|
|||
}
|
||||
}
|
||||
|
||||
if (pat_patctl.replacement[0] != 0 &&
|
||||
(dat_datctl.control & CTL_NULLCONTEXT) != 0)
|
||||
if (pat_patctl.replacement[0] != 0)
|
||||
{
|
||||
fprintf(outfile, "** Replacement text is not supported with null_context.\n");
|
||||
return PR_OK;
|
||||
if ((dat_datctl.control & CTL_NULLCONTEXT) != 0)
|
||||
{
|
||||
fprintf(outfile, "** Replacement text is not supported with null_context.\n");
|
||||
return PR_OK;
|
||||
}
|
||||
if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
|
||||
fprintf(outfile, "** Ignored with replacement text: allcaptures\n");
|
||||
}
|
||||
|
||||
/* Warn for modifiers that are ignored for DFA. */
|
||||
|
||||
if ((dat_datctl.control & CTL_DFA) != 0)
|
||||
{
|
||||
if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
|
||||
fprintf(outfile, "** Ignored after DFA matching: allcaptures\n");
|
||||
}
|
||||
|
||||
/* We now have the subject in dbuffer, with len containing the byte length, and
|
||||
|
@ -6955,6 +7016,9 @@ if (CASTVAR(void *, match_data) == NULL)
|
|||
return PR_OK;
|
||||
}
|
||||
|
||||
ovector = FLD(match_data, ovector);
|
||||
PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
|
||||
|
||||
/* Replacement processing is ignored for DFA matching. */
|
||||
|
||||
if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
|
||||
|
@ -6974,7 +7038,7 @@ if (dat_datctl.replacement[0] != 0)
|
|||
uint8_t rbuffer[REPLACE_BUFFSIZE];
|
||||
uint8_t nbuffer[REPLACE_BUFFSIZE];
|
||||
uint32_t xoptions;
|
||||
PCRE2_SIZE rlen, nsize, erroroffset;
|
||||
PCRE2_SIZE j, rlen, nsize, erroroffset;
|
||||
BOOL badutf = FALSE;
|
||||
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
|
@ -6987,6 +7051,11 @@ if (dat_datctl.replacement[0] != 0)
|
|||
uint32_t *r32 = NULL;
|
||||
#endif
|
||||
|
||||
/* Fill the ovector with junk to detect elements that do not get set
|
||||
when they should be (relevant only when "allvector" is specified). */
|
||||
|
||||
for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
|
||||
|
||||
if (timeitm)
|
||||
fprintf(outfile, "** Timing is not supported with replace: ignored\n");
|
||||
|
||||
|
@ -7112,6 +7181,12 @@ if (dat_datctl.replacement[0] != 0)
|
|||
|
||||
fprintf(outfile, "\n");
|
||||
show_memory = FALSE;
|
||||
|
||||
/* Show final ovector contents if requested. */
|
||||
|
||||
if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
|
||||
show_ovector(ovector, oveccount);
|
||||
|
||||
return PR_OK;
|
||||
} /* End of substitution handling */
|
||||
|
||||
|
@ -7125,14 +7200,11 @@ for (gmatched = 0;; gmatched++)
|
|||
{
|
||||
PCRE2_SIZE j;
|
||||
int capcount;
|
||||
PCRE2_SIZE *ovector;
|
||||
|
||||
ovector = FLD(match_data, ovector);
|
||||
|
||||
/* Fill the ovector with junk to detect elements that do not get set
|
||||
when they should be. */
|
||||
|
||||
for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET;
|
||||
for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
|
||||
|
||||
/* When matching is via pcre2_match(), we will detect the use of JIT via the
|
||||
stack callback function. */
|
||||
|
@ -7280,12 +7352,8 @@ for (gmatched = 0;; gmatched++)
|
|||
if (capcount >= 0)
|
||||
{
|
||||
int i;
|
||||
uint32_t oveccount;
|
||||
|
||||
/* This is a check against a lunatic return value. */
|
||||
|
||||
PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
|
||||
if (capcount > (int)oveccount)
|
||||
if (capcount > (int)oveccount) /* Check for lunatic return value */
|
||||
{
|
||||
fprintf(outfile,
|
||||
"** PCRE2 error: returned count %d is too big for ovector count %d\n",
|
||||
|
@ -7325,24 +7393,18 @@ for (gmatched = 0;; gmatched++)
|
|||
/* "allcaptures" requests showing of all captures in the pattern, to check
|
||||
unset ones at the end. It may be set on the pattern or the data. Implement
|
||||
by setting capcount to the maximum. This is not relevant for DFA matching,
|
||||
so ignore it. */
|
||||
so ignore it (warning given above). */
|
||||
|
||||
if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
|
||||
if ((dat_datctl.control & (CTL_ALLCAPTURES|CTL_DFA)) == CTL_ALLCAPTURES)
|
||||
{
|
||||
uint32_t maxcapcount;
|
||||
if ((dat_datctl.control & CTL_DFA) != 0)
|
||||
{
|
||||
fprintf(outfile, "** Ignored after DFA matching: allcaptures\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
|
||||
return PR_SKIP;
|
||||
capcount = maxcapcount + 1; /* Allow for full match */
|
||||
if (capcount > (int)oveccount) capcount = oveccount;
|
||||
}
|
||||
capcount = maxcapcount + 1; /* Allow for full match */
|
||||
if (capcount > (int)oveccount) capcount = oveccount;
|
||||
}
|
||||
|
||||
    /* "allvector" requests showing the entire ovector. */
|
||||
|
||||
if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0) capcount = oveccount;
|
||||
|
||||
/* Output the captured substrings. Note that, for the matched string,
|
||||
the use of \K in an assertion can make the start later than the end. */
|
||||
|
||||
|
@ -7364,19 +7426,26 @@ for (gmatched = 0;; gmatched++)
|
|||
|
||||
/* Check for an unset group */
|
||||
|
||||
if (start == PCRE2_UNSET)
|
||||
if (start == PCRE2_UNSET && end == PCRE2_UNSET)
|
||||
{
|
||||
fprintf(outfile, "<unset>\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Check for silly offsets, in particular, values that have not been
|
||||
set when they should have been. */
|
||||
set when they should have been. However, if we are past the end of the
|
||||
captures for this pattern ("allvector" causes this), or if we are DFA
|
||||
matching, it isn't an error if the entry is unchanged. */
|
||||
|
||||
if (start > ulen || end > ulen)
|
||||
{
|
||||
fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
|
||||
(unsigned long int)start, (unsigned long int)end);
|
||||
if (((dat_datctl.control & CTL_DFA) != 0 ||
|
||||
i >= (int)(2*maxcapcount + 2)) &&
|
||||
start == JUNK_OFFSET && end == JUNK_OFFSET)
|
||||
fprintf(outfile, "<unchanged>\n");
|
||||
else
|
||||
fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
|
||||
(unsigned long int)start, (unsigned long int)end);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -7517,10 +7586,19 @@ for (gmatched = 0;; gmatched++)
|
|||
fprintf(outfile, "\n");
|
||||
}
|
||||
|
||||
if (ulen != ovector[1])
|
||||
fprintf(outfile, "** ovector[1] is not equal to the subject length: "
|
||||
"%ld != %ld\n", (unsigned long int)ovector[1], (unsigned long int)ulen);
|
||||
|
||||
/* Process copy/get strings */
|
||||
|
||||
if (!copy_and_get(utf, 1)) return PR_ABEND;
|
||||
|
||||
/* "allvector" outputs the entire vector */
|
||||
|
||||
if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
|
||||
show_ovector(ovector, oveccount);
|
||||
|
||||
break; /* Out of the /g loop */
|
||||
} /* End of handling partial match */
|
||||
|
||||
|
@ -7590,6 +7668,11 @@ for (gmatched = 0;; gmatched++)
|
|||
if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
|
||||
fprintf(outfile, " (JIT)");
|
||||
fprintf(outfile, "\n");
|
||||
|
||||
/* "allvector" outputs the entire vector */
|
||||
|
||||
if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
|
||||
show_ovector(ovector, oveccount);
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
|
@ -5505,4 +5505,13 @@ a)"xI
|
|||
bbc
|
||||
xbc
|
||||
|
||||
/a(b)c|xyz/g,allvector,replace=<$0>
|
||||
abcdefabcpqr\=ovector=4
|
||||
abxyz\=ovector=4
|
||||
abcdefxyz\=ovector=4
|
||||
|
||||
/a(b)c|xyz/allvector
|
||||
abcdef\=ovector=4
|
||||
abxyz\=ovector=4
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -16763,6 +16763,38 @@ Subject length lower bound = 1
|
|||
0: b
|
||||
0+ c
|
||||
|
||||
/a(b)c|xyz/g,allvector,replace=<$0>
|
||||
abcdefabcpqr\=ovector=4
|
||||
2: <abc>def<abc>pqr
|
||||
0: 6 9
|
||||
1: 7 8
|
||||
2: <unchanged>
|
||||
3: <unchanged>
|
||||
abxyz\=ovector=4
|
||||
1: ab<xyz>
|
||||
0: 2 5
|
||||
1: <unset>
|
||||
2: <unchanged>
|
||||
3: <unchanged>
|
||||
abcdefxyz\=ovector=4
|
||||
2: <abc>def<xyz>
|
||||
0: 6 9
|
||||
1: <unset>
|
||||
2: <unchanged>
|
||||
3: <unchanged>
|
||||
|
||||
/a(b)c|xyz/allvector
|
||||
abcdef\=ovector=4
|
||||
0: abc
|
||||
1: b
|
||||
2: <unchanged>
|
||||
3: <unchanged>
|
||||
abxyz\=ovector=4
|
||||
0: xyz
|
||||
1: <unset>
|
||||
2: <unchanged>
|
||||
3: <unchanged>
|
||||
|
||||
# End of testinput2
|
||||
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
|
|
Loading…
Reference in New Issue