Implement the "rightchar" feature of match data.

This commit is contained in:
Philip.Hazel 2014-08-19 17:07:22 +00:00
parent 55e279eff2
commit ac70cacd29
13 changed files with 335 additions and 129 deletions

View File

@ -1,4 +1,4 @@
.TH PCRE2TEST 1 "12 August 2014" "PCRE 10.00"
.TH PCRE2TEST 1 "19 August 2014" "PCRE 10.00"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@ -633,6 +633,7 @@ not affect the compilation process.
aftertext show text after match
allaftertext show text after captures
allcaptures show all captures
allusedtext show all consulted text
/g global global matching
jitverify verify JIT usage
mark show mark values
@ -691,6 +692,7 @@ pattern.
aftertext show text after match
allaftertext show text after captures
allcaptures show all captures
allusedtext show all consulted text
altglobal alternative global matching
bsr=[anycrlf|unicode] specify \eR handling
callout_capture show captures at callout time
@ -735,6 +737,21 @@ contains multiple copies of the same substring. The \fBallaftertext\fP modifier
requests the same action for captured substrings as well as the main matched
substring. In each case the remainder is output on the following line with a
plus character following the capture number.
.P
The \fBallusedtext\fP modifier requests that all the text that was consulted
during a successful pattern match be shown. This affects the output if there
is a lookbehind at the start of a match, or a lookahead at the end, or if \eK
is used in the pattern. Characters that precede the start of the actual match
are indicated in the output by '<' characters underneath them, and characters
that follow the end of the actual match are indicated by '>' characters
underneath them. Here is an example:
.sp
  /(?<=pqr)abc(?=xyz)/
    123pqrabcxyz456\=allusedtext
   0: pqrabcxyz
      <<<   >>>
.sp
This shows that the matched string is "abc", with the preceding and following
strings "pqr" and "xyz" also consulted during the match.
.
.
.SS "Showing the value of all capture groups"
@ -1142,6 +1159,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
Last updated: 12 August 2014
Last updated: 19 August 2014
Copyright (c) 1997-2014 University of Cambridge.
.fi

View File

@ -543,6 +543,8 @@ for (;;)
BOOL partial_newline = FALSE;
BOOL could_continue = reset_could_continue;
reset_could_continue = FALSE;
if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr;
/* Make the new state list into the active state list and empty the
new state list. */
@ -967,6 +969,14 @@ for (;;)
if (clen > 0)
{
if (ptr >= mb->last_used_ptr)
{
PCRE2_SPTR temp = ptr + 1;
#if defined SUPPORT_UTF && PCRE2_CODE_UNIT_WIDTH != 32
if (utf) { FORWARDCHAR(temp); }
#endif
mb->last_used_ptr = temp;
}
#ifdef SUPPORT_UTF
if ((mb->poptions & PCRE2_UCP) != 0)
{
@ -3447,6 +3457,7 @@ for (;;)
/* OK, now we can do the business */
mb->start_used_ptr = start_match;
mb->last_used_ptr = start_match;
mb->recursive = NULL;
rc = internal_dfa_match(
@ -3471,7 +3482,7 @@ for (;;)
match_data->ovector[1] = (PCRE2_SIZE)(end_subject - subject);
}
match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject);
match_data->rightchar = 0; /* FIXME */
match_data->rightchar = mb->last_used_ptr - subject;
match_data->startchar = (PCRE2_SIZE)(start_match - subject);
match_data->rc = rc;
return rc;

View File

@ -752,6 +752,7 @@ typedef struct match_block {
PCRE2_SPTR start_match_ptr; /* Start of matched string */
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
PCRE2_SPTR mark; /* Mark pointer to pass back on success */
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
PCRE2_SPTR once_target; /* Where to back up to for atomic groups */
@ -783,6 +784,7 @@ typedef struct dfa_match_block {
PCRE2_SPTR start_subject ; /* Start of the subject string */
PCRE2_SPTR end_subject; /* End of subject string */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
const uint8_t *tables; /* Character tables */
PCRE2_SIZE start_offset; /* The start offset value */
uint32_t moptions; /* Match options */

View File

@ -1450,11 +1450,14 @@ for (;;)
break;
/* End of the pattern, either real or forced. */
/* End of the pattern, either real or forced. In an assertion ACCEPT,
update the last used pointer. */
case OP_END:
case OP_ACCEPT:
case OP_ASSERT_ACCEPT:
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
case OP_ACCEPT:
case OP_END:
/* If we have matched an empty string, fail if not in an assertion and not
in a recursion if either PCRE2_NOTEMPTY is set, or if PCRE2_NOTEMPTY_ATSTART
@ -1918,6 +1921,7 @@ for (;;)
mb->end_match_ptr = eptr; /* For ONCE_NC */
mb->end_offset_top = offset_top;
mb->start_match_ptr = mstart;
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
RRETURN(MATCH_MATCH); /* Sets mb->mark */
}
@ -1941,6 +1945,7 @@ for (;;)
{
mb->end_match_ptr = eptr;
mb->start_match_ptr = mstart;
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
RRETURN(MATCH_MATCH);
}
@ -1984,6 +1989,7 @@ for (;;)
mb->start_match_ptr = mstart; /* In case \K reset it */
mb->end_match_ptr = eptr;
mb->end_offset_top = offset_top;
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
RRETURN(MATCH_KETRPOS);
}
@ -2202,6 +2208,9 @@ for (;;)
}
else
{
PCRE2_SPTR nextptr = eptr + 1;
FORWARDCHAR(nextptr);
if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
GETCHAR(c, eptr);
if ((mb->poptions & PCRE2_UCP) != 0)
{
@ -2251,20 +2260,23 @@ for (;;)
cur_is_word = FALSE;
}
else
#ifdef SUPPORT_UTF
if ((mb->poptions & PCRE2_UCP) != 0)
{
c = *eptr;
if (c == '_') cur_is_word = TRUE; else
if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1;
#ifdef SUPPORT_UTF
if ((mb->poptions & PCRE2_UCP) != 0)
{
int cat = UCD_CATEGORY(c);
cur_is_word = (cat == ucp_L || cat == ucp_N);
c = *eptr;
if (c == '_') cur_is_word = TRUE; else
{
int cat = UCD_CATEGORY(c);
cur_is_word = (cat == ucp_L || cat == ucp_N);
}
}
}
else
else
#endif
cur_is_word = MAX_255(*eptr)
&& ((mb->ctypes[*eptr] & ctype_word) != 0);
cur_is_word = MAX_255(*eptr)
&& ((mb->ctypes[*eptr] & ctype_word) != 0);
}
}
/* Now see if the situation is what we want */
@ -6780,6 +6792,7 @@ for(;;)
mb->start_match_ptr = start_match;
mb->start_used_ptr = start_match;
mb->last_used_ptr = start_match;
mb->match_call_count = 0;
mb->match_function_type = 0;
mb->end_offset_top = 0;
@ -6984,10 +6997,11 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
}
/* Set the remaining returned values */
match_data->leftchar = mb->start_used_ptr - subject;
match_data->rightchar = 0; /* FIXME */
match_data->startchar = start_match - subject;
match_data->leftchar = mb->start_used_ptr - subject;
match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
mb->last_used_ptr : mb->end_match_ptr) - subject;
return match_data->rc;
}
@ -7011,9 +7025,9 @@ else if (match_partial != NULL)
match_data->ovector[0] = match_partial - subject;
match_data->ovector[1] = end_subject - subject;
}
match_data->leftchar = start_partial - subject;
match_data->rightchar = 0; /* FIXME */
match_data->startchar = match_partial - subject;
match_data->leftchar = start_partial - subject;
match_data->rightchar = end_subject - subject;
match_data->rc = PCRE2_ERROR_PARTIAL;
}

View File

@ -321,25 +321,26 @@ either on a pattern or a data line, so they must all be distinct. */
#define CTL_AFTERTEXT 0x00000001u
#define CTL_ALLAFTERTEXT 0x00000002u
#define CTL_ALLCAPTURES 0x00000004u
#define CTL_ALTGLOBAL 0x00000008u
#define CTL_BINCODE 0x00000010u
#define CTL_CALLOUT_CAPTURE 0x00000020u
#define CTL_CALLOUT_NONE 0x00000040u
#define CTL_DFA 0x00000080u
#define CTL_FINDLIMITS 0x00000100u
#define CTL_FULLBINCODE 0x00000200u
#define CTL_GETALL 0x00000400u
#define CTL_GLOBAL 0x00000800u
#define CTL_HEXPAT 0x00001000u
#define CTL_INFO 0x00002000u
#define CTL_JITVERIFY 0x00004000u
#define CTL_MARK 0x00008000u
#define CTL_MEMORY 0x00010000u
#define CTL_PATLEN 0x00020000u
#define CTL_POSIX 0x00040000u
#define CTL_ALLUSEDTEXT 0x00000008u
#define CTL_ALTGLOBAL 0x00000010u
#define CTL_BINCODE 0x00000020u
#define CTL_CALLOUT_CAPTURE 0x00000040u
#define CTL_CALLOUT_NONE 0x00000080u
#define CTL_DFA 0x00000100u
#define CTL_FINDLIMITS 0x00000200u
#define CTL_FULLBINCODE 0x00000400u
#define CTL_GETALL 0x00000800u
#define CTL_GLOBAL 0x00001000u
#define CTL_HEXPAT 0x00002000u
#define CTL_INFO 0x00004000u
#define CTL_JITVERIFY 0x00008000u
#define CTL_MARK 0x00010000u
#define CTL_MEMORY 0x00020000u
#define CTL_PATLEN 0x00040000u
#define CTL_POSIX 0x00080000u
#define CTL_BSR_SET 0x00080000u /* This is informational */
#define CTL_NL_SET 0x00100000u /* This is informational */
#define CTL_BSR_SET 0x00100000u /* This is informational */
#define CTL_NL_SET 0x00200000u /* This is informational */
#define CTL_DEBUG (CTL_FULLBINCODE|CTL_INFO) /* For setting */
#define CTL_ANYINFO (CTL_DEBUG|CTL_BINCODE) /* For testing */
@ -348,9 +349,15 @@ either on a pattern or a data line, so they must all be distinct. */
/* These are all the controls that may be set either on a pattern or on a
data line. */
#define CTL_ALLPD (CTL_AFTERTEXT|CTL_ALLAFTERTEXT|CTL_ALLCAPTURES|\
CTL_ALTGLOBAL|CTL_GLOBAL|CTL_JITVERIFY|CTL_MARK|\
CTL_MEMORY)
#define CTL_ALLPD (CTL_AFTERTEXT|\
CTL_ALLAFTERTEXT|\
CTL_ALLCAPTURES|\
CTL_ALLUSEDTEXT|\
CTL_ALTGLOBAL|\
CTL_GLOBAL|\
CTL_JITVERIFY|\
CTL_MARK|\
CTL_MEMORY)
typedef struct patctl { /* Structure for pattern modifiers. */
uint32_t options; /* Must be in same position as datctl */
@ -409,6 +416,7 @@ static modstruct modlist[] = {
{ "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) },
{ "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) },
{ "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) },
{ "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
{ "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
{ "altglobal", MOD_PND, MOD_CTL, CTL_ALTGLOBAL, PO(control) },
{ "anchored", MOD_PD, MOD_OPT, PCRE2_ANCHORED, PD(options) },
@ -822,7 +830,7 @@ are supported. */
pcre2_set_character_tables_16(G(a,16),b); \
else \
pcre2_set_character_tables_32(G(a,32),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
if (test_mode == PCRE8_MODE) \
pcre2_set_compile_recursion_guard_8(G(a,8),b); \
@ -1783,7 +1791,7 @@ free(block);
*************************************************/
/* This is set up to be called from pcre2_compile() when the stackguard=n
modifier sets a value greater than zero. The test we do is whether the
modifier sets a value greater than zero. The test we do is whether the
parenthesis nesting depth is greater than the value set by the modifier.
Argument: the current parenthesis nesting depth
@ -2105,7 +2113,7 @@ if (pbuffer16_size < 2*len + 2)
pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
if (pbuffer16 == NULL)
{
fprintf(stderr, "pcretest: malloc(%ld) failed for pbuffer16\n",
fprintf(stderr, "pcretest: malloc(%ld) failed for pbuffer16\n",
pbuffer16_size);
exit(1);
}
@ -2115,7 +2123,7 @@ pp = pbuffer16;
if (!utf)
{
while (len-- > 0) *pp++ = *p++;
}
}
else while (len > 0)
{
uint32_t c;
@ -2182,7 +2190,7 @@ if (pbuffer32_size < 4*len + 4)
pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
if (pbuffer32 == NULL)
{
fprintf(stderr, "pcretest: malloc(%ld) failed for pbuffer32\n",
fprintf(stderr, "pcretest: malloc(%ld) failed for pbuffer32\n",
pbuffer32_size);
exit(1);
}
@ -2192,7 +2200,7 @@ pp = pbuffer32;
if (!utf)
{
while (len-- > 0) *pp++ = *p++;
}
}
else while (len > 0)
{
uint32_t c;
@ -2661,24 +2669,24 @@ for (;;)
case MOD_BSR:
if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
{
#ifdef BSR_ANYCRLF
#ifdef BSR_ANYCRLF
*((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
#else
#else
*((uint16_t *)field) = PCRE2_BSR_UNICODE;
#endif
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control &= ~CTL_BSR_SET;
else dctl->control &= ~CTL_BSR_SET;
else dctl->control &= ~CTL_BSR_SET;
}
else
{
{
if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
*((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
*((uint16_t *)field) = PCRE2_BSR_UNICODE;
else goto INVALID_VALUE;
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control |= CTL_BSR_SET;
else dctl->control |= CTL_BSR_SET;
}
else dctl->control |= CTL_BSR_SET;
}
pp = ep;
break;
@ -2720,14 +2728,14 @@ for (;;)
{
*((uint16_t *)field) = NEWLINE_DEFAULT;
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control &= ~CTL_NL_SET;
else dctl->control &= ~CTL_NL_SET;
else dctl->control &= ~CTL_NL_SET;
}
else
{
{
*((uint16_t *)field) = i;
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control |= CTL_NL_SET;
else dctl->control |= CTL_NL_SET;
}
else dctl->control |= CTL_NL_SET;
}
pp = ep;
break;
@ -2835,7 +2843,7 @@ return rc;
/* This function just helps to keep the code that uses it tidier. It's used for
various lists of things where there needs to be introductory text before the
first item. As these calls are all in the POSIX-support code, they happen only
first item. As these calls are all in the POSIX-support code, they happen only
when 8-bit mode is supported. */
static void
@ -2853,7 +2861,7 @@ fprintf(outfile, "%s %s", *msg, s);
* Show compile controls *
*************************************************/
/* Called for unsupported POSIX modifiers, and therefore needed only when the
/* Called for unsupported POSIX modifiers, and therefore needed only when the
8-bit library is supported.
Arguments:
@ -3019,8 +3027,8 @@ if ((pat_patctl.control & CTL_INFO) != 0)
const void *nametable;
const uint8_t *start_bits;
uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
hascrorlf, jchanged, last_ctype, last_cunit, match_empty, match_limit,
maxlookbehind, minlength, nameentrysize, namecount, newline_convention,
hascrorlf, jchanged, last_ctype, last_cunit, match_empty, match_limit,
maxlookbehind, minlength, nameentrysize, namecount, newline_convention,
recursion_limit;
/* These info requests should always succeed. */
@ -3093,69 +3101,69 @@ if ((pat_patctl.control & CTL_INFO) != 0)
pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options);
pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options);
/* Remove UTF/UCP if they were there only because of forbid_utf. This saves
/* Remove UTF/UCP if they were there only because of forbid_utf. This saves
cluttering up the verification output of non-UTF test files. */
if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
{
compile_options &= ~PCRE2_NEVER_UTF;
overall_options &= ~PCRE2_NEVER_UTF;
}
compile_options &= ~PCRE2_NEVER_UTF;
overall_options &= ~PCRE2_NEVER_UTF;
}
if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
{
compile_options &= ~PCRE2_NEVER_UCP;
overall_options &= ~PCRE2_NEVER_UCP;
}
compile_options &= ~PCRE2_NEVER_UCP;
overall_options &= ~PCRE2_NEVER_UCP;
}
if ((compile_options|overall_options) != 0)
{
{
if (compile_options == overall_options)
show_compile_options(compile_options, "Options:", "\n");
show_compile_options(compile_options, "Options:", "\n");
else
{
show_compile_options(compile_options, "Compile options:", "\n");
show_compile_options(overall_options, "Overall options:", "\n");
}
}
}
if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
if ((pat_patctl.control & CTL_BSR_SET) != 0 ||
(FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
(FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
"any Unicode newline" : "CR, LF, or CRLF");
if ((pat_patctl.control & CTL_NL_SET) != 0 ||
(FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
{
{
switch (newline_convention)
{
case PCRE2_NEWLINE_CR:
fprintf(outfile, "Forced newline is CR\n");
break;
case PCRE2_NEWLINE_LF:
fprintf(outfile, "Forced newline is LF\n");
break;
case PCRE2_NEWLINE_CRLF:
fprintf(outfile, "Forced newline is CRLF\n");
break;
case PCRE2_NEWLINE_ANYCRLF:
fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
break;
case PCRE2_NEWLINE_ANY:
fprintf(outfile, "Forced newline is any Unicode newline\n");
break;
default:
break;
}
}
}
if (first_ctype == 2)
{
@ -3223,7 +3231,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
/* FIXME: tidy this up */
if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
{
size_t jitsize;
if (pattern_info(PCRE2_INFO_JITSIZE, &jitsize) == 0)
@ -3270,8 +3278,8 @@ if (restrict_for_perl_test)
if (strncmp((char *)buffer, "#forbid_utf", 11) == 0 && isspace(buffer[11]))
{
forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
}
forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
}
else if (strncmp((char *)buffer, "#pattern", 8) == 0 && isspace(buffer[8]))
{
(void)decode_modifiers(buffer + 8, CTX_DEFPAT, &def_patctl, NULL);
@ -3440,10 +3448,10 @@ PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
/* Set up for the stackguard test. */
if (pat_patctl.stackguard_test != 0)
if (pat_patctl.stackguard_test != 0)
{
PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard);
}
}
/* Handle compiling via the POSIX interface, which doesn't support the
timing, showing, or debugging options, nor the ability to pass over
@ -3455,7 +3463,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
int rc;
int cflags = 0;
const char *msg = "** Ignored with POSIX interface:";
#endif
#endif
if (test_mode != 8)
{
@ -3515,7 +3523,7 @@ modes. */
#ifdef SUPPORT_PCRE8
if (test_mode == PCRE8_MODE)
errorcode = 0;
#endif
#endif
#ifdef SUPPORT_PCRE16
if (test_mode == PCRE16_MODE)
@ -3578,7 +3586,7 @@ if (timeit > 0)
/* A final compile that is used "for real". */
PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|forbid_utf,
PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|forbid_utf,
&errorcode, &erroroffset, pat_context);
/* Compilation failed; go back for another re, skipping to blank line
@ -3865,10 +3873,10 @@ dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
#else
#else
utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
#endif
start_rep = NULL;
len = strlen((const char *)buffer);
while (len > 0 && isspace(buffer[len-1])) len--;
@ -4081,10 +4089,10 @@ while ((c = *p++) != 0)
default:
if (isalnum(c))
{
{
fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
return PR_OK;
}
}
}
/* We now have a character value in c that may be greater than 255.
@ -4265,7 +4273,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
free(pmatch);
return PR_OK;
}
#endif /* SUPPORT_PCRE8 */
#endif /* SUPPORT_PCRE8 */
/* Handle matching via the native interface. Check for consistency of
modifiers. */
@ -4417,7 +4425,10 @@ for (gmatched = 0;; gmatched++)
{
int i;
uint8_t *nptr;
BOOL showallused;
PCRE2_SIZE *ovector;
PCRE2_SIZE leftchar = FLD(match_data, leftchar);
PCRE2_SIZE rightchar = FLD(match_data, rightchar);
/* This is a check against a lunatic return value. */
@ -4440,7 +4451,7 @@ for (gmatched = 0;; gmatched++)
if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
{
uint32_t maxcapcount;
uint32_t maxcapcount;
if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount) < 0)
return PR_SKIP;
capcount = maxcapcount + 1; /* Allow for full match */
@ -4453,6 +4464,7 @@ for (gmatched = 0;; gmatched++)
ovector = FLD(match_data, ovector);
for (i = 0; i < 2*capcount; i += 2)
{
PCRE2_SIZE lleft, lmiddle, lright;
PCRE2_SIZE start = ovector[i];
PCRE2_SIZE end = ovector[i+1];
@ -4470,12 +4482,40 @@ for (gmatched = 0;; gmatched++)
fprintf(outfile, "<unset>\n");
continue;
}
PCHARSV(pp, start, end - start, utf, outfile);
/* For the whole matched string, if ALLUSEDTEXT is set, and if the
leftmost consulted character is before the start of the match or the
rightmost consulted character is past the end of the match, we want to
show all consulted characters, and indicate which were lookarounds. */
showallused = i == 0 && (dat_datctl.control & CTL_ALLUSEDTEXT) != 0 &&
(leftchar < start || rightchar > end);
if (showallused)
{
PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
PCHARS(lmiddle, pp, start, end - start, utf, outfile);
PCHARS(lright, pp, end, rightchar - end, utf, outfile);
}
else
{
PCHARSV(pp, start, end - start, utf, outfile);
}
#ifdef FIXME
if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
#endif
fprintf(outfile, "\n");
if (showallused)
{
PCRE2_SIZE j;
fprintf(outfile, " ");
for (j = 0; j < lleft; j++) fprintf(outfile, "<");
for (j = 0; j < lmiddle; j++) fprintf(outfile, " ");
for (j = 0; j < lright; j++) fprintf(outfile, ">");
fprintf(outfile, "\n");
}
/* Note: don't use the start/end variables here because we want to
show the text from what is reported as the end. */
@ -4508,12 +4548,12 @@ for (gmatched = 0;; gmatched++)
PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer,
sizeof(copybuffer)/code_unit_size);
if (rc < 0)
{
{
fprintf(outfile, "copy substring %d failed (%d): ", n, rc);
PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
fprintf(outfile, "\n");
}
}
else
{
fprintf(outfile, "%2dC ", n);
@ -4528,11 +4568,11 @@ for (gmatched = 0;; gmatched++)
for (;;)
{
int rc;
PCRE2_SIZE cnl;
PCRE2_SIZE cnl;
uint32_t copybuffer[256];
int namelen = strlen((const char *)nptr);
if (namelen == 0) break;
cnl = namelen;
cnl = namelen;
#ifdef SUPPORT_PCRE8
if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
@ -4571,12 +4611,12 @@ for (gmatched = 0;; gmatched++)
uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer);
if (rc < 0)
{
{
fprintf(outfile, "get substring %d failed (%d): ", n, rc);
PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
fprintf(outfile, "\n");
}
}
else
{
fprintf(outfile, "%2dG ", n);
@ -4591,12 +4631,12 @@ for (gmatched = 0;; gmatched++)
nptr = dat_datctl.get_names;
for (;;)
{
PCRE2_SIZE cnl;
PCRE2_SIZE cnl;
void *gotbuffer;
int rc;
int namelen = strlen((const char *)nptr);
if (namelen == 0) break;
cnl = namelen;
cnl = namelen;
#ifdef SUPPORT_PCRE8
if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
@ -4635,12 +4675,12 @@ for (gmatched = 0;; gmatched++)
size_t *lengths;
PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
if (rc < 0)
{
{
fprintf(outfile, "get substring list failed (%d): ", rc);
PCRE2_GET_ERROR_MESSAGE(rc, rc, pbuffer);
PCHARSV(CASTVAR(void *, pbuffer), 0, rc, FALSE, outfile);
fprintf(outfile, "\n");
}
}
else
{
for (i = 0; i < capcount; i++)
@ -4716,15 +4756,15 @@ for (gmatched = 0;; gmatched++)
else if (utf && test_mode != PCRE32_MODE)
{
if (test_mode == PCRE8_MODE)
{
{
for (; end_offset < ulen; end_offset++)
if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
}
}
else /* 16-bit mode */
{
for (; end_offset < ulen; end_offset++)
if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
}
}
}
SETFLDVEC(match_data, ovector, 0, start_offset);
@ -5016,7 +5056,7 @@ printf(" 32-bit support\n");
(void)PCRE2_CONFIG(PCRE2_CONFIG_UTF, &rc, sizeof(rc));
if (rc != 0)
printf(" UTF support (Unicode version %s)\n", uversion);
else
else
printf(" No UTF support\n");
(void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &rc, sizeof(rc));
if (rc != 0)
@ -5089,7 +5129,7 @@ if (PO(options) != DO(options) || PO(control) != DO(control))
/* Get the PCRE2 and Unicode version number information. */
PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version, sizeof(VERSION_TYPE)*VERSION_SIZE);
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion,
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion,
sizeof(VERSION_TYPE)*VERSION_SIZE);
/* Get buffers from malloc() so that valgrind will check their misuse when
@ -5196,18 +5236,18 @@ while (argc > 1 && argv[op][0] == '-')
/* Set some common pattern and subject controls */
else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE;
else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG;
else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO;
else if (strcmp(arg, "-jit") == 0)
{
def_patctl.jit = 7; /* full & partial */
def_patctl.jit = 7; /* full & partial */
#ifndef SUPPORT_JIT
fprintf(stderr, "** Warning: JIT support is not available: "
"-jit calls dummy functions.\n");
#endif
}
#endif
}
/* Set timing parameters */
@ -5298,7 +5338,7 @@ if (test_mode == PCRE8_MODE)
match_data8 = pcre2_match_data_create_8(max_oveccount, general_context8);
#ifdef HEAP_MATCH_RECURSE
(void)pcre2_set_recursion_memory_management_8(default_dat_context8,
&my_stack_malloc, &my_stack_free, NULL);
&my_stack_malloc, &my_stack_free, NULL);
#endif
}
#endif
@ -5315,7 +5355,7 @@ if (test_mode == PCRE16_MODE)
match_data16 = pcre2_match_data_create_16(max_oveccount, general_context16);
#ifdef HEAP_MATCH_RECURSE
(void)pcre2_set_recursion_memory_management_16(default_dat_context16,
&my_stack_malloc, &my_stack_free, NULL);
&my_stack_malloc, &my_stack_free, NULL);
#endif
}
#endif
@ -5332,7 +5372,7 @@ if (test_mode == PCRE32_MODE)
match_data32 = pcre2_match_data_create_32(max_oveccount, general_context32);
#ifdef HEAP_MATCH_RECURSE
(void)pcre2_set_recursion_memory_management_32(default_dat_context32,
&my_stack_malloc, &my_stack_free, NULL);
&my_stack_malloc, &my_stack_free, NULL);
#endif
}
#endif
@ -5394,9 +5434,9 @@ while (notdone)
uint8_t *p;
int rc = PR_OK;
BOOL expectdata = TEST(compiled_code, !=, NULL);
#ifdef SUPPORT_PCRE8
#ifdef SUPPORT_PCRE8
expectdata |= preg.re_pcre2_code != NULL;
#endif
#endif
if (extend_inputline(infile, buffer, expectdata? "data> " : " re> ") == NULL)
break;
@ -5413,14 +5453,14 @@ while (notdone)
while (isspace(*p)) p++;
if (*p == 0)
{
#ifdef SUPPORT_PCRE8
#ifdef SUPPORT_PCRE8
if (preg.re_pcre2_code != NULL)
{
regfree(&preg);
preg.re_pcre2_code = NULL;
preg.re_match_data = NULL;
}
#endif /* SUPPORT_PCRE8 */
#endif /* SUPPORT_PCRE8 */
if (TEST(compiled_code, !=, NULL))
{
SUB1(pcre2_code_free, compiled_code);
@ -5451,10 +5491,10 @@ while (notdone)
else
{
while (isspace(*p)) p++;
while (isspace(*p)) p++;
if (*p != 0)
{
fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
*buffer);
rc = PR_SKIP;
}

21
testdata/testinput2 vendored
View File

@ -4049,4 +4049,25 @@ a random value. /Ix
aaaabcde
aaaabcde\=ovector=100
/abc(?=xyz)/allusedtext
abcxyzpqr
abcxyzpqr\=aftertext
/(?<=pqr)abc(?=xyz)/allusedtext
xyzpqrabcxyzpqr
xyzpqrabcxyzpqr\=aftertext
/a\b/
a.\=allusedtext
a\=allusedtext
/abc\Kxyz/
abcxyz\=allusedtext
/abc(?=xyz(*ACCEPT))/
abcxyz\=allusedtext
/abc(?=abcde)(?=ab)/allusedtext
abcabcdefg
# End of testinput2

3
testdata/testinput5 vendored
View File

@ -1630,4 +1630,7 @@
/\X?abc/utf,no_start_optimize
\xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06
/(?<=\x{100})\x{200}(?=\x{300})/utf,allusedtext
\x{100}\x{200}\x{300}
# End of testinput5

17
testdata/testinput6 vendored
View File

@ -4783,4 +4783,19 @@
'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
# End of testinput8
/abc(?=xyz)/allusedtext
abcxyzpqr
abcxyzpqr\=aftertext
/(?<=pqr)abc(?=xyz)/allusedtext
xyzpqrabcxyzpqr
xyzpqrabcxyzpqr\=aftertext
/a\b/
a.\=allusedtext
a\=allusedtext
/abc(?=abcde)(?=ab)/allusedtext
abcabcdefg
# End of testinput6

3
testdata/testinput7 vendored
View File

@ -2123,4 +2123,7 @@
A\x{2005}Z
A\x{85}\x{180e}\x{2005}Z
/(?<=\x{100})\x{200}(?=\x{300})/utf,allusedtext
\x{100}\x{200}\x{300}
# End of testinput7

40
testdata/testoutput2 vendored
View File

@ -13680,4 +13680,44 @@ No match
aaaabcde\=ovector=100
0: aaaab
/abc(?=xyz)/allusedtext
abcxyzpqr
0: abcxyz
>>>
abcxyzpqr\=aftertext
0: abcxyz
>>>
0+ xyzpqr
/(?<=pqr)abc(?=xyz)/allusedtext
xyzpqrabcxyzpqr
0: pqrabcxyz
<<< >>>
xyzpqrabcxyzpqr\=aftertext
0: pqrabcxyz
<<< >>>
0+ xyzpqr
/a\b/
a.\=allusedtext
0: a.
>
a\=allusedtext
0: a
/abc\Kxyz/
abcxyz\=allusedtext
0: abcxyz
<<<
/abc(?=xyz(*ACCEPT))/
abcxyz\=allusedtext
0: abcxyz
>>>
/abc(?=abcde)(?=ab)/allusedtext
abcabcdefg
0: abcabcde
>>>>>
# End of testinput2

View File

@ -4001,4 +4001,9 @@ Subject length lower bound = 1
\xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06
0: A\x{300}abc
/(?<=\x{100})\x{200}(?=\x{300})/utf,allusedtext
\x{100}\x{200}\x{300}
0: \x{100}\x{200}\x{300}
<<<<<<< >>>>>>>
# End of testinput5

32
testdata/testoutput6 vendored
View File

@ -7659,4 +7659,34 @@ Matched, but offsets vector is too small to show all matches
NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
0: NON QUOTED "QUOT""ED" AFTER
# End of testinput8
/abc(?=xyz)/allusedtext
abcxyzpqr
0: abcxyz
>>>
abcxyzpqr\=aftertext
0: abcxyz
>>>
0+ xyzpqr
/(?<=pqr)abc(?=xyz)/allusedtext
xyzpqrabcxyzpqr
0: pqrabcxyz
<<< >>>
xyzpqrabcxyzpqr\=aftertext
0: pqrabcxyz
<<< >>>
0+ xyzpqr
/a\b/
a.\=allusedtext
0: a.
>
a\=allusedtext
0: a
/abc(?=abcde)(?=ab)/allusedtext
abcabcdefg
0: abcabcde
>>>>>
# End of testinput6

View File

@ -3771,4 +3771,9 @@ No match
A\x{85}\x{180e}\x{2005}Z
0: A\x{85}\x{180e}\x{2005}Z
/(?<=\x{100})\x{200}(?=\x{300})/utf,allusedtext
\x{100}\x{200}\x{300}
0: \x{100}\x{200}\x{300}
<<<<<<< >>>>>>>
# End of testinput7