Fix the match offsets returned by regexec() in the POSIX wrapper when called
with REG_STARTEND and a starting offset greater than zero.
This commit is contained in:
parent
8e4b992682
commit
e4c86e2ced
|
@ -179,6 +179,9 @@ deeply. (Compare item 10.23/36.) This should fix oss-fuzz #1761.
|
||||||
|
|
||||||
37. Implement PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL.
|
37. Implement PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL.
|
||||||
|
|
||||||
|
38. Fix returned offsets from regexec() when REG_STARTEND is used with a
|
||||||
|
starting offset greater than zero.
|
||||||
|
|
||||||
|
|
||||||
Version 10.23 14-February-2017
|
Version 10.23 14-February-2017
|
||||||
------------------------------
|
------------------------------
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2POSIX 3 "31 January 2016" "PCRE2 10.22"
|
.TH PCRE2POSIX 3 "03 June 2017" "PCRE2 10.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH "SYNOPSIS"
|
.SH "SYNOPSIS"
|
||||||
|
@ -204,15 +204,21 @@ function.
|
||||||
.sp
|
.sp
|
||||||
REG_STARTEND
|
REG_STARTEND
|
||||||
.sp
|
.sp
|
||||||
The string is considered to start at \fIstring\fP + \fIpmatch[0].rm_so\fP and
|
When this option is set, the string is considered to start at \fIstring\fP +
|
||||||
to have a terminating NUL located at \fIstring\fP + \fIpmatch[0].rm_eo\fP
|
\fIpmatch[0].rm_so\fP and to have a terminating NUL located at \fIstring\fP +
|
||||||
(there need not actually be a NUL at that location), regardless of the value of
|
\fIpmatch[0].rm_eo\fP (there need not actually be a NUL at that location),
|
||||||
\fInmatch\fP. This is a BSD extension, compatible with but not specified by
|
regardless of the value of \fInmatch\fP. However, the offsets of the matched
|
||||||
IEEE Standard 1003.2 (POSIX.2), and should be used with caution in software
|
string and any captured substrings are still given relative to the start of
|
||||||
intended to be portable to other systems. Note that a non-zero \fIrm_so\fP does
|
\fIstring\fP. (Before PCRE2 release 10.30 these were given relative to
|
||||||
not imply REG_NOTBOL; REG_STARTEND affects only the location of the string, not
|
\fIstring\fP + \fIpmatch[0].rm_so\fP, but this differs from other
|
||||||
how it is matched. Setting REG_STARTEND and passing \fIpmatch\fP as NULL are
|
implementations.)
|
||||||
mutually exclusive; the error REG_INVARG is returned.
|
.P
|
||||||
|
This is a BSD extension, compatible with but not specified by IEEE Standard
|
||||||
|
1003.2 (POSIX.2), and should be used with caution in software intended to be
|
||||||
|
portable to other systems. Note that a non-zero \fIrm_so\fP does not imply
|
||||||
|
REG_NOTBOL; REG_STARTEND affects only the location of the string, not how it is
|
||||||
|
matched. Setting REG_STARTEND and passing \fIpmatch\fP as NULL are mutually
|
||||||
|
exclusive; if both are done, the error REG_INVARG is returned.
|
||||||
.P
|
.P
|
||||||
If the pattern was compiled with the REG_NOSUB flag, no data about any matched
|
If the pattern was compiled with the REG_NOSUB flag, no data about any matched
|
||||||
strings is returned. The \fInmatch\fP and \fIpmatch\fP arguments of
|
strings is returned. The \fInmatch\fP and \fIpmatch\fP arguments of
|
||||||
|
@ -271,6 +277,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 31 January 2016
|
Last updated: 03 June 2017
|
||||||
Copyright (c) 1997-2016 University of Cambridge.
|
Copyright (c) 1997-2017 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2TEST 1 "01 June 2017" "PCRE 10.30"
|
.TH PCRE2TEST 1 "03 June 2017" "PCRE2 10.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -1046,6 +1046,20 @@ wrapper API to be used, the only option-setting modifiers that have any effect
|
||||||
are \fBnotbol\fP, \fBnotempty\fP, and \fBnoteol\fP, causing REG_NOTBOL,
|
are \fBnotbol\fP, \fBnotempty\fP, and \fBnoteol\fP, causing REG_NOTBOL,
|
||||||
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to \fBregexec()\fP.
|
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to \fBregexec()\fP.
|
||||||
The other modifiers are ignored, with a warning message.
|
The other modifiers are ignored, with a warning message.
|
||||||
|
.P
|
||||||
|
There is one additional modifier that can be used with the POSIX wrapper. It is
|
||||||
|
ignored (with a warning) if used for non-POSIX matching.
|
||||||
|
.sp
|
||||||
|
posix_startend=<n>[:<m>]
|
||||||
|
.sp
|
||||||
|
This causes the subject string to be passed to \fBregexec()\fP using the
|
||||||
|
REG_STARTEND option, which uses offsets to restrict which part of the string is
|
||||||
|
searched. If only one number is given, the end offset is passed as the end of
|
||||||
|
the subject string. For more details about REG_STARTEND, see the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2posix\fP
|
||||||
|
.\"
|
||||||
|
documentation.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SS "Setting match controls"
|
.SS "Setting match controls"
|
||||||
|
@ -1793,6 +1807,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 01 June 2017
|
Last updated: 03 June 2017
|
||||||
Copyright (c) 1997-2017 University of Cambridge.
|
Copyright (c) 1997-2017 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -338,8 +338,8 @@ if (rc >= 0)
|
||||||
if ((size_t)rc > nmatch) rc = (int)nmatch;
|
if ((size_t)rc > nmatch) rc = (int)nmatch;
|
||||||
for (i = 0; i < (size_t)rc; i++)
|
for (i = 0; i < (size_t)rc; i++)
|
||||||
{
|
{
|
||||||
pmatch[i].rm_so = ovector[i*2];
|
pmatch[i].rm_so = ovector[i*2] + so;
|
||||||
pmatch[i].rm_eo = ovector[i*2+1];
|
pmatch[i].rm_eo = ovector[i*2+1] + so;
|
||||||
}
|
}
|
||||||
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
|
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -186,7 +186,7 @@ void vms_setsymbol( char *, char *, int );
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define CFORE_UNSET UINT32_MAX /* Unset value for cfail/cerror fields */
|
#define CFORE_UNSET UINT32_MAX /* Unset value for startend/cfail/cerror fields */
|
||||||
#define CONVERT_UNSET UINT32_MAX /* Unset value for convert_type field */
|
#define CONVERT_UNSET UINT32_MAX /* Unset value for convert_type field */
|
||||||
#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
|
#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
|
||||||
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
|
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
|
||||||
|
@ -538,6 +538,7 @@ typedef struct datctl { /* Structure for data line modifiers. */
|
||||||
uint32_t control; /* Must be in same position as patctl */
|
uint32_t control; /* Must be in same position as patctl */
|
||||||
uint32_t control2; /* Must be in same position as patctl */
|
uint32_t control2; /* Must be in same position as patctl */
|
||||||
uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
|
uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
|
||||||
|
uint32_t startend[2];
|
||||||
uint32_t cerror[2];
|
uint32_t cerror[2];
|
||||||
uint32_t cfail[2];
|
uint32_t cfail[2];
|
||||||
int32_t callout_data;
|
int32_t callout_data;
|
||||||
|
@ -662,6 +663,7 @@ static modstruct modlist[] = {
|
||||||
{ "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
|
{ "ph", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_HARD, DO(options) },
|
||||||
{ "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
|
{ "posix", MOD_PAT, MOD_CTL, CTL_POSIX, PO(control) },
|
||||||
{ "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) },
|
{ "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) },
|
||||||
|
{ "posix_startend", MOD_DAT, MOD_IN2, 0, DO(startend) },
|
||||||
{ "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
|
{ "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
|
||||||
{ "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) },
|
{ "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) },
|
||||||
{ "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
|
{ "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
|
||||||
|
@ -6660,6 +6662,14 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (dat_datctl.startend[0] != CFORE_UNSET)
|
||||||
|
{
|
||||||
|
pmatch[0].rm_so = dat_datctl.startend[0];
|
||||||
|
pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)?
|
||||||
|
dat_datctl.startend[1] : len;
|
||||||
|
eflags |= REG_STARTEND;
|
||||||
|
}
|
||||||
|
|
||||||
if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
|
if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
|
||||||
if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
|
if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
|
||||||
if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
|
if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
|
||||||
|
@ -6713,6 +6723,9 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
|
||||||
/* Handle matching via the native interface. Check for consistency of
|
/* Handle matching via the native interface. Check for consistency of
|
||||||
modifiers. */
|
modifiers. */
|
||||||
|
|
||||||
|
if (dat_datctl.startend[0] != CFORE_UNSET)
|
||||||
|
fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n");
|
||||||
|
|
||||||
/* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
|
/* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
|
||||||
matching, even if the JIT compiler was used. */
|
matching, even if the JIT compiler was used. */
|
||||||
|
|
||||||
|
@ -7903,6 +7916,7 @@ memset(&def_datctl, 0, sizeof(datctl));
|
||||||
def_datctl.oveccount = DEFAULT_OVECCOUNT;
|
def_datctl.oveccount = DEFAULT_OVECCOUNT;
|
||||||
def_datctl.copy_numbers[0] = -1;
|
def_datctl.copy_numbers[0] = -1;
|
||||||
def_datctl.get_numbers[0] = -1;
|
def_datctl.get_numbers[0] = -1;
|
||||||
|
def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET;
|
||||||
def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
|
def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
|
||||||
def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
|
def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
|
||||||
|
|
||||||
|
|
|
@ -113,4 +113,14 @@
|
||||||
/(?=(a\K))/
|
/(?=(a\K))/
|
||||||
a
|
a
|
||||||
|
|
||||||
|
/^d(e)$/posix
|
||||||
|
acdef\=posix_startend=2:4
|
||||||
|
acde\=posix_startend=2
|
||||||
|
\= Expect no match
|
||||||
|
acdef
|
||||||
|
acdef\=posix_startend=2
|
||||||
|
|
||||||
|
/^a\x{00}b$/posix
|
||||||
|
a\x{00}b\=posix_startend=0:3
|
||||||
|
|
||||||
# End of testdata/testinput18
|
# End of testdata/testinput18
|
||||||
|
|
|
@ -174,4 +174,21 @@ Start of matched string is beyond its end - displaying from end to start.
|
||||||
0: a
|
0: a
|
||||||
1: a
|
1: a
|
||||||
|
|
||||||
|
/^d(e)$/posix
|
||||||
|
acdef\=posix_startend=2:4
|
||||||
|
0: de
|
||||||
|
1: e
|
||||||
|
acde\=posix_startend=2
|
||||||
|
0: de
|
||||||
|
1: e
|
||||||
|
\= Expect no match
|
||||||
|
acdef
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
acdef\=posix_startend=2
|
||||||
|
No match: POSIX code 17: match failed
|
||||||
|
|
||||||
|
/^a\x{00}b$/posix
|
||||||
|
a\x{00}b\=posix_startend=0:3
|
||||||
|
0: a\x00b
|
||||||
|
|
||||||
# End of testdata/testinput18
|
# End of testdata/testinput18
|
||||||
|
|
Loading…
Reference in New Issue