Fix minor issues raised by Clang sanitize

This commit is contained in:
Philip Hazel 2021-12-14 15:52:24 +00:00
parent 4243515033
commit 1c41a5b815
10 changed files with 336 additions and 313 deletions

View File

@ -41,6 +41,8 @@ of applications treat NULL/0 in this way.
14. Added support for Bidi_Class and Bidi_Control Unicode properties.
15. Fix some minor issues raised by clang sanitize.
Version 10.39 29-October-2021
-----------------------------

View File

@ -3440,12 +3440,12 @@ block may or may not have been changed.
As well as the usual options for <b>pcre2_match()</b>, a number of additional
options can be set in the <i>options</i> argument of <b>pcre2_substitute()</b>.
One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external
<i>match_data</i> block must be provided, and it must have been used for an
external call to <b>pcre2_match()</b>. The data in the <i>match_data</i> block
(return code, offset vector) is used for the first substitution instead of
calling <b>pcre2_match()</b> from within <b>pcre2_substitute()</b>. This allows
an application to check for a match before choosing to substitute, without
having to repeat the match.
<i>match_data</i> block must be provided, and it must have already been used for
an external call to <b>pcre2_match()</b> with the same pattern and subject
arguments. The data in the <i>match_data</i> block (return code, offset vector)
is then used for the first substitution instead of calling <b>pcre2_match()</b>
from within <b>pcre2_substitute()</b>. This allows an application to check for a
match before choosing to substitute, without having to repeat the match.
</P>
<P>
The contents of the externally supplied match data block are not changed when
@ -4018,7 +4018,7 @@ Cambridge, England.
</P>
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
<P>
Last updated: 08 December 2021
Last updated: 14 December 2021
<br>
Copyright &copy; 1997-2021 University of Cambridge.
<br>

View File

@ -3321,12 +3321,13 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
As well as the usual options for pcre2_match(), a number of additional
options can be set in the options argument of pcre2_substitute(). One
such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external
match_data block must be provided, and it must have been used for an
external call to pcre2_match(). The data in the match_data block (re-
turn code, offset vector) is used for the first substitution instead of
calling pcre2_match() from within pcre2_substitute(). This allows an
application to check for a match before choosing to substitute, without
having to repeat the match.
match_data block must be provided, and it must have already been used
for an external call to pcre2_match() with the same pattern and subject
arguments. The data in the match_data block (return code, offset vec-
tor) is then used for the first substitution instead of calling
pcre2_match() from within pcre2_substitute(). This allows an applica-
tion to check for a match before choosing to substitute, without having
to repeat the match.
The contents of the externally supplied match data block are not
changed when PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTI-
@ -3857,7 +3858,7 @@ AUTHOR
REVISION
Last updated: 08 December 2021
Last updated: 14 December 2021
Copyright (c) 1997-2021 University of Cambridge.
------------------------------------------------------------------------------

View File

@ -1,4 +1,4 @@
.TH PCRE2API 3 "08 December 2021" "PCRE2 10.40"
.TH PCRE2API 3 "14 December 2021" "PCRE2 10.40"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.sp
@ -3453,12 +3453,12 @@ block may or may not have been changed.
As well as the usual options for \fBpcre2_match()\fP, a number of additional
options can be set in the \fIoptions\fP argument of \fBpcre2_substitute()\fP.
One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external
\fImatch_data\fP block must be provided, and it must have been used for an
external call to \fBpcre2_match()\fP. The data in the \fImatch_data\fP block
(return code, offset vector) is used for the first substitution instead of
calling \fBpcre2_match()\fP from within \fBpcre2_substitute()\fP. This allows
an application to check for a match before choosing to substitute, without
having to repeat the match.
\fImatch_data\fP block must be provided, and it must have already been used for
an external call to \fBpcre2_match()\fP with the same pattern and subject
arguments. The data in the \fImatch_data\fP block (return code, offset vector)
is then used for the first substitution instead of calling \fBpcre2_match()\fP
from within \fBpcre2_substitute()\fP. This allows an application to check for a
match before choosing to substitute, without having to repeat the match.
.P
The contents of the externally supplied match data block are not changed when
PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTITUTE_GLOBAL is also set,
@ -4025,6 +4025,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 08 December 2021
Last updated: 14 December 2021
Copyright (c) 1997-2021 University of Cambridge.
.fi

View File

@ -124,7 +124,7 @@ static unsigned int
static int
compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t,
uint32_t *, int32_t *, uint32_t *, int32_t *, branch_chain *,
uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *,
compile_block *, PCRE2_SIZE *);
static int
@ -385,13 +385,15 @@ compiler is clever with identical subexpressions. */
#define SETBIT(a,b) a[(b)/8] = (uint8_t)(a[(b)/8] | (1u << ((b)&7)))
/* Private flags added to firstcu and reqcu. */
/* Values and flags for the unsigned xxcuflags variables that accompany xxcu
variables, which are concerned with first and required code units. A value
greater than or equal to REQ_NONE means "no code unit set"; otherwise the
matching xxcu variable is set, and the low valued bits are relevant. */
#define REQ_CASELESS (1u << 0) /* Indicates caselessness */
#define REQ_VARY (1u << 1) /* reqcu followed non-literal item */
/* Negative values for the firstcu and reqcu flags */
#define REQ_UNSET (-2) /* Not yet found anything */
#define REQ_NONE (-1) /* Found not fixed char */
#define REQ_UNSET 0xffffffffu /* Not yet found anything */
#define REQ_NONE 0xfffffffeu /* Found not fixed character */
#define REQ_CASELESS 0x00000001u /* Code unit in xxcu is caseless */
#define REQ_VARY 0x00000002u /* Code unit is followed by non-literal */
/* These flags are used in the groupinfo vector. */
@ -2139,7 +2141,7 @@ if (c == CHAR_LEFT_CURLY_BRACKET)
while (top != bot)
{
size_t mid = ((top + bot)/2) & (-2);
size_t mid = ((top + bot)/2) & (size_t)(~2+1); /* Mask off bottom bit */
int cf = PRIV(strcmp_c8)(name, prop_synonyms[mid]);
if (cf == 0)
{
@ -5343,9 +5345,9 @@ Arguments:
pptrptr points to the current parsed pattern pointer
errorcodeptr points to error code variable
firstcuptr place to put the first required code unit
firstcuflagsptr place to put the first code unit flags, or a negative number
firstcuflagsptr place to put the first code unit flags
reqcuptr place to put the last required code unit
reqcuflagsptr place to put the last required code unit flags, or a negative number
reqcuflagsptr place to put the last required code unit flags
bcptr points to current branch chain
cb contains pointers to tables etc.
lengthptr NULL during the real compile phase
@ -5358,8 +5360,8 @@ Returns: 0 There's been an error, *errorcodeptr is non-zero
static int
compile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr, uint32_t **pptrptr,
int *errorcodeptr, uint32_t *firstcuptr, int32_t *firstcuflagsptr,
uint32_t *reqcuptr, int32_t *reqcuflagsptr, branch_chain *bcptr,
int *errorcodeptr, uint32_t *firstcuptr, uint32_t *firstcuflagsptr,
uint32_t *reqcuptr, uint32_t *reqcuflagsptr, branch_chain *bcptr,
compile_block *cb, PCRE2_SIZE *lengthptr)
{
int bravalue = 0;
@ -5374,9 +5376,9 @@ uint32_t zeroreqcu, zerofirstcu;
uint32_t escape;
uint32_t *pptr = *pptrptr;
uint32_t meta, meta_arg;
int32_t firstcuflags, reqcuflags;
int32_t zeroreqcuflags, zerofirstcuflags;
int32_t req_caseopt, reqvary, tempreqvary;
uint32_t firstcuflags, reqcuflags;
uint32_t zeroreqcuflags, zerofirstcuflags;
uint32_t req_caseopt, reqvary, tempreqvary;
PCRE2_SIZE offset = 0;
PCRE2_SIZE length_prevgroup = 0;
PCRE2_UCHAR *code = *codeptr;
@ -5432,7 +5434,7 @@ item types that can be repeated set these backoff variables appropriately. */
firstcu = reqcu = zerofirstcu = zeroreqcu = 0;
firstcuflags = reqcuflags = zerofirstcuflags = zeroreqcuflags = REQ_UNSET;
/* The variable req_caseopt contains either the REQ_CASELESS value or zero,
/* The variable req_caseopt contains either the REQ_CASELESS bit or zero,
according to the current setting of the caseless flag. The REQ_CASELESS value
leaves the lower 28 bit empty. It is added into the firstcu or reqcu variables
to record the case status of the value. This is used only for ASCII characters.
@ -5453,13 +5455,12 @@ for (;; pptr++)
BOOL possessive_quantifier;
BOOL note_group_empty;
int class_has_8bitchar;
int i;
uint32_t mclength;
uint32_t skipunits;
uint32_t subreqcu, subfirstcu;
uint32_t groupnumber;
uint32_t verbarglen, verbculen;
int32_t subreqcuflags, subfirstcuflags; /* Must be signed */
uint32_t subreqcuflags, subfirstcuflags;
open_capitem *oc;
PCRE2_UCHAR mcbuffer[8];
@ -5828,9 +5829,9 @@ for (;; pptr++)
if (taboffset >= 0)
{
if (tabopt >= 0)
for (i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset];
for (int i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset];
else
for (i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset];
for (int i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset];
}
/* Now see if we need to remove any special characters. An option
@ -5844,9 +5845,9 @@ for (;; pptr++)
being built and we are done. */
if (local_negate)
for (i = 0; i < 32; i++) classbits[i] |= ~pbits[i];
for (int i = 0; i < 32; i++) classbits[i] |= (uint8_t)(~pbits[i]);
else
for (i = 0; i < 32; i++) classbits[i] |= pbits[i];
for (int i = 0; i < 32; i++) classbits[i] |= pbits[i];
/* Every class contains at least one < 256 character. */
@ -5885,21 +5886,23 @@ for (;; pptr++)
switch(escape)
{
case ESC_d:
for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
break;
case ESC_D:
should_flip_negation = TRUE;
for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_digit];
for (int i = 0; i < 32; i++)
classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]);
break;
case ESC_w:
for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
break;
case ESC_W:
should_flip_negation = TRUE;
for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_word];
for (int i = 0; i < 32; i++)
classbits[i] |= (uint8_t)(~cbits[i+cbit_word]);
break;
/* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
@ -5910,12 +5913,13 @@ for (;; pptr++)
longer treat \s and \S specially. */
case ESC_s:
for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
break;
case ESC_S:
should_flip_negation = TRUE;
for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_space];
for (int i = 0; i < 32; i++)
classbits[i] |= (uint8_t)(~cbits[i+cbit_space]);
break;
/* When adding the horizontal or vertical space lists to a class, or
@ -6156,7 +6160,7 @@ for (;; pptr++)
if (negate_class && !xclass_has_prop)
{
/* Using 255 ^ instead of ~ avoids clang sanitize warning. */
for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
}
memcpy(code, classbits, 32);
code = class_uchardata + (32 / sizeof(PCRE2_UCHAR));
@ -6182,7 +6186,7 @@ for (;; pptr++)
if (negate_class)
{
/* Using 255 ^ instead of ~ avoids clang sanitize warning. */
for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
}
memcpy(code, classbits, 32);
}
@ -6256,7 +6260,7 @@ for (;; pptr++)
verbarglen = *(++pptr);
verbculen = 0;
tempcode = code++;
for (i = 0; i < (int)verbarglen; i++)
for (int i = 0; i < (int)verbarglen; i++)
{
meta = *(++pptr);
#ifdef SUPPORT_UNICODE
@ -6305,6 +6309,7 @@ for (;; pptr++)
bravalue = OP_COND;
{
int count, index;
unsigned int i;
PCRE2_SPTR name;
named_group *ng = cb->named_groups;
uint32_t length = *(++pptr);
@ -6344,7 +6349,7 @@ for (;; pptr++)
groupnumber = 0;
if (meta == META_COND_RNUMBER)
{
for (i = 1; i < (int)length; i++)
for (i = 1; i < length; i++)
{
groupnumber = groupnumber * 10 + name[i] - CHAR_0;
if (groupnumber > MAX_GROUP_NUMBER)
@ -6666,7 +6671,7 @@ for (;; pptr++)
if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET)
{
if (subfirstcuflags >= 0)
if (subfirstcuflags < REQ_NONE)
{
firstcu = subfirstcu;
firstcuflags = subfirstcuflags;
@ -6680,7 +6685,7 @@ for (;; pptr++)
into reqcu if there wasn't one, using the vary flag that was in
existence beforehand. */
else if (subfirstcuflags >= 0 && subreqcuflags < 0)
else if (subfirstcuflags < REQ_NONE && subreqcuflags >= REQ_NONE)
{
subreqcu = subfirstcu;
subreqcuflags = subfirstcuflags | tempreqvary;
@ -6689,7 +6694,7 @@ for (;; pptr++)
/* If the subpattern set a required code unit (or set a first code unit
that isn't really the first code unit - see above), set it. */
if (subreqcuflags >= 0)
if (subreqcuflags < REQ_NONE)
{
reqcu = subreqcu;
reqcuflags = subreqcuflags;
@ -6708,7 +6713,7 @@ for (;; pptr++)
in that example, 'X' ends up set for both. */
else if ((bravalue == OP_ASSERT || bravalue == OP_ASSERT_NA) &&
subreqcuflags >= 0 && subfirstcuflags >= 0)
subreqcuflags < REQ_NONE && subfirstcuflags < REQ_NONE)
{
reqcu = subreqcu;
reqcuflags = subreqcuflags;
@ -6738,7 +6743,7 @@ for (;; pptr++)
this name is duplicated. */
groupnumber = 0;
for (i = 0; i < cb->names_found; i++, ng++)
for (unsigned int i = 0; i < cb->names_found; i++, ng++)
{
if (length == ng->length &&
PRIV(strncmp)(name, ng->name, length) == 0)
@ -7092,7 +7097,7 @@ for (;; pptr++)
*lengthptr += delta;
}
else for (i = 0; i < replicate; i++)
else for (int i = 0; i < replicate; i++)
{
memcpy(code, previous, CU2BYTES(1 + LINK_SIZE));
previous = code;
@ -7268,12 +7273,12 @@ for (;; pptr++)
else
{
if (groupsetfirstcu && reqcuflags < 0)
if (groupsetfirstcu && reqcuflags >= REQ_NONE)
{
reqcu = firstcu;
reqcuflags = firstcuflags;
}
for (i = 1; (uint32_t)i < repeat_min; i++)
for (uint32_t i = 1; i < repeat_min; i++)
{
memcpy(code, previous, CU2BYTES(len));
code += len;
@ -7317,14 +7322,14 @@ for (;; pptr++)
/* This is compiling for real */
else for (i = repeat_max - 1; i >= 0; i--)
else for (uint32_t i = repeat_max; i >= 1; i--)
{
*code++ = OP_BRAZERO + repeat_type;
/* All but the final copy start a new nesting, maintaining the
chain of brackets outstanding. */
if (i != 0)
if (i != 1)
{
int linkoffset;
*code++ = OP_BRA;
@ -8043,9 +8048,9 @@ Arguments:
errorcodeptr -> pointer to error code variable
skipunits skip this many code units at start (for brackets and OP_COND)
firstcuptr place to put the first required code unit
firstcuflagsptr place to put the first code unit flags, or a negative number
firstcuflagsptr place to put the first code unit flags
reqcuptr place to put the last required code unit
reqcuflagsptr place to put the last required code unit flags, or a negative number
reqcuflagsptr place to put the last required code unit flags
bcptr pointer to the chain of currently open branches
cb points to the data block with tables pointers etc.
lengthptr NULL during the real compile phase
@ -8059,7 +8064,7 @@ Returns: 0 There has been an error
static int
compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, uint32_t **pptrptr,
int *errorcodeptr, uint32_t skipunits, uint32_t *firstcuptr,
int32_t *firstcuflagsptr, uint32_t *reqcuptr,int32_t *reqcuflagsptr,
uint32_t *firstcuflagsptr, uint32_t *reqcuptr, uint32_t *reqcuflagsptr,
branch_chain *bcptr, compile_block *cb, PCRE2_SIZE *lengthptr)
{
PCRE2_UCHAR *code = *codeptr;
@ -8072,9 +8077,9 @@ int okreturn = 1;
uint32_t *pptr = *pptrptr;
uint32_t firstcu, reqcu;
uint32_t lookbehindlength;
int32_t firstcuflags, reqcuflags;
uint32_t firstcuflags, reqcuflags;
uint32_t branchfirstcu, branchreqcu;
int32_t branchfirstcuflags, branchreqcuflags;
uint32_t branchfirstcuflags, branchreqcuflags;
PCRE2_SIZE length;
branch_chain bc;
@ -8193,9 +8198,9 @@ for (;;)
if (firstcuflags != branchfirstcuflags || firstcu != branchfirstcu)
{
if (firstcuflags >= 0)
if (firstcuflags < REQ_NONE)
{
if (reqcuflags < 0)
if (reqcuflags >= REQ_NONE)
{
reqcu = firstcu;
reqcuflags = firstcuflags;
@ -8207,8 +8212,8 @@ for (;;)
/* If we (now or from before) have no firstcu, a firstcu from the
branch becomes a reqcu if there isn't a branch reqcu. */
if (firstcuflags < 0 && branchfirstcuflags >= 0 &&
branchreqcuflags < 0)
if (firstcuflags >= REQ_NONE && branchfirstcuflags < REQ_NONE &&
branchreqcuflags >= REQ_NONE)
{
branchreqcu = branchfirstcu;
branchreqcuflags = branchfirstcuflags;
@ -8356,7 +8361,7 @@ Returns: TRUE or FALSE
*/
static BOOL
is_anchored(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb,
is_anchored(PCRE2_SPTR code, uint32_t bracket_map, compile_block *cb,
int atomcount, BOOL inassert)
{
do {
@ -8379,7 +8384,7 @@ do {
op == OP_SCBRA || op == OP_SCBRAPOS)
{
int n = GET2(scode, 1+LINK_SIZE);
int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
uint32_t new_map = bracket_map | ((n < 32)? (1u << n) : 1);
if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE;
}
@ -8739,15 +8744,15 @@ Returns: the fixed first code unit, or 0 with REQ_NONE in flags
*/
static uint32_t
find_firstassertedcu(PCRE2_SPTR code, int32_t *flags, uint32_t inassert)
find_firstassertedcu(PCRE2_SPTR code, uint32_t *flags, uint32_t inassert)
{
uint32_t c = 0;
int cflags = REQ_NONE;
uint32_t cflags = REQ_NONE;
*flags = REQ_NONE;
do {
uint32_t d;
int dflags;
uint32_t dflags;
int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
*code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;
PCRE2_SPTR scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE);
@ -8770,9 +8775,8 @@ do {
case OP_SCRIPT_RUN:
d = find_firstassertedcu(scode, &dflags, inassert +
((op == OP_ASSERT || op == OP_ASSERT_NA)?1:0));
if (dflags < 0)
return 0;
if (cflags < 0) { c = d; cflags = dflags; }
if (dflags >= REQ_NONE) return 0;
if (cflags >= REQ_NONE) { c = d; cflags = dflags; }
else if (c != d || cflags != dflags) return 0;
break;
@ -8785,7 +8789,7 @@ do {
case OP_MINPLUS:
case OP_POSPLUS:
if (inassert == 0) return 0;
if (cflags < 0) { c = scode[1]; cflags = 0; }
if (cflags >= REQ_NONE) { c = scode[1]; cflags = 0; }
else if (c != scode[1]) return 0;
break;
@ -8811,7 +8815,7 @@ do {
#endif
#endif
if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
if (cflags >= REQ_NONE) { c = scode[1]; cflags = REQ_CASELESS; }
else if (c != scode[1]) return 0;
break;
}
@ -9747,7 +9751,7 @@ PCRE2_SIZE re_blocksize; /* Size of memory block */
PCRE2_SIZE big32count = 0; /* 32-bit literals >= 0x80000000 */
PCRE2_SIZE parsed_size_needed; /* Needed for parsed pattern */
int32_t firstcuflags, reqcuflags; /* Type of first/req code unit */
uint32_t firstcuflags, reqcuflags; /* Type of first/req code unit */
uint32_t firstcu, reqcu; /* Value of first/req code unit */
uint32_t setflags = 0; /* NL and BSR set flags */
@ -10427,13 +10431,13 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
(these are not saved during the compile because they can cause conflicts with
actual literals that follow). */
if (firstcuflags < 0)
if (firstcuflags >= REQ_NONE)
firstcu = find_firstassertedcu(codestart, &firstcuflags, 0);
/* Save the data for a first code unit. The existence of one means the
minimum length must be at least 1. */
if (firstcuflags >= 0)
if (firstcuflags < REQ_NONE)
{
re->first_codeunit = firstcu;
re->flags |= PCRE2_FIRSTSET;
@ -10480,16 +10484,16 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
different character and not a non-starting code unit of the first character,
because the minimum length count is in characters, not code units. */
if (reqcuflags >= 0)
if (reqcuflags < REQ_NONE)
{
#if PCRE2_CODE_UNIT_WIDTH == 16
if ((re->overall_options & PCRE2_UTF) == 0 || /* Not UTF */
firstcuflags < 0 || /* First not set */
firstcuflags >= REQ_NONE || /* First not set */
(firstcu & 0xf800) != 0xd800 || /* First not surrogate */
(reqcu & 0xfc00) != 0xdc00) /* Req not low surrogate */
#elif PCRE2_CODE_UNIT_WIDTH == 8
if ((re->overall_options & PCRE2_UTF) == 0 || /* Not UTF */
firstcuflags < 0 || /* First not set */
firstcuflags >= REQ_NONE || /* First not set */
(firstcu & 0x80) == 0 || /* First is ASCII */
(reqcu & 0x80) == 0) /* Req is ASCII */
#endif

View File

@ -747,8 +747,8 @@ typedef struct compile_block {
uint32_t class_range_start; /* Overall class range start */
uint32_t class_range_end; /* Overall class range end */
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
uint32_t req_varyopt; /* "After variable item" flag for reqbyte */
int max_lookbehind; /* Maximum lookbehind (characters) */
int req_varyopt; /* "After variable item" flag for reqbyte */
BOOL had_accept; /* (*ACCEPT) encountered */
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
BOOL had_recurse; /* Had a recursion or subroutine call */

View File

@ -6591,7 +6591,7 @@ if (utf &&
/* If the end precedes start_match, it means there is invalid UTF in the
extra code units we reversed over because of a lookbehind. Advance past the
first bad code unit, and then skip invalid character starting code units in
8-bit and 16-bit modes, and try again. */
8-bit and 16-bit modes, and try again with the original end point. */
if (end_subject < start_match)
{
@ -6600,6 +6600,7 @@ if (utf &&
while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
mb->check_subject++;
#endif
end_subject = true_end_subject;
}
/* Otherwise, set the not end of line option, and do the match. */

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2020 University of Cambridge
New API code Copyright (c) 2016-2021 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@ -908,7 +908,7 @@ set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
{
uint32_t c;
for (c = 0; c < table_limit; c++)
re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]);
re->start_bitmap[c] |= (uint8_t)(~(re->tables[c+cbits_offset+cbit_type]));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
#endif

View File

@ -304,7 +304,6 @@ else if (use_existing_match)
(pcre2_general_context *)mcontext;
int pairs = (code->top_bracket + 1 < match_data->oveccount)?
code->top_bracket + 1 : match_data->oveccount;
if (subject == NULL) return PCRE2_ERROR_NULL;
internal_match_data = pcre2_match_data_create(match_data->oveccount,
gcontext);
if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
@ -325,7 +324,16 @@ scb.input = subject;
scb.output = (PCRE2_SPTR)buffer;
scb.ovector = ovector;
/* Find lengths of zero-terminated subject */
/* A NULL subject of zero length is treated as an empty string. */
if (subject == NULL)
{
if (length != 0) return PCRE2_ERROR_NULL;
subject = (PCRE2_SPTR)"";
}
/* Find length of zero-terminated subject */
if (length == PCRE2_ZERO_TERMINATED)
length = subject? PRIV(strlen)(subject) : 0;

View File

@ -3152,7 +3152,7 @@ Returns: 0 on success, with the length updated to the number of 16-bit
OR -3 if a value > 0xffff is encountered when not in UTF mode
*/
static PCRE2_SIZE
static int
to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
{
uint16_t *pp;
@ -3239,7 +3239,7 @@ Returns: 0 on success, with the length updated to the number of 32-bit
OR -2 if a value > 0x10ffff is encountered in UTF mode
*/
static PCRE2_SIZE
static int
to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
{
uint32_t *pp;
@ -5490,24 +5490,27 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
{
show_compile_options(
pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
msg = "";
}
if ((FLD(pat_context, extra_options) &
~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS) != 0)
{
show_compile_extra_options(
FLD(pat_context, extra_options) & ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS,
pat_patctl.options & (uint32_t)(~POSIX_SUPPORTED_COMPILE_OPTIONS),
msg, "");
msg = "";
}
if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
(pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
if ((FLD(pat_context, extra_options) &
(uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS)) != 0)
{
show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS,
pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg);
show_compile_extra_options(
FLD(pat_context, extra_options) &
(uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS), msg, "");
msg = "";
}
if ((pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS)) != 0 ||
(pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2)) != 0)
{
show_controls(
pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS),
pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2),
msg);
msg = "";
}
@ -7652,12 +7655,16 @@ for (gmatched = 0;; gmatched++)
}
/* The result of the match is now in capcount. First handle a successful
match. */
match. If pp was forced to be NULL (to test NULL handling) it will have been
treated as an empty string if the length was zero. So re-create that for
outputting. */
if (capcount >= 0)
{
int i;
if (pp == NULL) pp = (uint8_t *)"";
if (capcount > (int)oveccount) /* Check for lunatic return value */
{
fprintf(outfile,