Fix minor issues raised by Clang sanitize
This commit is contained in:
parent
4243515033
commit
1c41a5b815
|
@ -41,6 +41,8 @@ of applications treat NULL/0 in this way.
|
||||||
|
|
||||||
14. Added support for Bidi_Class and Bidi_Control Unicode properties.
|
14. Added support for Bidi_Class and Bidi_Control Unicode properties.
|
||||||
|
|
||||||
|
15. Fix some minor issues raised by clang sanitize.
|
||||||
|
|
||||||
|
|
||||||
Version 10.39 29-October-2021
|
Version 10.39 29-October-2021
|
||||||
-----------------------------
|
-----------------------------
|
||||||
|
|
|
@ -3440,12 +3440,12 @@ block may or may not have been changed.
|
||||||
As well as the usual options for <b>pcre2_match()</b>, a number of additional
|
As well as the usual options for <b>pcre2_match()</b>, a number of additional
|
||||||
options can be set in the <i>options</i> argument of <b>pcre2_substitute()</b>.
|
options can be set in the <i>options</i> argument of <b>pcre2_substitute()</b>.
|
||||||
One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external
|
One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external
|
||||||
<i>match_data</i> block must be provided, and it must have been used for an
|
<i>match_data</i> block must be provided, and it must have already been used for
|
||||||
external call to <b>pcre2_match()</b>. The data in the <i>match_data</i> block
|
an external call to <b>pcre2_match()</b> with the same pattern and subject
|
||||||
(return code, offset vector) is used for the first substitution instead of
|
arguments. The data in the <i>match_data</i> block (return code, offset vector)
|
||||||
calling <b>pcre2_match()</b> from within <b>pcre2_substitute()</b>. This allows
|
is then used for the first substitution instead of calling <b>pcre2_match()</b>
|
||||||
an application to check for a match before choosing to substitute, without
|
from within <b>pcre2_substitute()</b>. This allows an application to check for a
|
||||||
having to repeat the match.
|
match before choosing to substitute, without having to repeat the match.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The contents of the externally supplied match data block are not changed when
|
The contents of the externally supplied match data block are not changed when
|
||||||
|
@ -4018,7 +4018,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 08 December 2021
|
Last updated: 14 December 2021
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2021 University of Cambridge.
|
Copyright © 1997-2021 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
413
doc/pcre2.txt
413
doc/pcre2.txt
|
@ -3321,89 +3321,90 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
|
||||||
As well as the usual options for pcre2_match(), a number of additional
|
As well as the usual options for pcre2_match(), a number of additional
|
||||||
options can be set in the options argument of pcre2_substitute(). One
|
options can be set in the options argument of pcre2_substitute(). One
|
||||||
such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external
|
such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external
|
||||||
match_data block must be provided, and it must have been used for an
|
match_data block must be provided, and it must have already been used
|
||||||
external call to pcre2_match(). The data in the match_data block (re-
|
for an external call to pcre2_match() with the same pattern and subject
|
||||||
turn code, offset vector) is used for the first substitution instead of
|
arguments. The data in the match_data block (return code, offset vec-
|
||||||
calling pcre2_match() from within pcre2_substitute(). This allows an
|
tor) is then used for the first substitution instead of calling
|
||||||
application to check for a match before choosing to substitute, without
|
pcre2_match() from within pcre2_substitute(). This allows an applica-
|
||||||
having to repeat the match.
|
tion to check for a match before choosing to substitute, without having
|
||||||
|
to repeat the match.
|
||||||
|
|
||||||
The contents of the externally supplied match data block are not
|
The contents of the externally supplied match data block are not
|
||||||
changed when PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTI-
|
changed when PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTI-
|
||||||
TUTE_GLOBAL is also set, pcre2_match() is called after the first sub-
|
TUTE_GLOBAL is also set, pcre2_match() is called after the first sub-
|
||||||
stitution to check for further matches, but this is done using an in-
|
stitution to check for further matches, but this is done using an in-
|
||||||
ternally obtained match data block, thus always leaving the external
|
ternally obtained match data block, thus always leaving the external
|
||||||
block unchanged.
|
block unchanged.
|
||||||
|
|
||||||
The code argument is not used for matching before the first substitu-
|
The code argument is not used for matching before the first substitu-
|
||||||
tion when PCRE2_SUBSTITUTE_MATCHED is set, but it must be provided,
|
tion when PCRE2_SUBSTITUTE_MATCHED is set, but it must be provided,
|
||||||
even when PCRE2_SUBSTITUTE_GLOBAL is not set, because it contains in-
|
even when PCRE2_SUBSTITUTE_GLOBAL is not set, because it contains in-
|
||||||
formation such as the UTF setting and the number of capturing parenthe-
|
formation such as the UTF setting and the number of capturing parenthe-
|
||||||
ses in the pattern.
|
ses in the pattern.
|
||||||
|
|
||||||
The default action of pcre2_substitute() is to return a copy of the
|
The default action of pcre2_substitute() is to return a copy of the
|
||||||
subject string with matched substrings replaced. However, if PCRE2_SUB-
|
subject string with matched substrings replaced. However, if PCRE2_SUB-
|
||||||
STITUTE_REPLACEMENT_ONLY is set, only the replacement substrings are
|
STITUTE_REPLACEMENT_ONLY is set, only the replacement substrings are
|
||||||
returned. In the global case, multiple replacements are concatenated in
|
returned. In the global case, multiple replacements are concatenated in
|
||||||
the output buffer. Substitution callouts (see below) can be used to
|
the output buffer. Substitution callouts (see below) can be used to
|
||||||
separate them if necessary.
|
separate them if necessary.
|
||||||
|
|
||||||
The outlengthptr argument of pcre2_substitute() must point to a vari-
|
The outlengthptr argument of pcre2_substitute() must point to a vari-
|
||||||
able that contains the length, in code units, of the output buffer. If
|
able that contains the length, in code units, of the output buffer. If
|
||||||
the function is successful, the value is updated to contain the length
|
the function is successful, the value is updated to contain the length
|
||||||
in code units of the new string, excluding the trailing zero that is
|
in code units of the new string, excluding the trailing zero that is
|
||||||
automatically added.
|
automatically added.
|
||||||
|
|
||||||
If the function is not successful, the value set via outlengthptr de-
|
If the function is not successful, the value set via outlengthptr de-
|
||||||
pends on the type of error. For syntax errors in the replacement
|
pends on the type of error. For syntax errors in the replacement
|
||||||
string, the value is the offset in the replacement string where the er-
|
string, the value is the offset in the replacement string where the er-
|
||||||
ror was detected. For other errors, the value is PCRE2_UNSET by de-
|
ror was detected. For other errors, the value is PCRE2_UNSET by de-
|
||||||
fault. This includes the case of the output buffer being too small, un-
|
fault. This includes the case of the output buffer being too small, un-
|
||||||
less PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set.
|
less PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set.
|
||||||
|
|
||||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output
|
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output
|
||||||
buffer is too small. The default action is to return PCRE2_ERROR_NOMEM-
|
buffer is too small. The default action is to return PCRE2_ERROR_NOMEM-
|
||||||
ORY immediately. If this option is set, however, pcre2_substitute()
|
ORY immediately. If this option is set, however, pcre2_substitute()
|
||||||
continues to go through the motions of matching and substituting (with-
|
continues to go through the motions of matching and substituting (with-
|
||||||
out, of course, writing anything) in order to compute the size of buf-
|
out, of course, writing anything) in order to compute the size of buf-
|
||||||
fer that is needed. This value is passed back via the outlengthptr
|
fer that is needed. This value is passed back via the outlengthptr
|
||||||
variable, with the result of the function still being PCRE2_ER-
|
variable, with the result of the function still being PCRE2_ER-
|
||||||
ROR_NOMEMORY.
|
ROR_NOMEMORY.
|
||||||
|
|
||||||
Passing a buffer size of zero is a permitted way of finding out how
|
Passing a buffer size of zero is a permitted way of finding out how
|
||||||
much memory is needed for a given substitution. However, this does mean
|
much memory is needed for a given substitution. However, this does mean
|
||||||
that the entire operation is carried out twice. Depending on the appli-
|
that the entire operation is carried out twice. Depending on the appli-
|
||||||
cation, it may be more efficient to allocate a large buffer and free
|
cation, it may be more efficient to allocate a large buffer and free
|
||||||
the excess afterwards, instead of using PCRE2_SUBSTITUTE_OVER-
|
the excess afterwards, instead of using PCRE2_SUBSTITUTE_OVER-
|
||||||
FLOW_LENGTH.
|
FLOW_LENGTH.
|
||||||
|
|
||||||
The replacement string, which is interpreted as a UTF string in UTF
|
The replacement string, which is interpreted as a UTF string in UTF
|
||||||
mode, is checked for UTF validity unless PCRE2_NO_UTF_CHECK is set. An
|
mode, is checked for UTF validity unless PCRE2_NO_UTF_CHECK is set. An
|
||||||
invalid UTF replacement string causes an immediate return with the rel-
|
invalid UTF replacement string causes an immediate return with the rel-
|
||||||
evant UTF error code.
|
evant UTF error code.
|
||||||
|
|
||||||
If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not in-
|
If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not in-
|
||||||
terpreted in any way. By default, however, a dollar character is an es-
|
terpreted in any way. By default, however, a dollar character is an es-
|
||||||
cape character that can specify the insertion of characters from cap-
|
cape character that can specify the insertion of characters from cap-
|
||||||
ture groups and names from (*MARK) or other control verbs in the pat-
|
ture groups and names from (*MARK) or other control verbs in the pat-
|
||||||
tern. The following forms are always recognized:
|
tern. The following forms are always recognized:
|
||||||
|
|
||||||
$$ insert a dollar character
|
$$ insert a dollar character
|
||||||
$<n> or ${<n>} insert the contents of group <n>
|
$<n> or ${<n>} insert the contents of group <n>
|
||||||
$*MARK or ${*MARK} insert a control verb name
|
$*MARK or ${*MARK} insert a control verb name
|
||||||
|
|
||||||
Either a group number or a group name can be given for <n>. Curly
|
Either a group number or a group name can be given for <n>. Curly
|
||||||
brackets are required only if the following character would be inter-
|
brackets are required only if the following character would be inter-
|
||||||
preted as part of the number or name. The number may be zero to include
|
preted as part of the number or name. The number may be zero to include
|
||||||
the entire matched string. For example, if the pattern a(b)c is
|
the entire matched string. For example, if the pattern a(b)c is
|
||||||
matched with "=abc=" and the replacement string "+$1$0$1+", the result
|
matched with "=abc=" and the replacement string "+$1$0$1+", the result
|
||||||
is "=+babcb+=".
|
is "=+babcb+=".
|
||||||
|
|
||||||
$*MARK inserts the name from the last encountered backtracking control
|
$*MARK inserts the name from the last encountered backtracking control
|
||||||
verb on the matching path that has a name. (*MARK) must always include
|
verb on the matching path that has a name. (*MARK) must always include
|
||||||
a name, but the other verbs need not. For example, in the case of
|
a name, but the other verbs need not. For example, in the case of
|
||||||
(*MARK:A)(*PRUNE) the name inserted is "A", but for (*MARK:A)(*PRUNE:B)
|
(*MARK:A)(*PRUNE) the name inserted is "A", but for (*MARK:A)(*PRUNE:B)
|
||||||
the relevant name is "B". This facility can be used to perform simple
|
the relevant name is "B". This facility can be used to perform simple
|
||||||
simultaneous substitutions, as this pcre2test example shows:
|
simultaneous substitutions, as this pcre2test example shows:
|
||||||
|
|
||||||
/(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK}
|
/(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK}
|
||||||
|
@ -3411,15 +3412,15 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
|
||||||
2: pear orange
|
2: pear orange
|
||||||
|
|
||||||
PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject
|
PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject
|
||||||
string, replacing every matching substring. If this option is not set,
|
string, replacing every matching substring. If this option is not set,
|
||||||
only the first matching substring is replaced. The search for matches
|
only the first matching substring is replaced. The search for matches
|
||||||
takes place in the original subject string (that is, previous replace-
|
takes place in the original subject string (that is, previous replace-
|
||||||
ments do not affect it). Iteration is implemented by advancing the
|
ments do not affect it). Iteration is implemented by advancing the
|
||||||
startoffset value for each search, which is always passed the entire
|
startoffset value for each search, which is always passed the entire
|
||||||
subject string. If an offset limit is set in the match context, search-
|
subject string. If an offset limit is set in the match context, search-
|
||||||
ing stops when that limit is reached.
|
ing stops when that limit is reached.
|
||||||
|
|
||||||
You can restrict the effect of a global substitution to a portion of
|
You can restrict the effect of a global substitution to a portion of
|
||||||
the subject string by setting either or both of startoffset and an off-
|
the subject string by setting either or both of startoffset and an off-
|
||||||
set limit. Here is a pcre2test example:
|
set limit. Here is a pcre2test example:
|
||||||
|
|
||||||
|
@ -3427,73 +3428,73 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
|
||||||
ABC ABC ABC ABC\=offset=3,offset_limit=12
|
ABC ABC ABC ABC\=offset=3,offset_limit=12
|
||||||
2: ABC A!C A!C ABC
|
2: ABC A!C A!C ABC
|
||||||
|
|
||||||
When continuing with global substitutions after matching a substring
|
When continuing with global substitutions after matching a substring
|
||||||
with zero length, an attempt to find a non-empty match at the same off-
|
with zero length, an attempt to find a non-empty match at the same off-
|
||||||
set is performed. If this is not successful, the offset is advanced by
|
set is performed. If this is not successful, the offset is advanced by
|
||||||
one character except when CRLF is a valid newline sequence and the next
|
one character except when CRLF is a valid newline sequence and the next
|
||||||
two characters are CR, LF. In this case, the offset is advanced by two
|
two characters are CR, LF. In this case, the offset is advanced by two
|
||||||
characters.
|
characters.
|
||||||
|
|
||||||
PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that
|
PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that
|
||||||
do not appear in the pattern to be treated as unset groups. This option
|
do not appear in the pattern to be treated as unset groups. This option
|
||||||
should be used with care, because it means that a typo in a group name
|
should be used with care, because it means that a typo in a group name
|
||||||
or number no longer causes the PCRE2_ERROR_NOSUBSTRING error.
|
or number no longer causes the PCRE2_ERROR_NOSUBSTRING error.
|
||||||
|
|
||||||
PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capture groups (including un-
|
PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capture groups (including un-
|
||||||
known groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated
|
known groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated
|
||||||
as empty strings when inserted as described above. If this option is
|
as empty strings when inserted as described above. If this option is
|
||||||
not set, an attempt to insert an unset group causes the PCRE2_ERROR_UN-
|
not set, an attempt to insert an unset group causes the PCRE2_ERROR_UN-
|
||||||
SET error. This option does not influence the extended substitution
|
SET error. This option does not influence the extended substitution
|
||||||
syntax described below.
|
syntax described below.
|
||||||
|
|
||||||
PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the
|
PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the
|
||||||
replacement string. Without this option, only the dollar character is
|
replacement string. Without this option, only the dollar character is
|
||||||
special, and only the group insertion forms listed above are valid.
|
special, and only the group insertion forms listed above are valid.
|
||||||
When PCRE2_SUBSTITUTE_EXTENDED is set, two things change:
|
When PCRE2_SUBSTITUTE_EXTENDED is set, two things change:
|
||||||
|
|
||||||
Firstly, backslash in a replacement string is interpreted as an escape
|
Firstly, backslash in a replacement string is interpreted as an escape
|
||||||
character. The usual forms such as \n or \x{ddd} can be used to specify
|
character. The usual forms such as \n or \x{ddd} can be used to specify
|
||||||
particular character codes, and backslash followed by any non-alphanu-
|
particular character codes, and backslash followed by any non-alphanu-
|
||||||
meric character quotes that character. Extended quoting can be coded
|
meric character quotes that character. Extended quoting can be coded
|
||||||
using \Q...\E, exactly as in pattern strings.
|
using \Q...\E, exactly as in pattern strings.
|
||||||
|
|
||||||
There are also four escape sequences for forcing the case of inserted
|
There are also four escape sequences for forcing the case of inserted
|
||||||
letters. The insertion mechanism has three states: no case forcing,
|
letters. The insertion mechanism has three states: no case forcing,
|
||||||
force upper case, and force lower case. The escape sequences change the
|
force upper case, and force lower case. The escape sequences change the
|
||||||
current state: \U and \L change to upper or lower case forcing, respec-
|
current state: \U and \L change to upper or lower case forcing, respec-
|
||||||
tively, and \E (when not terminating a \Q quoted sequence) reverts to
|
tively, and \E (when not terminating a \Q quoted sequence) reverts to
|
||||||
no case forcing. The sequences \u and \l force the next character (if
|
no case forcing. The sequences \u and \l force the next character (if
|
||||||
it is a letter) to upper or lower case, respectively, and then the
|
it is a letter) to upper or lower case, respectively, and then the
|
||||||
state automatically reverts to no case forcing. Case forcing applies to
|
state automatically reverts to no case forcing. Case forcing applies to
|
||||||
all inserted characters, including those from capture groups and let-
|
all inserted characters, including those from capture groups and let-
|
||||||
ters within \Q...\E quoted sequences. If either PCRE2_UTF or PCRE2_UCP
|
ters within \Q...\E quoted sequences. If either PCRE2_UTF or PCRE2_UCP
|
||||||
was set when the pattern was compiled, Unicode properties are used for
|
was set when the pattern was compiled, Unicode properties are used for
|
||||||
case forcing characters whose code points are greater than 127.
|
case forcing characters whose code points are greater than 127.
|
||||||
|
|
||||||
Note that case forcing sequences such as \U...\E do not nest. For exam-
|
Note that case forcing sequences such as \U...\E do not nest. For exam-
|
||||||
ple, the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final
|
ple, the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final
|
||||||
\E has no effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EX-
|
\E has no effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EX-
|
||||||
TRA_ALT_BSUX options do not apply to replacement strings.
|
TRA_ALT_BSUX options do not apply to replacement strings.
|
||||||
|
|
||||||
The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
|
The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
|
||||||
flexibility to capture group substitution. The syntax is similar to
|
flexibility to capture group substitution. The syntax is similar to
|
||||||
that used by Bash:
|
that used by Bash:
|
||||||
|
|
||||||
${<n>:-<string>}
|
${<n>:-<string>}
|
||||||
${<n>:+<string1>:<string2>}
|
${<n>:+<string1>:<string2>}
|
||||||
|
|
||||||
As before, <n> may be a group number or a name. The first form speci-
|
As before, <n> may be a group number or a name. The first form speci-
|
||||||
fies a default value. If group <n> is set, its value is inserted; if
|
fies a default value. If group <n> is set, its value is inserted; if
|
||||||
not, <string> is expanded and the result inserted. The second form
|
not, <string> is expanded and the result inserted. The second form
|
||||||
specifies strings that are expanded and inserted when group <n> is set
|
specifies strings that are expanded and inserted when group <n> is set
|
||||||
or unset, respectively. The first form is just a convenient shorthand
|
or unset, respectively. The first form is just a convenient shorthand
|
||||||
for
|
for
|
||||||
|
|
||||||
${<n>:+${<n>}:<string>}
|
${<n>:+${<n>}:<string>}
|
||||||
|
|
||||||
Backslash can be used to escape colons and closing curly brackets in
|
Backslash can be used to escape colons and closing curly brackets in
|
||||||
the replacement strings. A change of the case forcing state within a
|
the replacement strings. A change of the case forcing state within a
|
||||||
replacement string remains in force afterwards, as shown in this
|
replacement string remains in force afterwards, as shown in this
|
||||||
pcre2test example:
|
pcre2test example:
|
||||||
|
|
||||||
/(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo
|
/(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo
|
||||||
|
@ -3502,8 +3503,8 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
|
||||||
somebody
|
somebody
|
||||||
1: HELLO
|
1: HELLO
|
||||||
|
|
||||||
The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended
|
The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended
|
||||||
substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause un-
|
substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause un-
|
||||||
known groups in the extended syntax forms to be treated as unset.
|
known groups in the extended syntax forms to be treated as unset.
|
||||||
|
|
||||||
If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_UNKNOWN_UNSET,
|
If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_UNKNOWN_UNSET,
|
||||||
|
@ -3512,39 +3513,39 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
|
||||||
|
|
||||||
Substitution errors
|
Substitution errors
|
||||||
|
|
||||||
In the event of an error, pcre2_substitute() returns a negative error
|
In the event of an error, pcre2_substitute() returns a negative error
|
||||||
code. Except for PCRE2_ERROR_NOMATCH (which is never returned), errors
|
code. Except for PCRE2_ERROR_NOMATCH (which is never returned), errors
|
||||||
from pcre2_match() are passed straight back.
|
from pcre2_match() are passed straight back.
|
||||||
|
|
||||||
PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring inser-
|
PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring inser-
|
||||||
tion, unless PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set.
|
tion, unless PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set.
|
||||||
|
|
||||||
PCRE2_ERROR_UNSET is returned for an unset substring insertion (includ-
|
PCRE2_ERROR_UNSET is returned for an unset substring insertion (includ-
|
||||||
ing an unknown substring when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set)
|
ing an unknown substring when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set)
|
||||||
when the simple (non-extended) syntax is used and PCRE2_SUBSTITUTE_UN-
|
when the simple (non-extended) syntax is used and PCRE2_SUBSTITUTE_UN-
|
||||||
SET_EMPTY is not set.
|
SET_EMPTY is not set.
|
||||||
|
|
||||||
PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big
|
PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big
|
||||||
enough. If the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set, the size
|
enough. If the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set, the size
|
||||||
of buffer that is needed is returned via outlengthptr. Note that this
|
of buffer that is needed is returned via outlengthptr. Note that this
|
||||||
does not happen by default.
|
does not happen by default.
|
||||||
|
|
||||||
PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the
|
PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the
|
||||||
match_data argument is NULL or if the subject or replacement arguments
|
match_data argument is NULL or if the subject or replacement arguments
|
||||||
are NULL. For backward compatibility reasons an exception is made for
|
are NULL. For backward compatibility reasons an exception is made for
|
||||||
the replacement argument if the rlength argument is also 0.
|
the replacement argument if the rlength argument is also 0.
|
||||||
|
|
||||||
PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in
|
PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in
|
||||||
the replacement string, with more particular errors being PCRE2_ER-
|
the replacement string, with more particular errors being PCRE2_ER-
|
||||||
ROR_BADREPESCAPE (invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE
|
ROR_BADREPESCAPE (invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE
|
||||||
(closing curly bracket not found), PCRE2_ERROR_BADSUBSTITUTION (syntax
|
(closing curly bracket not found), PCRE2_ERROR_BADSUBSTITUTION (syntax
|
||||||
error in extended group substitution), and PCRE2_ERROR_BADSUBSPATTERN
|
error in extended group substitution), and PCRE2_ERROR_BADSUBSPATTERN
|
||||||
(the pattern match ended before it started or the match started earlier
|
(the pattern match ended before it started or the match started earlier
|
||||||
than the current position in the subject, which can happen if \K is
|
than the current position in the subject, which can happen if \K is
|
||||||
used in an assertion).
|
used in an assertion).
|
||||||
|
|
||||||
As for all PCRE2 errors, a text message that describes the error can be
|
As for all PCRE2 errors, a text message that describes the error can be
|
||||||
obtained by calling the pcre2_get_error_message() function (see "Ob-
|
obtained by calling the pcre2_get_error_message() function (see "Ob-
|
||||||
taining a textual error message" above).
|
taining a textual error message" above).
|
||||||
|
|
||||||
Substitution callouts
|
Substitution callouts
|
||||||
|
@ -3553,15 +3554,15 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
|
||||||
int (*callout_function)(pcre2_substitute_callout_block *, void *),
|
int (*callout_function)(pcre2_substitute_callout_block *, void *),
|
||||||
void *callout_data);
|
void *callout_data);
|
||||||
|
|
||||||
The pcre2_set_substitution_callout() function can be used to specify a
|
The pcre2_set_substitution_callout() function can be used to specify a
|
||||||
callout function for pcre2_substitute(). This information is passed in
|
callout function for pcre2_substitute(). This information is passed in
|
||||||
a match context. The callout function is called after each substitution
|
a match context. The callout function is called after each substitution
|
||||||
has been processed, but it can cause the replacement not to happen. The
|
has been processed, but it can cause the replacement not to happen. The
|
||||||
callout function is not called for simulated substitutions that happen
|
callout function is not called for simulated substitutions that happen
|
||||||
as a result of the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option.
|
as a result of the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option.
|
||||||
|
|
||||||
The first argument of the callout function is a pointer to a substitute
|
The first argument of the callout function is a pointer to a substitute
|
||||||
callout block structure, which contains the following fields, not nec-
|
callout block structure, which contains the following fields, not nec-
|
||||||
essarily in this order:
|
essarily in this order:
|
||||||
|
|
||||||
uint32_t version;
|
uint32_t version;
|
||||||
|
@ -3572,34 +3573,34 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
|
||||||
uint32_t oveccount;
|
uint32_t oveccount;
|
||||||
PCRE2_SIZE output_offsets[2];
|
PCRE2_SIZE output_offsets[2];
|
||||||
|
|
||||||
The version field contains the version number of the block format. The
|
The version field contains the version number of the block format. The
|
||||||
current version is 0. The version number will increase in future if
|
current version is 0. The version number will increase in future if
|
||||||
more fields are added, but the intention is never to remove any of the
|
more fields are added, but the intention is never to remove any of the
|
||||||
existing fields.
|
existing fields.
|
||||||
|
|
||||||
The subscount field is the number of the current match. It is 1 for the
|
The subscount field is the number of the current match. It is 1 for the
|
||||||
first callout, 2 for the second, and so on. The input and output point-
|
first callout, 2 for the second, and so on. The input and output point-
|
||||||
ers are copies of the values passed to pcre2_substitute().
|
ers are copies of the values passed to pcre2_substitute().
|
||||||
|
|
||||||
The ovector field points to the ovector, which contains the result of
|
The ovector field points to the ovector, which contains the result of
|
||||||
the most recent match. The oveccount field contains the number of pairs
|
the most recent match. The oveccount field contains the number of pairs
|
||||||
that are set in the ovector, and is always greater than zero.
|
that are set in the ovector, and is always greater than zero.
|
||||||
|
|
||||||
The output_offsets vector contains the offsets of the replacement in
|
The output_offsets vector contains the offsets of the replacement in
|
||||||
the output string. This has already been processed for dollar and (if
|
the output string. This has already been processed for dollar and (if
|
||||||
requested) backslash substitutions as described above.
|
requested) backslash substitutions as described above.
|
||||||
|
|
||||||
The second argument of the callout function is the value passed as
|
The second argument of the callout function is the value passed as
|
||||||
callout_data when the function was registered. The value returned by
|
callout_data when the function was registered. The value returned by
|
||||||
the callout function is interpreted as follows:
|
the callout function is interpreted as follows:
|
||||||
|
|
||||||
If the value is zero, the replacement is accepted, and, if PCRE2_SUB-
|
If the value is zero, the replacement is accepted, and, if PCRE2_SUB-
|
||||||
STITUTE_GLOBAL is set, processing continues with a search for the next
|
STITUTE_GLOBAL is set, processing continues with a search for the next
|
||||||
match. If the value is not zero, the current replacement is not ac-
|
match. If the value is not zero, the current replacement is not ac-
|
||||||
cepted. If the value is greater than zero, processing continues when
|
cepted. If the value is greater than zero, processing continues when
|
||||||
PCRE2_SUBSTITUTE_GLOBAL is set. Otherwise (the value is less than zero
|
PCRE2_SUBSTITUTE_GLOBAL is set. Otherwise (the value is less than zero
|
||||||
or PCRE2_SUBSTITUTE_GLOBAL is not set), the rest of the input is
|
or PCRE2_SUBSTITUTE_GLOBAL is not set), the rest of the input is
|
||||||
copied to the output and the call to pcre2_substitute() exits, return-
|
copied to the output and the call to pcre2_substitute() exits, return-
|
||||||
ing the number of matches so far.
|
ing the number of matches so far.
|
||||||
|
|
||||||
|
|
||||||
|
@ -3608,56 +3609,56 @@ DUPLICATE CAPTURE GROUP NAMES
|
||||||
int pcre2_substring_nametable_scan(const pcre2_code *code,
|
int pcre2_substring_nametable_scan(const pcre2_code *code,
|
||||||
PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last);
|
PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last);
|
||||||
|
|
||||||
When a pattern is compiled with the PCRE2_DUPNAMES option, names for
|
When a pattern is compiled with the PCRE2_DUPNAMES option, names for
|
||||||
capture groups are not required to be unique. Duplicate names are al-
|
capture groups are not required to be unique. Duplicate names are al-
|
||||||
ways allowed for groups with the same number, created by using the (?|
|
ways allowed for groups with the same number, created by using the (?|
|
||||||
feature. Indeed, if such groups are named, they are required to use the
|
feature. Indeed, if such groups are named, they are required to use the
|
||||||
same names.
|
same names.
|
||||||
|
|
||||||
Normally, patterns that use duplicate names are such that in any one
|
Normally, patterns that use duplicate names are such that in any one
|
||||||
match, only one of each set of identically-named groups participates.
|
match, only one of each set of identically-named groups participates.
|
||||||
An example is shown in the pcre2pattern documentation.
|
An example is shown in the pcre2pattern documentation.
|
||||||
|
|
||||||
When duplicates are present, pcre2_substring_copy_byname() and
|
When duplicates are present, pcre2_substring_copy_byname() and
|
||||||
pcre2_substring_get_byname() return the first substring corresponding
|
pcre2_substring_get_byname() return the first substring corresponding
|
||||||
to the given name that is set. Only if none are set is PCRE2_ERROR_UN-
|
to the given name that is set. Only if none are set is PCRE2_ERROR_UN-
|
||||||
SET returned. The pcre2_substring_number_from_name() function re-
|
SET returned. The pcre2_substring_number_from_name() function re-
|
||||||
turns the error PCRE2_ERROR_NOUNIQUESUBSTRING when there are duplicate
|
turns the error PCRE2_ERROR_NOUNIQUESUBSTRING when there are duplicate
|
||||||
names.
|
names.
|
||||||
|
|
||||||
If you want to get full details of all captured substrings for a given
|
If you want to get full details of all captured substrings for a given
|
||||||
name, you must use the pcre2_substring_nametable_scan() function. The
|
name, you must use the pcre2_substring_nametable_scan() function. The
|
||||||
first argument is the compiled pattern, and the second is the name. If
|
first argument is the compiled pattern, and the second is the name. If
|
||||||
the third and fourth arguments are NULL, the function returns a group
|
the third and fourth arguments are NULL, the function returns a group
|
||||||
number for a unique name, or PCRE2_ERROR_NOUNIQUESUBSTRING otherwise.
|
number for a unique name, or PCRE2_ERROR_NOUNIQUESUBSTRING otherwise.
|
||||||
|
|
||||||
When the third and fourth arguments are not NULL, they must be pointers
|
When the third and fourth arguments are not NULL, they must be pointers
|
||||||
to variables that are updated by the function. After it has run, they
|
to variables that are updated by the function. After it has run, they
|
||||||
point to the first and last entries in the name-to-number table for the
|
point to the first and last entries in the name-to-number table for the
|
||||||
given name, and the function returns the length of each entry in code
|
given name, and the function returns the length of each entry in code
|
||||||
units. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if there are
|
units. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if there are
|
||||||
no entries for the given name.
|
no entries for the given name.
|
||||||
|
|
||||||
The format of the name table is described above in the section entitled
|
The format of the name table is described above in the section entitled
|
||||||
Information about a pattern. Given all the relevant entries for the
|
Information about a pattern. Given all the relevant entries for the
|
||||||
name, you can extract each of their numbers, and hence the captured
|
name, you can extract each of their numbers, and hence the captured
|
||||||
data.
|
data.
|
||||||
|
|
||||||
|
|
||||||
FINDING ALL POSSIBLE MATCHES AT ONE POSITION
|
FINDING ALL POSSIBLE MATCHES AT ONE POSITION
|
||||||
|
|
||||||
The traditional matching function uses a similar algorithm to Perl,
|
The traditional matching function uses a similar algorithm to Perl,
|
||||||
which stops when it finds the first match at a given point in the sub-
|
which stops when it finds the first match at a given point in the sub-
|
||||||
ject. If you want to find all possible matches, or the longest possible
|
ject. If you want to find all possible matches, or the longest possible
|
||||||
match at a given position, consider using the alternative matching
|
match at a given position, consider using the alternative matching
|
||||||
function (see below) instead. If you cannot use the alternative func-
|
function (see below) instead. If you cannot use the alternative func-
|
||||||
tion, you can kludge it up by making use of the callout facility, which
|
tion, you can kludge it up by making use of the callout facility, which
|
||||||
is described in the pcre2callout documentation.
|
is described in the pcre2callout documentation.
|
||||||
|
|
||||||
What you have to do is to insert a callout right at the end of the pat-
|
What you have to do is to insert a callout right at the end of the pat-
|
||||||
tern. When your callout function is called, extract and save the cur-
|
tern. When your callout function is called, extract and save the cur-
|
||||||
rent matched substring. Then return 1, which forces pcre2_match() to
|
rent matched substring. Then return 1, which forces pcre2_match() to
|
||||||
backtrack and try other alternatives. Ultimately, when it runs out of
|
backtrack and try other alternatives. Ultimately, when it runs out of
|
||||||
matches, pcre2_match() will yield PCRE2_ERROR_NOMATCH.
|
matches, pcre2_match() will yield PCRE2_ERROR_NOMATCH.
|
||||||
|
|
||||||
|
|
||||||
|
@ -3669,27 +3670,27 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
||||||
pcre2_match_context *mcontext,
|
pcre2_match_context *mcontext,
|
||||||
int *workspace, PCRE2_SIZE wscount);
|
int *workspace, PCRE2_SIZE wscount);
|
||||||
|
|
||||||
The function pcre2_dfa_match() is called to match a subject string
|
The function pcre2_dfa_match() is called to match a subject string
|
||||||
against a compiled pattern, using a matching algorithm that scans the
|
against a compiled pattern, using a matching algorithm that scans the
|
||||||
subject string just once (not counting lookaround assertions), and does
|
subject string just once (not counting lookaround assertions), and does
|
||||||
not backtrack (except when processing lookaround assertions). This has
|
not backtrack (except when processing lookaround assertions). This has
|
||||||
different characteristics to the normal algorithm, and is not compati-
|
different characteristics to the normal algorithm, and is not compati-
|
||||||
ble with Perl. Some of the features of PCRE2 patterns are not sup-
|
ble with Perl. Some of the features of PCRE2 patterns are not sup-
|
||||||
ported. Nevertheless, there are times when this kind of matching can be
|
ported. Nevertheless, there are times when this kind of matching can be
|
||||||
useful. For a discussion of the two matching algorithms, and a list of
|
useful. For a discussion of the two matching algorithms, and a list of
|
||||||
features that pcre2_dfa_match() does not support, see the pcre2matching
|
features that pcre2_dfa_match() does not support, see the pcre2matching
|
||||||
documentation.
|
documentation.
|
||||||
|
|
||||||
The arguments for the pcre2_dfa_match() function are the same as for
|
The arguments for the pcre2_dfa_match() function are the same as for
|
||||||
pcre2_match(), plus two extras. The ovector within the match data block
|
pcre2_match(), plus two extras. The ovector within the match data block
|
||||||
is used in a different way, and this is described below. The other com-
|
is used in a different way, and this is described below. The other com-
|
||||||
mon arguments are used in the same way as for pcre2_match(), so their
|
mon arguments are used in the same way as for pcre2_match(), so their
|
||||||
description is not repeated here.
|
description is not repeated here.
|
||||||
|
|
||||||
The two additional arguments provide workspace for the function. The
|
The two additional arguments provide workspace for the function. The
|
||||||
workspace vector should contain at least 20 elements. It is used for
|
workspace vector should contain at least 20 elements. It is used for
|
||||||
keeping track of multiple paths through the pattern tree. More
|
keeping track of multiple paths through the pattern tree. More
|
||||||
workspace is needed for patterns and subjects where there are a lot of
|
workspace is needed for patterns and subjects where there are a lot of
|
||||||
potential matches.
|
potential matches.
|
||||||
|
|
||||||
Here is an example of a simple call to pcre2_dfa_match():
|
Here is an example of a simple call to pcre2_dfa_match():
|
||||||
|
@ -3709,45 +3710,45 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
||||||
|
|
||||||
Option bits for pcre2_dfa_match()
|
Option bits for pcre2_dfa_match()
|
||||||
|
|
||||||
The unused bits of the options argument for pcre2_dfa_match() must be
|
The unused bits of the options argument for pcre2_dfa_match() must be
|
||||||
zero. The only bits that may be set are PCRE2_ANCHORED,
|
zero. The only bits that may be set are PCRE2_ANCHORED,
|
||||||
PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NO-
|
PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NO-
|
||||||
TEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK,
|
TEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_UTF_CHECK,
|
||||||
PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and
|
PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT, PCRE2_DFA_SHORTEST, and
|
||||||
PCRE2_DFA_RESTART. All but the last four of these are exactly the same
|
PCRE2_DFA_RESTART. All but the last four of these are exactly the same
|
||||||
as for pcre2_match(), so their description is not repeated here.
|
as for pcre2_match(), so their description is not repeated here.
|
||||||
|
|
||||||
PCRE2_PARTIAL_HARD
|
PCRE2_PARTIAL_HARD
|
||||||
PCRE2_PARTIAL_SOFT
|
PCRE2_PARTIAL_SOFT
|
||||||
|
|
||||||
These have the same general effect as they do for pcre2_match(), but
|
These have the same general effect as they do for pcre2_match(), but
|
||||||
the details are slightly different. When PCRE2_PARTIAL_HARD is set for
|
the details are slightly different. When PCRE2_PARTIAL_HARD is set for
|
||||||
pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if the end of the
|
pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if the end of the
|
||||||
subject is reached and there is still at least one matching possibility
|
subject is reached and there is still at least one matching possibility
|
||||||
that requires additional characters. This happens even if some complete
|
that requires additional characters. This happens even if some complete
|
||||||
matches have already been found. When PCRE2_PARTIAL_SOFT is set, the
|
matches have already been found. When PCRE2_PARTIAL_SOFT is set, the
|
||||||
return code PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL
|
return code PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL
|
||||||
if the end of the subject is reached, there have been no complete
|
if the end of the subject is reached, there have been no complete
|
||||||
matches, but there is still at least one matching possibility. The por-
|
matches, but there is still at least one matching possibility. The por-
|
||||||
tion of the string that was inspected when the longest partial match
|
tion of the string that was inspected when the longest partial match
|
||||||
was found is set as the first matching string in both cases. There is a
|
was found is set as the first matching string in both cases. There is a
|
||||||
more detailed discussion of partial and multi-segment matching, with
|
more detailed discussion of partial and multi-segment matching, with
|
||||||
examples, in the pcre2partial documentation.
|
examples, in the pcre2partial documentation.
|
||||||
|
|
||||||
PCRE2_DFA_SHORTEST
|
PCRE2_DFA_SHORTEST
|
||||||
|
|
||||||
Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to
|
Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to
|
||||||
stop as soon as it has found one match. Because of the way the alterna-
|
stop as soon as it has found one match. Because of the way the alterna-
|
||||||
tive algorithm works, this is necessarily the shortest possible match
|
tive algorithm works, this is necessarily the shortest possible match
|
||||||
at the first possible matching point in the subject string.
|
at the first possible matching point in the subject string.
|
||||||
|
|
||||||
PCRE2_DFA_RESTART
|
PCRE2_DFA_RESTART
|
||||||
|
|
||||||
When pcre2_dfa_match() returns a partial match, it is possible to call
|
When pcre2_dfa_match() returns a partial match, it is possible to call
|
||||||
it again, with additional subject characters, and have it continue with
|
it again, with additional subject characters, and have it continue with
|
||||||
the same match. The PCRE2_DFA_RESTART option requests this action; when
|
the same match. The PCRE2_DFA_RESTART option requests this action; when
|
||||||
it is set, the workspace and wscount options must reference the same
|
it is set, the workspace and wscount options must reference the same
|
||||||
vector as before because data about the match so far is left in them
|
vector as before because data about the match so far is left in them
|
||||||
after a partial match. There is more discussion of this facility in the
|
after a partial match. There is more discussion of this facility in the
|
||||||
pcre2partial documentation.
|
pcre2partial documentation.
|
||||||
|
|
||||||
|
@ -3755,8 +3756,8 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
||||||
|
|
||||||
When pcre2_dfa_match() succeeds, it may have matched more than one sub-
|
When pcre2_dfa_match() succeeds, it may have matched more than one sub-
|
||||||
string in the subject. Note, however, that all the matches from one run
|
string in the subject. Note, however, that all the matches from one run
|
||||||
of the function start at the same point in the subject. The shorter
|
of the function start at the same point in the subject. The shorter
|
||||||
matches are all initial substrings of the longer matches. For example,
|
matches are all initial substrings of the longer matches. For example,
|
||||||
if the pattern
|
if the pattern
|
||||||
|
|
||||||
<.*>
|
<.*>
|
||||||
|
@ -3771,80 +3772,80 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
||||||
<something> <something else>
|
<something> <something else>
|
||||||
<something>
|
<something>
|
||||||
|
|
||||||
On success, the yield of the function is a number greater than zero,
|
On success, the yield of the function is a number greater than zero,
|
||||||
which is the number of matched substrings. The offsets of the sub-
|
which is the number of matched substrings. The offsets of the sub-
|
||||||
strings are returned in the ovector, and can be extracted by number in
|
strings are returned in the ovector, and can be extracted by number in
|
||||||
the same way as for pcre2_match(), but the numbers bear no relation to
|
the same way as for pcre2_match(), but the numbers bear no relation to
|
||||||
any capture groups that may exist in the pattern, because DFA matching
|
any capture groups that may exist in the pattern, because DFA matching
|
||||||
does not support capturing.
|
does not support capturing.
|
||||||
|
|
||||||
Calls to the convenience functions that extract substrings by name re-
|
Calls to the convenience functions that extract substrings by name re-
|
||||||
turn the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used af-
|
turn the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used af-
|
||||||
ter a DFA match. The convenience functions that extract substrings by
|
ter a DFA match. The convenience functions that extract substrings by
|
||||||
number never return PCRE2_ERROR_NOSUBSTRING.
|
number never return PCRE2_ERROR_NOSUBSTRING.
|
||||||
|
|
||||||
The matched strings are stored in the ovector in reverse order of
|
The matched strings are stored in the ovector in reverse order of
|
||||||
length; that is, the longest matching string is first. If there were
|
length; that is, the longest matching string is first. If there were
|
||||||
too many matches to fit into the ovector, the yield of the function is
|
too many matches to fit into the ovector, the yield of the function is
|
||||||
zero, and the vector is filled with the longest matches.
|
zero, and the vector is filled with the longest matches.
|
||||||
|
|
||||||
NOTE: PCRE2's "auto-possessification" optimization usually applies to
|
NOTE: PCRE2's "auto-possessification" optimization usually applies to
|
||||||
character repeats at the end of a pattern (as well as internally). For
|
character repeats at the end of a pattern (as well as internally). For
|
||||||
example, the pattern "a\d+" is compiled as if it were "a\d++". For DFA
|
example, the pattern "a\d+" is compiled as if it were "a\d++". For DFA
|
||||||
matching, this means that only one possible match is found. If you re-
|
matching, this means that only one possible match is found. If you re-
|
||||||
ally do want multiple matches in such cases, either use an ungreedy re-
|
ally do want multiple matches in such cases, either use an ungreedy re-
|
||||||
peat such as "a\d+?" or set the PCRE2_NO_AUTO_POSSESS option when com-
|
peat such as "a\d+?" or set the PCRE2_NO_AUTO_POSSESS option when com-
|
||||||
piling.
|
piling.
|
||||||
|
|
||||||
Error returns from pcre2_dfa_match()
|
Error returns from pcre2_dfa_match()
|
||||||
|
|
||||||
The pcre2_dfa_match() function returns a negative number when it fails.
|
The pcre2_dfa_match() function returns a negative number when it fails.
|
||||||
Many of the errors are the same as for pcre2_match(), as described
|
Many of the errors are the same as for pcre2_match(), as described
|
||||||
above. There are in addition the following errors that are specific to
|
above. There are in addition the following errors that are specific to
|
||||||
pcre2_dfa_match():
|
pcre2_dfa_match():
|
||||||
|
|
||||||
PCRE2_ERROR_DFA_UITEM
|
PCRE2_ERROR_DFA_UITEM
|
||||||
|
|
||||||
This return is given if pcre2_dfa_match() encounters an item in the
|
This return is given if pcre2_dfa_match() encounters an item in the
|
||||||
pattern that it does not support, for instance, the use of \C in a UTF
|
pattern that it does not support, for instance, the use of \C in a UTF
|
||||||
mode or a backreference.
|
mode or a backreference.
|
||||||
|
|
||||||
PCRE2_ERROR_DFA_UCOND
|
PCRE2_ERROR_DFA_UCOND
|
||||||
|
|
||||||
This return is given if pcre2_dfa_match() encounters a condition item
|
This return is given if pcre2_dfa_match() encounters a condition item
|
||||||
that uses a backreference for the condition, or a test for recursion in
|
that uses a backreference for the condition, or a test for recursion in
|
||||||
a specific capture group. These are not supported.
|
a specific capture group. These are not supported.
|
||||||
|
|
||||||
PCRE2_ERROR_DFA_UINVALID_UTF
|
PCRE2_ERROR_DFA_UINVALID_UTF
|
||||||
|
|
||||||
This return is given if pcre2_dfa_match() is called for a pattern that
|
This return is given if pcre2_dfa_match() is called for a pattern that
|
||||||
was compiled with PCRE2_MATCH_INVALID_UTF. This is not supported for
|
was compiled with PCRE2_MATCH_INVALID_UTF. This is not supported for
|
||||||
DFA matching.
|
DFA matching.
|
||||||
|
|
||||||
PCRE2_ERROR_DFA_WSSIZE
|
PCRE2_ERROR_DFA_WSSIZE
|
||||||
|
|
||||||
This return is given if pcre2_dfa_match() runs out of space in the
|
This return is given if pcre2_dfa_match() runs out of space in the
|
||||||
workspace vector.
|
workspace vector.
|
||||||
|
|
||||||
PCRE2_ERROR_DFA_RECURSE
|
PCRE2_ERROR_DFA_RECURSE
|
||||||
|
|
||||||
When a recursion or subroutine call is processed, the matching function
|
When a recursion or subroutine call is processed, the matching function
|
||||||
calls itself recursively, using private memory for the ovector and
|
calls itself recursively, using private memory for the ovector and
|
||||||
workspace. This error is given if the internal ovector is not large
|
workspace. This error is given if the internal ovector is not large
|
||||||
enough. This should be extremely rare, as a vector of size 1000 is
|
enough. This should be extremely rare, as a vector of size 1000 is
|
||||||
used.
|
used.
|
||||||
|
|
||||||
PCRE2_ERROR_DFA_BADRESTART
|
PCRE2_ERROR_DFA_BADRESTART
|
||||||
|
|
||||||
When pcre2_dfa_match() is called with the PCRE2_DFA_RESTART option,
|
When pcre2_dfa_match() is called with the PCRE2_DFA_RESTART option,
|
||||||
some plausibility checks are made on the contents of the workspace,
|
some plausibility checks are made on the contents of the workspace,
|
||||||
which should contain data about the previous partial match. If any of
|
which should contain data about the previous partial match. If any of
|
||||||
these checks fail, this error is given.
|
these checks fail, this error is given.
|
||||||
|
|
||||||
|
|
||||||
SEE ALSO
|
SEE ALSO
|
||||||
|
|
||||||
pcre2build(3), pcre2callout(3), pcre2demo(3), pcre2matching(3),
|
pcre2build(3), pcre2callout(3), pcre2demo(3), pcre2matching(3),
|
||||||
pcre2partial(3), pcre2posix(3), pcre2sample(3), pcre2unicode(3).
|
pcre2partial(3), pcre2posix(3), pcre2sample(3), pcre2unicode(3).
|
||||||
|
|
||||||
|
|
||||||
|
@ -3857,7 +3858,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 08 December 2021
|
Last updated: 14 December 2021
|
||||||
Copyright (c) 1997-2021 University of Cambridge.
|
Copyright (c) 1997-2021 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2API 3 "08 December 2021" "PCRE2 10.40"
|
.TH PCRE2API 3 "14 December 2021" "PCRE2 10.40"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
|
@ -3453,12 +3453,12 @@ block may or may not have been changed.
|
||||||
As well as the usual options for \fBpcre2_match()\fP, a number of additional
|
As well as the usual options for \fBpcre2_match()\fP, a number of additional
|
||||||
options can be set in the \fIoptions\fP argument of \fBpcre2_substitute()\fP.
|
options can be set in the \fIoptions\fP argument of \fBpcre2_substitute()\fP.
|
||||||
One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external
|
One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external
|
||||||
\fImatch_data\fP block must be provided, and it must have been used for an
|
\fImatch_data\fP block must be provided, and it must have already been used for
|
||||||
external call to \fBpcre2_match()\fP. The data in the \fImatch_data\fP block
|
an external call to \fBpcre2_match()\fP with the same pattern and subject
|
||||||
(return code, offset vector) is used for the first substitution instead of
|
arguments. The data in the \fImatch_data\fP block (return code, offset vector)
|
||||||
calling \fBpcre2_match()\fP from within \fBpcre2_substitute()\fP. This allows
|
is then used for the first substitution instead of calling \fBpcre2_match()\fP
|
||||||
an application to check for a match before choosing to substitute, without
|
from within \fBpcre2_substitute()\fP. This allows an application to check for a
|
||||||
having to repeat the match.
|
match before choosing to substitute, without having to repeat the match.
|
||||||
.P
|
.P
|
||||||
The contents of the externally supplied match data block are not changed when
|
The contents of the externally supplied match data block are not changed when
|
||||||
PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTITUTE_GLOBAL is also set,
|
PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTITUTE_GLOBAL is also set,
|
||||||
|
@ -4025,6 +4025,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 08 December 2021
|
Last updated: 14 December 2021
|
||||||
Copyright (c) 1997-2021 University of Cambridge.
|
Copyright (c) 1997-2021 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -124,7 +124,7 @@ static unsigned int
|
||||||
|
|
||||||
static int
|
static int
|
||||||
compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t,
|
compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t,
|
||||||
uint32_t *, int32_t *, uint32_t *, int32_t *, branch_chain *,
|
uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *,
|
||||||
compile_block *, PCRE2_SIZE *);
|
compile_block *, PCRE2_SIZE *);
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -385,13 +385,15 @@ compiler is clever with identical subexpressions. */
|
||||||
|
|
||||||
#define SETBIT(a,b) a[(b)/8] = (uint8_t)(a[(b)/8] | (1u << ((b)&7)))
|
#define SETBIT(a,b) a[(b)/8] = (uint8_t)(a[(b)/8] | (1u << ((b)&7)))
|
||||||
|
|
||||||
/* Private flags added to firstcu and reqcu. */
|
/* Values and flags for the unsigned xxcuflags variables that accompany xxcu
|
||||||
|
variables, which are concerned with first and required code units. A value
|
||||||
|
greater than or equal to REQ_NONE means "no code unit set"; otherwise the
|
||||||
|
matching xxcu variable is set, and the low valued bits are relevant. */
|
||||||
|
|
||||||
#define REQ_CASELESS (1u << 0) /* Indicates caselessness */
|
#define REQ_UNSET 0xffffffffu /* Not yet found anything */
|
||||||
#define REQ_VARY (1u << 1) /* reqcu followed non-literal item */
|
#define REQ_NONE 0xfffffffeu /* Found not fixed character */
|
||||||
/* Negative values for the firstcu and reqcu flags */
|
#define REQ_CASELESS 0x00000001u /* Code unit in xxcu is caseless */
|
||||||
#define REQ_UNSET (-2) /* Not yet found anything */
|
#define REQ_VARY 0x00000002u /* Code unit is followed by non-literal */
|
||||||
#define REQ_NONE (-1) /* Found not fixed char */
|
|
||||||
|
|
||||||
/* These flags are used in the groupinfo vector. */
|
/* These flags are used in the groupinfo vector. */
|
||||||
|
|
||||||
|
@ -2139,7 +2141,7 @@ if (c == CHAR_LEFT_CURLY_BRACKET)
|
||||||
|
|
||||||
while (top != bot)
|
while (top != bot)
|
||||||
{
|
{
|
||||||
size_t mid = ((top + bot)/2) & (-2);
|
size_t mid = ((top + bot)/2) & (size_t)(~2+1); /* Mask off bottom bit */
|
||||||
int cf = PRIV(strcmp_c8)(name, prop_synonyms[mid]);
|
int cf = PRIV(strcmp_c8)(name, prop_synonyms[mid]);
|
||||||
if (cf == 0)
|
if (cf == 0)
|
||||||
{
|
{
|
||||||
|
@ -5343,9 +5345,9 @@ Arguments:
|
||||||
pptrptr points to the current parsed pattern pointer
|
pptrptr points to the current parsed pattern pointer
|
||||||
errorcodeptr points to error code variable
|
errorcodeptr points to error code variable
|
||||||
firstcuptr place to put the first required code unit
|
firstcuptr place to put the first required code unit
|
||||||
firstcuflagsptr place to put the first code unit flags, or a negative number
|
firstcuflagsptr place to put the first code unit flags
|
||||||
reqcuptr place to put the last required code unit
|
reqcuptr place to put the last required code unit
|
||||||
reqcuflagsptr place to put the last required code unit flags, or a negative number
|
reqcuflagsptr place to put the last required code unit flags
|
||||||
bcptr points to current branch chain
|
bcptr points to current branch chain
|
||||||
cb contains pointers to tables etc.
|
cb contains pointers to tables etc.
|
||||||
lengthptr NULL during the real compile phase
|
lengthptr NULL during the real compile phase
|
||||||
|
@ -5358,8 +5360,8 @@ Returns: 0 There's been an error, *errorcodeptr is non-zero
|
||||||
|
|
||||||
static int
|
static int
|
||||||
compile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr, uint32_t **pptrptr,
|
compile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr, uint32_t **pptrptr,
|
||||||
int *errorcodeptr, uint32_t *firstcuptr, int32_t *firstcuflagsptr,
|
int *errorcodeptr, uint32_t *firstcuptr, uint32_t *firstcuflagsptr,
|
||||||
uint32_t *reqcuptr, int32_t *reqcuflagsptr, branch_chain *bcptr,
|
uint32_t *reqcuptr, uint32_t *reqcuflagsptr, branch_chain *bcptr,
|
||||||
compile_block *cb, PCRE2_SIZE *lengthptr)
|
compile_block *cb, PCRE2_SIZE *lengthptr)
|
||||||
{
|
{
|
||||||
int bravalue = 0;
|
int bravalue = 0;
|
||||||
|
@ -5374,9 +5376,9 @@ uint32_t zeroreqcu, zerofirstcu;
|
||||||
uint32_t escape;
|
uint32_t escape;
|
||||||
uint32_t *pptr = *pptrptr;
|
uint32_t *pptr = *pptrptr;
|
||||||
uint32_t meta, meta_arg;
|
uint32_t meta, meta_arg;
|
||||||
int32_t firstcuflags, reqcuflags;
|
uint32_t firstcuflags, reqcuflags;
|
||||||
int32_t zeroreqcuflags, zerofirstcuflags;
|
uint32_t zeroreqcuflags, zerofirstcuflags;
|
||||||
int32_t req_caseopt, reqvary, tempreqvary;
|
uint32_t req_caseopt, reqvary, tempreqvary;
|
||||||
PCRE2_SIZE offset = 0;
|
PCRE2_SIZE offset = 0;
|
||||||
PCRE2_SIZE length_prevgroup = 0;
|
PCRE2_SIZE length_prevgroup = 0;
|
||||||
PCRE2_UCHAR *code = *codeptr;
|
PCRE2_UCHAR *code = *codeptr;
|
||||||
|
@ -5432,13 +5434,13 @@ item types that can be repeated set these backoff variables appropriately. */
|
||||||
firstcu = reqcu = zerofirstcu = zeroreqcu = 0;
|
firstcu = reqcu = zerofirstcu = zeroreqcu = 0;
|
||||||
firstcuflags = reqcuflags = zerofirstcuflags = zeroreqcuflags = REQ_UNSET;
|
firstcuflags = reqcuflags = zerofirstcuflags = zeroreqcuflags = REQ_UNSET;
|
||||||
|
|
||||||
/* The variable req_caseopt contains either the REQ_CASELESS value or zero,
|
/* The variable req_caseopt contains either the REQ_CASELESS bit or zero,
|
||||||
according to the current setting of the caseless flag. The REQ_CASELESS value
|
according to the current setting of the caseless flag. The REQ_CASELESS value
|
||||||
leaves the lower 28 bit empty. It is added into the firstcu or reqcu variables
|
leaves the lower 28 bit empty. It is added into the firstcu or reqcu variables
|
||||||
to record the case status of the value. This is used only for ASCII characters.
|
to record the case status of the value. This is used only for ASCII characters.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS:0;
|
req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS : 0;
|
||||||
|
|
||||||
/* Switch on next META item until the end of the branch */
|
/* Switch on next META item until the end of the branch */
|
||||||
|
|
||||||
|
@ -5453,13 +5455,12 @@ for (;; pptr++)
|
||||||
BOOL possessive_quantifier;
|
BOOL possessive_quantifier;
|
||||||
BOOL note_group_empty;
|
BOOL note_group_empty;
|
||||||
int class_has_8bitchar;
|
int class_has_8bitchar;
|
||||||
int i;
|
|
||||||
uint32_t mclength;
|
uint32_t mclength;
|
||||||
uint32_t skipunits;
|
uint32_t skipunits;
|
||||||
uint32_t subreqcu, subfirstcu;
|
uint32_t subreqcu, subfirstcu;
|
||||||
uint32_t groupnumber;
|
uint32_t groupnumber;
|
||||||
uint32_t verbarglen, verbculen;
|
uint32_t verbarglen, verbculen;
|
||||||
int32_t subreqcuflags, subfirstcuflags; /* Must be signed */
|
uint32_t subreqcuflags, subfirstcuflags;
|
||||||
open_capitem *oc;
|
open_capitem *oc;
|
||||||
PCRE2_UCHAR mcbuffer[8];
|
PCRE2_UCHAR mcbuffer[8];
|
||||||
|
|
||||||
|
@ -5828,9 +5829,9 @@ for (;; pptr++)
|
||||||
if (taboffset >= 0)
|
if (taboffset >= 0)
|
||||||
{
|
{
|
||||||
if (tabopt >= 0)
|
if (tabopt >= 0)
|
||||||
for (i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset];
|
for (int i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset];
|
||||||
else
|
else
|
||||||
for (i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset];
|
for (int i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Now see if we need to remove any special characters. An option
|
/* Now see if we need to remove any special characters. An option
|
||||||
|
@ -5844,9 +5845,9 @@ for (;; pptr++)
|
||||||
being built and we are done. */
|
being built and we are done. */
|
||||||
|
|
||||||
if (local_negate)
|
if (local_negate)
|
||||||
for (i = 0; i < 32; i++) classbits[i] |= ~pbits[i];
|
for (int i = 0; i < 32; i++) classbits[i] |= (uint8_t)(~pbits[i]);
|
||||||
else
|
else
|
||||||
for (i = 0; i < 32; i++) classbits[i] |= pbits[i];
|
for (int i = 0; i < 32; i++) classbits[i] |= pbits[i];
|
||||||
|
|
||||||
/* Every class contains at least one < 256 character. */
|
/* Every class contains at least one < 256 character. */
|
||||||
|
|
||||||
|
@ -5885,21 +5886,23 @@ for (;; pptr++)
|
||||||
switch(escape)
|
switch(escape)
|
||||||
{
|
{
|
||||||
case ESC_d:
|
case ESC_d:
|
||||||
for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
|
for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ESC_D:
|
case ESC_D:
|
||||||
should_flip_negation = TRUE;
|
should_flip_negation = TRUE;
|
||||||
for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_digit];
|
for (int i = 0; i < 32; i++)
|
||||||
|
classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ESC_w:
|
case ESC_w:
|
||||||
for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
|
for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ESC_W:
|
case ESC_W:
|
||||||
should_flip_negation = TRUE;
|
should_flip_negation = TRUE;
|
||||||
for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_word];
|
for (int i = 0; i < 32; i++)
|
||||||
|
classbits[i] |= (uint8_t)(~cbits[i+cbit_word]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
|
/* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
|
||||||
|
@ -5910,12 +5913,13 @@ for (;; pptr++)
|
||||||
longer treat \s and \S specially. */
|
longer treat \s and \S specially. */
|
||||||
|
|
||||||
case ESC_s:
|
case ESC_s:
|
||||||
for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
|
for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ESC_S:
|
case ESC_S:
|
||||||
should_flip_negation = TRUE;
|
should_flip_negation = TRUE;
|
||||||
for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_space];
|
for (int i = 0; i < 32; i++)
|
||||||
|
classbits[i] |= (uint8_t)(~cbits[i+cbit_space]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* When adding the horizontal or vertical space lists to a class, or
|
/* When adding the horizontal or vertical space lists to a class, or
|
||||||
|
@ -6156,7 +6160,7 @@ for (;; pptr++)
|
||||||
if (negate_class && !xclass_has_prop)
|
if (negate_class && !xclass_has_prop)
|
||||||
{
|
{
|
||||||
/* Using 255 ^ instead of ~ avoids clang sanitize warning. */
|
/* Using 255 ^ instead of ~ avoids clang sanitize warning. */
|
||||||
for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
|
for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
|
||||||
}
|
}
|
||||||
memcpy(code, classbits, 32);
|
memcpy(code, classbits, 32);
|
||||||
code = class_uchardata + (32 / sizeof(PCRE2_UCHAR));
|
code = class_uchardata + (32 / sizeof(PCRE2_UCHAR));
|
||||||
|
@ -6182,7 +6186,7 @@ for (;; pptr++)
|
||||||
if (negate_class)
|
if (negate_class)
|
||||||
{
|
{
|
||||||
/* Using 255 ^ instead of ~ avoids clang sanitize warning. */
|
/* Using 255 ^ instead of ~ avoids clang sanitize warning. */
|
||||||
for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
|
for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
|
||||||
}
|
}
|
||||||
memcpy(code, classbits, 32);
|
memcpy(code, classbits, 32);
|
||||||
}
|
}
|
||||||
|
@ -6256,7 +6260,7 @@ for (;; pptr++)
|
||||||
verbarglen = *(++pptr);
|
verbarglen = *(++pptr);
|
||||||
verbculen = 0;
|
verbculen = 0;
|
||||||
tempcode = code++;
|
tempcode = code++;
|
||||||
for (i = 0; i < (int)verbarglen; i++)
|
for (int i = 0; i < (int)verbarglen; i++)
|
||||||
{
|
{
|
||||||
meta = *(++pptr);
|
meta = *(++pptr);
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
|
@ -6305,6 +6309,7 @@ for (;; pptr++)
|
||||||
bravalue = OP_COND;
|
bravalue = OP_COND;
|
||||||
{
|
{
|
||||||
int count, index;
|
int count, index;
|
||||||
|
unsigned int i;
|
||||||
PCRE2_SPTR name;
|
PCRE2_SPTR name;
|
||||||
named_group *ng = cb->named_groups;
|
named_group *ng = cb->named_groups;
|
||||||
uint32_t length = *(++pptr);
|
uint32_t length = *(++pptr);
|
||||||
|
@ -6344,7 +6349,7 @@ for (;; pptr++)
|
||||||
groupnumber = 0;
|
groupnumber = 0;
|
||||||
if (meta == META_COND_RNUMBER)
|
if (meta == META_COND_RNUMBER)
|
||||||
{
|
{
|
||||||
for (i = 1; i < (int)length; i++)
|
for (i = 1; i < length; i++)
|
||||||
{
|
{
|
||||||
groupnumber = groupnumber * 10 + name[i] - CHAR_0;
|
groupnumber = groupnumber * 10 + name[i] - CHAR_0;
|
||||||
if (groupnumber > MAX_GROUP_NUMBER)
|
if (groupnumber > MAX_GROUP_NUMBER)
|
||||||
|
@ -6666,7 +6671,7 @@ for (;; pptr++)
|
||||||
|
|
||||||
if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET)
|
if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET)
|
||||||
{
|
{
|
||||||
if (subfirstcuflags >= 0)
|
if (subfirstcuflags < REQ_NONE)
|
||||||
{
|
{
|
||||||
firstcu = subfirstcu;
|
firstcu = subfirstcu;
|
||||||
firstcuflags = subfirstcuflags;
|
firstcuflags = subfirstcuflags;
|
||||||
|
@ -6680,7 +6685,7 @@ for (;; pptr++)
|
||||||
into reqcu if there wasn't one, using the vary flag that was in
|
into reqcu if there wasn't one, using the vary flag that was in
|
||||||
existence beforehand. */
|
existence beforehand. */
|
||||||
|
|
||||||
else if (subfirstcuflags >= 0 && subreqcuflags < 0)
|
else if (subfirstcuflags < REQ_NONE && subreqcuflags >= REQ_NONE)
|
||||||
{
|
{
|
||||||
subreqcu = subfirstcu;
|
subreqcu = subfirstcu;
|
||||||
subreqcuflags = subfirstcuflags | tempreqvary;
|
subreqcuflags = subfirstcuflags | tempreqvary;
|
||||||
|
@ -6689,7 +6694,7 @@ for (;; pptr++)
|
||||||
/* If the subpattern set a required code unit (or set a first code unit
|
/* If the subpattern set a required code unit (or set a first code unit
|
||||||
that isn't really the first code unit - see above), set it. */
|
that isn't really the first code unit - see above), set it. */
|
||||||
|
|
||||||
if (subreqcuflags >= 0)
|
if (subreqcuflags < REQ_NONE)
|
||||||
{
|
{
|
||||||
reqcu = subreqcu;
|
reqcu = subreqcu;
|
||||||
reqcuflags = subreqcuflags;
|
reqcuflags = subreqcuflags;
|
||||||
|
@ -6708,7 +6713,7 @@ for (;; pptr++)
|
||||||
in that example, 'X' ends up set for both. */
|
in that example, 'X' ends up set for both. */
|
||||||
|
|
||||||
else if ((bravalue == OP_ASSERT || bravalue == OP_ASSERT_NA) &&
|
else if ((bravalue == OP_ASSERT || bravalue == OP_ASSERT_NA) &&
|
||||||
subreqcuflags >= 0 && subfirstcuflags >= 0)
|
subreqcuflags < REQ_NONE && subfirstcuflags < REQ_NONE)
|
||||||
{
|
{
|
||||||
reqcu = subreqcu;
|
reqcu = subreqcu;
|
||||||
reqcuflags = subreqcuflags;
|
reqcuflags = subreqcuflags;
|
||||||
|
@ -6738,7 +6743,7 @@ for (;; pptr++)
|
||||||
this name is duplicated. */
|
this name is duplicated. */
|
||||||
|
|
||||||
groupnumber = 0;
|
groupnumber = 0;
|
||||||
for (i = 0; i < cb->names_found; i++, ng++)
|
for (unsigned int i = 0; i < cb->names_found; i++, ng++)
|
||||||
{
|
{
|
||||||
if (length == ng->length &&
|
if (length == ng->length &&
|
||||||
PRIV(strncmp)(name, ng->name, length) == 0)
|
PRIV(strncmp)(name, ng->name, length) == 0)
|
||||||
|
@ -7092,7 +7097,7 @@ for (;; pptr++)
|
||||||
*lengthptr += delta;
|
*lengthptr += delta;
|
||||||
}
|
}
|
||||||
|
|
||||||
else for (i = 0; i < replicate; i++)
|
else for (int i = 0; i < replicate; i++)
|
||||||
{
|
{
|
||||||
memcpy(code, previous, CU2BYTES(1 + LINK_SIZE));
|
memcpy(code, previous, CU2BYTES(1 + LINK_SIZE));
|
||||||
previous = code;
|
previous = code;
|
||||||
|
@ -7268,12 +7273,12 @@ for (;; pptr++)
|
||||||
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (groupsetfirstcu && reqcuflags < 0)
|
if (groupsetfirstcu && reqcuflags >= REQ_NONE)
|
||||||
{
|
{
|
||||||
reqcu = firstcu;
|
reqcu = firstcu;
|
||||||
reqcuflags = firstcuflags;
|
reqcuflags = firstcuflags;
|
||||||
}
|
}
|
||||||
for (i = 1; (uint32_t)i < repeat_min; i++)
|
for (uint32_t i = 1; i < repeat_min; i++)
|
||||||
{
|
{
|
||||||
memcpy(code, previous, CU2BYTES(len));
|
memcpy(code, previous, CU2BYTES(len));
|
||||||
code += len;
|
code += len;
|
||||||
|
@ -7317,14 +7322,14 @@ for (;; pptr++)
|
||||||
|
|
||||||
/* This is compiling for real */
|
/* This is compiling for real */
|
||||||
|
|
||||||
else for (i = repeat_max - 1; i >= 0; i--)
|
else for (uint32_t i = repeat_max; i >= 1; i--)
|
||||||
{
|
{
|
||||||
*code++ = OP_BRAZERO + repeat_type;
|
*code++ = OP_BRAZERO + repeat_type;
|
||||||
|
|
||||||
/* All but the final copy start a new nesting, maintaining the
|
/* All but the final copy start a new nesting, maintaining the
|
||||||
chain of brackets outstanding. */
|
chain of brackets outstanding. */
|
||||||
|
|
||||||
if (i != 0)
|
if (i != 1)
|
||||||
{
|
{
|
||||||
int linkoffset;
|
int linkoffset;
|
||||||
*code++ = OP_BRA;
|
*code++ = OP_BRA;
|
||||||
|
@ -8043,9 +8048,9 @@ Arguments:
|
||||||
errorcodeptr -> pointer to error code variable
|
errorcodeptr -> pointer to error code variable
|
||||||
skipunits skip this many code units at start (for brackets and OP_COND)
|
skipunits skip this many code units at start (for brackets and OP_COND)
|
||||||
firstcuptr place to put the first required code unit
|
firstcuptr place to put the first required code unit
|
||||||
firstcuflagsptr place to put the first code unit flags, or a negative number
|
firstcuflagsptr place to put the first code unit flags
|
||||||
reqcuptr place to put the last required code unit
|
reqcuptr place to put the last required code unit
|
||||||
reqcuflagsptr place to put the last required code unit flags, or a negative number
|
reqcuflagsptr place to put the last required code unit flags
|
||||||
bcptr pointer to the chain of currently open branches
|
bcptr pointer to the chain of currently open branches
|
||||||
cb points to the data block with tables pointers etc.
|
cb points to the data block with tables pointers etc.
|
||||||
lengthptr NULL during the real compile phase
|
lengthptr NULL during the real compile phase
|
||||||
|
@ -8059,7 +8064,7 @@ Returns: 0 There has been an error
|
||||||
static int
|
static int
|
||||||
compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, uint32_t **pptrptr,
|
compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, uint32_t **pptrptr,
|
||||||
int *errorcodeptr, uint32_t skipunits, uint32_t *firstcuptr,
|
int *errorcodeptr, uint32_t skipunits, uint32_t *firstcuptr,
|
||||||
int32_t *firstcuflagsptr, uint32_t *reqcuptr,int32_t *reqcuflagsptr,
|
uint32_t *firstcuflagsptr, uint32_t *reqcuptr, uint32_t *reqcuflagsptr,
|
||||||
branch_chain *bcptr, compile_block *cb, PCRE2_SIZE *lengthptr)
|
branch_chain *bcptr, compile_block *cb, PCRE2_SIZE *lengthptr)
|
||||||
{
|
{
|
||||||
PCRE2_UCHAR *code = *codeptr;
|
PCRE2_UCHAR *code = *codeptr;
|
||||||
|
@ -8072,9 +8077,9 @@ int okreturn = 1;
|
||||||
uint32_t *pptr = *pptrptr;
|
uint32_t *pptr = *pptrptr;
|
||||||
uint32_t firstcu, reqcu;
|
uint32_t firstcu, reqcu;
|
||||||
uint32_t lookbehindlength;
|
uint32_t lookbehindlength;
|
||||||
int32_t firstcuflags, reqcuflags;
|
uint32_t firstcuflags, reqcuflags;
|
||||||
uint32_t branchfirstcu, branchreqcu;
|
uint32_t branchfirstcu, branchreqcu;
|
||||||
int32_t branchfirstcuflags, branchreqcuflags;
|
uint32_t branchfirstcuflags, branchreqcuflags;
|
||||||
PCRE2_SIZE length;
|
PCRE2_SIZE length;
|
||||||
branch_chain bc;
|
branch_chain bc;
|
||||||
|
|
||||||
|
@ -8193,9 +8198,9 @@ for (;;)
|
||||||
|
|
||||||
if (firstcuflags != branchfirstcuflags || firstcu != branchfirstcu)
|
if (firstcuflags != branchfirstcuflags || firstcu != branchfirstcu)
|
||||||
{
|
{
|
||||||
if (firstcuflags >= 0)
|
if (firstcuflags < REQ_NONE)
|
||||||
{
|
{
|
||||||
if (reqcuflags < 0)
|
if (reqcuflags >= REQ_NONE)
|
||||||
{
|
{
|
||||||
reqcu = firstcu;
|
reqcu = firstcu;
|
||||||
reqcuflags = firstcuflags;
|
reqcuflags = firstcuflags;
|
||||||
|
@ -8207,8 +8212,8 @@ for (;;)
|
||||||
/* If we (now or from before) have no firstcu, a firstcu from the
|
/* If we (now or from before) have no firstcu, a firstcu from the
|
||||||
branch becomes a reqcu if there isn't a branch reqcu. */
|
branch becomes a reqcu if there isn't a branch reqcu. */
|
||||||
|
|
||||||
if (firstcuflags < 0 && branchfirstcuflags >= 0 &&
|
if (firstcuflags >= REQ_NONE && branchfirstcuflags < REQ_NONE &&
|
||||||
branchreqcuflags < 0)
|
branchreqcuflags >= REQ_NONE)
|
||||||
{
|
{
|
||||||
branchreqcu = branchfirstcu;
|
branchreqcu = branchfirstcu;
|
||||||
branchreqcuflags = branchfirstcuflags;
|
branchreqcuflags = branchfirstcuflags;
|
||||||
|
@ -8356,7 +8361,7 @@ Returns: TRUE or FALSE
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static BOOL
|
static BOOL
|
||||||
is_anchored(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb,
|
is_anchored(PCRE2_SPTR code, uint32_t bracket_map, compile_block *cb,
|
||||||
int atomcount, BOOL inassert)
|
int atomcount, BOOL inassert)
|
||||||
{
|
{
|
||||||
do {
|
do {
|
||||||
|
@ -8379,7 +8384,7 @@ do {
|
||||||
op == OP_SCBRA || op == OP_SCBRAPOS)
|
op == OP_SCBRA || op == OP_SCBRAPOS)
|
||||||
{
|
{
|
||||||
int n = GET2(scode, 1+LINK_SIZE);
|
int n = GET2(scode, 1+LINK_SIZE);
|
||||||
int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
|
uint32_t new_map = bracket_map | ((n < 32)? (1u << n) : 1);
|
||||||
if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE;
|
if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8739,15 +8744,15 @@ Returns: the fixed first code unit, or 0 with REQ_NONE in flags
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static uint32_t
|
static uint32_t
|
||||||
find_firstassertedcu(PCRE2_SPTR code, int32_t *flags, uint32_t inassert)
|
find_firstassertedcu(PCRE2_SPTR code, uint32_t *flags, uint32_t inassert)
|
||||||
{
|
{
|
||||||
uint32_t c = 0;
|
uint32_t c = 0;
|
||||||
int cflags = REQ_NONE;
|
uint32_t cflags = REQ_NONE;
|
||||||
|
|
||||||
*flags = REQ_NONE;
|
*flags = REQ_NONE;
|
||||||
do {
|
do {
|
||||||
uint32_t d;
|
uint32_t d;
|
||||||
int dflags;
|
uint32_t dflags;
|
||||||
int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
|
int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
|
||||||
*code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;
|
*code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;
|
||||||
PCRE2_SPTR scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE);
|
PCRE2_SPTR scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE);
|
||||||
|
@ -8770,9 +8775,8 @@ do {
|
||||||
case OP_SCRIPT_RUN:
|
case OP_SCRIPT_RUN:
|
||||||
d = find_firstassertedcu(scode, &dflags, inassert +
|
d = find_firstassertedcu(scode, &dflags, inassert +
|
||||||
((op == OP_ASSERT || op == OP_ASSERT_NA)?1:0));
|
((op == OP_ASSERT || op == OP_ASSERT_NA)?1:0));
|
||||||
if (dflags < 0)
|
if (dflags >= REQ_NONE) return 0;
|
||||||
return 0;
|
if (cflags >= REQ_NONE) { c = d; cflags = dflags; }
|
||||||
if (cflags < 0) { c = d; cflags = dflags; }
|
|
||||||
else if (c != d || cflags != dflags) return 0;
|
else if (c != d || cflags != dflags) return 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -8785,7 +8789,7 @@ do {
|
||||||
case OP_MINPLUS:
|
case OP_MINPLUS:
|
||||||
case OP_POSPLUS:
|
case OP_POSPLUS:
|
||||||
if (inassert == 0) return 0;
|
if (inassert == 0) return 0;
|
||||||
if (cflags < 0) { c = scode[1]; cflags = 0; }
|
if (cflags >= REQ_NONE) { c = scode[1]; cflags = 0; }
|
||||||
else if (c != scode[1]) return 0;
|
else if (c != scode[1]) return 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -8811,7 +8815,7 @@ do {
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
|
if (cflags >= REQ_NONE) { c = scode[1]; cflags = REQ_CASELESS; }
|
||||||
else if (c != scode[1]) return 0;
|
else if (c != scode[1]) return 0;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -9747,7 +9751,7 @@ PCRE2_SIZE re_blocksize; /* Size of memory block */
|
||||||
PCRE2_SIZE big32count = 0; /* 32-bit literals >= 0x80000000 */
|
PCRE2_SIZE big32count = 0; /* 32-bit literals >= 0x80000000 */
|
||||||
PCRE2_SIZE parsed_size_needed; /* Needed for parsed pattern */
|
PCRE2_SIZE parsed_size_needed; /* Needed for parsed pattern */
|
||||||
|
|
||||||
int32_t firstcuflags, reqcuflags; /* Type of first/req code unit */
|
uint32_t firstcuflags, reqcuflags; /* Type of first/req code unit */
|
||||||
uint32_t firstcu, reqcu; /* Value of first/req code unit */
|
uint32_t firstcu, reqcu; /* Value of first/req code unit */
|
||||||
uint32_t setflags = 0; /* NL and BSR set flags */
|
uint32_t setflags = 0; /* NL and BSR set flags */
|
||||||
|
|
||||||
|
@ -10427,13 +10431,13 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
|
||||||
(these are not saved during the compile because they can cause conflicts with
|
(these are not saved during the compile because they can cause conflicts with
|
||||||
actual literals that follow). */
|
actual literals that follow). */
|
||||||
|
|
||||||
if (firstcuflags < 0)
|
if (firstcuflags >= REQ_NONE)
|
||||||
firstcu = find_firstassertedcu(codestart, &firstcuflags, 0);
|
firstcu = find_firstassertedcu(codestart, &firstcuflags, 0);
|
||||||
|
|
||||||
/* Save the data for a first code unit. The existence of one means the
|
/* Save the data for a first code unit. The existence of one means the
|
||||||
minimum length must be at least 1. */
|
minimum length must be at least 1. */
|
||||||
|
|
||||||
if (firstcuflags >= 0)
|
if (firstcuflags < REQ_NONE)
|
||||||
{
|
{
|
||||||
re->first_codeunit = firstcu;
|
re->first_codeunit = firstcu;
|
||||||
re->flags |= PCRE2_FIRSTSET;
|
re->flags |= PCRE2_FIRSTSET;
|
||||||
|
@ -10480,16 +10484,16 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
|
||||||
different character and not a non-starting code unit of the first character,
|
different character and not a non-starting code unit of the first character,
|
||||||
because the minimum length count is in characters, not code units. */
|
because the minimum length count is in characters, not code units. */
|
||||||
|
|
||||||
if (reqcuflags >= 0)
|
if (reqcuflags < REQ_NONE)
|
||||||
{
|
{
|
||||||
#if PCRE2_CODE_UNIT_WIDTH == 16
|
#if PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
if ((re->overall_options & PCRE2_UTF) == 0 || /* Not UTF */
|
if ((re->overall_options & PCRE2_UTF) == 0 || /* Not UTF */
|
||||||
firstcuflags < 0 || /* First not set */
|
firstcuflags >= REQ_NONE || /* First not set */
|
||||||
(firstcu & 0xf800) != 0xd800 || /* First not surrogate */
|
(firstcu & 0xf800) != 0xd800 || /* First not surrogate */
|
||||||
(reqcu & 0xfc00) != 0xdc00) /* Req not low surrogate */
|
(reqcu & 0xfc00) != 0xdc00) /* Req not low surrogate */
|
||||||
#elif PCRE2_CODE_UNIT_WIDTH == 8
|
#elif PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
if ((re->overall_options & PCRE2_UTF) == 0 || /* Not UTF */
|
if ((re->overall_options & PCRE2_UTF) == 0 || /* Not UTF */
|
||||||
firstcuflags < 0 || /* First not set */
|
firstcuflags >= REQ_NONE || /* First not set */
|
||||||
(firstcu & 0x80) == 0 || /* First is ASCII */
|
(firstcu & 0x80) == 0 || /* First is ASCII */
|
||||||
(reqcu & 0x80) == 0) /* Req is ASCII */
|
(reqcu & 0x80) == 0) /* Req is ASCII */
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -747,8 +747,8 @@ typedef struct compile_block {
|
||||||
uint32_t class_range_start; /* Overall class range start */
|
uint32_t class_range_start; /* Overall class range start */
|
||||||
uint32_t class_range_end; /* Overall class range end */
|
uint32_t class_range_end; /* Overall class range end */
|
||||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
|
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
|
||||||
|
uint32_t req_varyopt; /* "After variable item" flag for reqbyte */
|
||||||
int max_lookbehind; /* Maximum lookbehind (characters) */
|
int max_lookbehind; /* Maximum lookbehind (characters) */
|
||||||
int req_varyopt; /* "After variable item" flag for reqbyte */
|
|
||||||
BOOL had_accept; /* (*ACCEPT) encountered */
|
BOOL had_accept; /* (*ACCEPT) encountered */
|
||||||
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
|
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
|
||||||
BOOL had_recurse; /* Had a recursion or subroutine call */
|
BOOL had_recurse; /* Had a recursion or subroutine call */
|
||||||
|
|
|
@ -6571,7 +6571,7 @@ if (utf &&
|
||||||
/* Validate the relevant portion of the subject. There's a loop in case we
|
/* Validate the relevant portion of the subject. There's a loop in case we
|
||||||
encounter bad UTF in the characters preceding start_match which we are
|
encounter bad UTF in the characters preceding start_match which we are
|
||||||
scanning because of a lookbehind. */
|
scanning because of a lookbehind. */
|
||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
match_data->rc = PRIV(valid_utf)(mb->check_subject,
|
match_data->rc = PRIV(valid_utf)(mb->check_subject,
|
||||||
|
@ -6591,7 +6591,7 @@ if (utf &&
|
||||||
/* If the end precedes start_match, it means there is invalid UTF in the
|
/* If the end precedes start_match, it means there is invalid UTF in the
|
||||||
extra code units we reversed over because of a lookbehind. Advance past the
|
extra code units we reversed over because of a lookbehind. Advance past the
|
||||||
first bad code unit, and then skip invalid character starting code units in
|
first bad code unit, and then skip invalid character starting code units in
|
||||||
8-bit and 16-bit modes, and try again. */
|
8-bit and 16-bit modes, and try again with the original end point. */
|
||||||
|
|
||||||
if (end_subject < start_match)
|
if (end_subject < start_match)
|
||||||
{
|
{
|
||||||
|
@ -6600,6 +6600,7 @@ if (utf &&
|
||||||
while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
|
while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
|
||||||
mb->check_subject++;
|
mb->check_subject++;
|
||||||
#endif
|
#endif
|
||||||
|
end_subject = true_end_subject;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Otherwise, set the not end of line option, and do the match. */
|
/* Otherwise, set the not end of line option, and do the match. */
|
||||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
New API code Copyright (c) 2016-2021 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -908,7 +908,7 @@ set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
|
||||||
{
|
{
|
||||||
uint32_t c;
|
uint32_t c;
|
||||||
for (c = 0; c < table_limit; c++)
|
for (c = 0; c < table_limit; c++)
|
||||||
re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]);
|
re->start_bitmap[c] |= (uint8_t)(~(re->tables[c+cbits_offset+cbit_type]));
|
||||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
|
if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -259,7 +259,7 @@ PCRE2_UNSET, so as not to imply an offset in the replacement. */
|
||||||
|
|
||||||
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
|
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
|
||||||
return PCRE2_ERROR_BADOPTION;
|
return PCRE2_ERROR_BADOPTION;
|
||||||
|
|
||||||
/* Validate length and find the end of the replacement. A NULL replacement of
|
/* Validate length and find the end of the replacement. A NULL replacement of
|
||||||
zero length is interpreted as an empty string. */
|
zero length is interpreted as an empty string. */
|
||||||
|
|
||||||
|
@ -304,7 +304,6 @@ else if (use_existing_match)
|
||||||
(pcre2_general_context *)mcontext;
|
(pcre2_general_context *)mcontext;
|
||||||
int pairs = (code->top_bracket + 1 < match_data->oveccount)?
|
int pairs = (code->top_bracket + 1 < match_data->oveccount)?
|
||||||
code->top_bracket + 1 : match_data->oveccount;
|
code->top_bracket + 1 : match_data->oveccount;
|
||||||
if (subject == NULL) return PCRE2_ERROR_NULL;
|
|
||||||
internal_match_data = pcre2_match_data_create(match_data->oveccount,
|
internal_match_data = pcre2_match_data_create(match_data->oveccount,
|
||||||
gcontext);
|
gcontext);
|
||||||
if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
|
if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||||
|
@ -325,7 +324,16 @@ scb.input = subject;
|
||||||
scb.output = (PCRE2_SPTR)buffer;
|
scb.output = (PCRE2_SPTR)buffer;
|
||||||
scb.ovector = ovector;
|
scb.ovector = ovector;
|
||||||
|
|
||||||
/* Find lengths of zero-terminated subject */
|
/* A NULL subject of zero length is treated as an empty string. */
|
||||||
|
|
||||||
|
if (subject == NULL)
|
||||||
|
{
|
||||||
|
if (length != 0) return PCRE2_ERROR_NULL;
|
||||||
|
subject = (PCRE2_SPTR)"";
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Find length of zero-terminated subject */
|
||||||
|
|
||||||
if (length == PCRE2_ZERO_TERMINATED)
|
if (length == PCRE2_ZERO_TERMINATED)
|
||||||
length = subject? PRIV(strlen)(subject) : 0;
|
length = subject? PRIV(strlen)(subject) : 0;
|
||||||
|
|
||||||
|
|
|
@ -3152,7 +3152,7 @@ Returns: 0 on success, with the length updated to the number of 16-bit
|
||||||
OR -3 if a value > 0xffff is encountered when not in UTF mode
|
OR -3 if a value > 0xffff is encountered when not in UTF mode
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static PCRE2_SIZE
|
static int
|
||||||
to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
|
to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
|
||||||
{
|
{
|
||||||
uint16_t *pp;
|
uint16_t *pp;
|
||||||
|
@ -3239,7 +3239,7 @@ Returns: 0 on success, with the length updated to the number of 32-bit
|
||||||
OR -2 if a value > 0x10ffff is encountered in UTF mode
|
OR -2 if a value > 0x10ffff is encountered in UTF mode
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static PCRE2_SIZE
|
static int
|
||||||
to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
|
to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
|
||||||
{
|
{
|
||||||
uint32_t *pp;
|
uint32_t *pp;
|
||||||
|
@ -5490,24 +5490,27 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
|
||||||
if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
|
if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
|
||||||
{
|
{
|
||||||
show_compile_options(
|
show_compile_options(
|
||||||
pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
|
pat_patctl.options & (uint32_t)(~POSIX_SUPPORTED_COMPILE_OPTIONS),
|
||||||
msg = "";
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((FLD(pat_context, extra_options) &
|
|
||||||
~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS) != 0)
|
|
||||||
{
|
|
||||||
show_compile_extra_options(
|
|
||||||
FLD(pat_context, extra_options) & ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS,
|
|
||||||
msg, "");
|
msg, "");
|
||||||
msg = "";
|
msg = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
|
if ((FLD(pat_context, extra_options) &
|
||||||
(pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
|
(uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS)) != 0)
|
||||||
{
|
{
|
||||||
show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS,
|
show_compile_extra_options(
|
||||||
pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg);
|
FLD(pat_context, extra_options) &
|
||||||
|
(uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS), msg, "");
|
||||||
|
msg = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS)) != 0 ||
|
||||||
|
(pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2)) != 0)
|
||||||
|
{
|
||||||
|
show_controls(
|
||||||
|
pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS),
|
||||||
|
pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2),
|
||||||
|
msg);
|
||||||
msg = "";
|
msg = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7652,11 +7655,15 @@ for (gmatched = 0;; gmatched++)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The result of the match is now in capcount. First handle a successful
|
/* The result of the match is now in capcount. First handle a successful
|
||||||
match. */
|
match. If pp was forced to be NULL (to test NULL handling) it will have been
|
||||||
|
treated as an empty string if the length was zero. So re-create that for
|
||||||
|
outputting. */
|
||||||
|
|
||||||
if (capcount >= 0)
|
if (capcount >= 0)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
if (pp == NULL) pp = (uint8_t *)"";
|
||||||
|
|
||||||
if (capcount > (int)oveccount) /* Check for lunatic return value */
|
if (capcount > (int)oveccount) /* Check for lunatic return value */
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue