Source tidies (trailing spaces) etc. for 10.34-RC1.
This commit is contained in:
parent
7ecc9cdfaf
commit
ae9208ab7b
64
ChangeLog
64
ChangeLog
|
@ -80,14 +80,14 @@ otherwise), an atomic group, or a recursion.
|
|||
|
||||
15. Give error if pcre2test -t, -T, -tm or -TM is given an argument of zero.
|
||||
|
||||
16. Check for integer overflow when computing lookbehind lengths. Fixes
|
||||
16. Check for integer overflow when computing lookbehind lengths. Fixes
|
||||
Clusterfuzz issue 15636.
|
||||
|
||||
17. Implemented non-atomic positive lookaround assertions.
|
||||
|
||||
18. If a lookbehind contained a lookahead that contained another lookbehind
|
||||
within it, the nested lookbehind was not correctly processed. For example, if
|
||||
/(?<=(?=(?<=a)))b/ was matched to "ab" it gave no match instead of matching
|
||||
within it, the nested lookbehind was not correctly processed. For example, if
|
||||
/(?<=(?=(?<=a)))b/ was matched to "ab" it gave no match instead of matching
|
||||
"b".
|
||||
|
||||
19. Implemented pcre2_get_match_data_size().
|
||||
|
@ -98,34 +98,34 @@ within it, the nested lookbehind was not correctly processed. For example, if
|
|||
contains any lookbehinds, an empty partial match may be given, because this
|
||||
is another situation where adding characters to the current subject can
|
||||
lead to a full match. Example: /c*+(?<=[bc])/ with subject "ab".
|
||||
|
||||
(b) Similarly, if a pattern could match an empty string, an empty partial
|
||||
match may be given. Example: /(?![ab]).*/ with subject "ab". This case
|
||||
|
||||
(b) Similarly, if a pattern could match an empty string, an empty partial
|
||||
match may be given. Example: /(?![ab]).*/ with subject "ab". This case
|
||||
applies only to PCRE2_PARTIAL_HARD.
|
||||
|
||||
|
||||
(c) An empty string partial hard match can be returned for \z and \Z as it
|
||||
is documented that they shouldn't match.
|
||||
|
||||
is documented that they shouldn't match.
|
||||
|
||||
21. A branch that started with (*ACCEPT) was not being recognized as one that
|
||||
could match an empty string.
|
||||
could match an empty string.
|
||||
|
||||
22. Corrected pcre2_set_character_tables() tables data type: was const unsigned
|
||||
char * instead of const uint8_t *, as generated by pcre2_maketables().
|
||||
|
||||
23. Upgraded to Unicode 12.1.0.
|
||||
|
||||
24. Add -jitfast command line option to pcre2test (to make all the jit options
|
||||
24. Add -jitfast command line option to pcre2test (to make all the jit options
|
||||
available directly).
|
||||
|
||||
25. Make pcre2test -C show if libreadline or libedit is supported.
|
||||
|
||||
26. If the length of one branch of a group exceeded 65535 (the maximum value
|
||||
that is remembered as a minimum length), the whole group's length was
|
||||
incorrectly recorded as 65535, leading to incorrect "no match" when start-up
|
||||
that is remembered as a minimum length), the whole group's length was
|
||||
incorrectly recorded as 65535, leading to incorrect "no match" when start-up
|
||||
optimizations were in force.
|
||||
|
||||
27. The "rightmost consulted character" value was not always correct; in
|
||||
particular, if a pattern ended with a negative lookahead, characters that were
|
||||
27. The "rightmost consulted character" value was not always correct; in
|
||||
particular, if a pattern ended with a negative lookahead, characters that were
|
||||
inspected in that lookahead were not included.
|
||||
|
||||
28. Add the pcre2_maketables_free() function.
|
||||
|
@ -134,13 +134,13 @@ inspected in that lookahead were not included.
|
|||
code unit in the interpretive engines uses memchr() in 8-bit mode. When the
|
||||
search is caseless, it was doing so inefficiently, which ended up slowing down
|
||||
the match drastically when the subject was very long. The revised code (a)
|
||||
remembers if one case is not found, so it never repeats the search for that
|
||||
remembers if one case is not found, so it never repeats the search for that
|
||||
case after a bumpalong and (b) when one case has been found, it searches only
|
||||
up to that position for an earlier occurrence of the other case. This fix
|
||||
applies to both interpretive pcre2_match() and to pcre2_dfa_match().
|
||||
applies to both interpretive pcre2_match() and to pcre2_dfa_match().
|
||||
|
||||
30. While scanning to find the minimum length of a group, if any branch has
|
||||
minimum length zero, there is no need to scan any subsequent branches (a small
|
||||
30. While scanning to find the minimum length of a group, if any branch has
|
||||
minimum length zero, there is no need to scan any subsequent branches (a small
|
||||
compile-time performance improvement).
|
||||
|
||||
31. Installed a .gitignore file on a user's suggestion. When using the svn
|
||||
|
@ -149,29 +149,29 @@ repository with git (through git svn) this helps keep it tidy.
|
|||
32. Add underflow check in JIT which may occur when the value of subject
|
||||
string pointer is close to 0.
|
||||
|
||||
33. Arrange for classes such as [Aa] which contain just the two cases of the
|
||||
same character, to be treated as a single caseless character. This causes the
|
||||
33. Arrange for classes such as [Aa] which contain just the two cases of the
|
||||
same character, to be treated as a single caseless character. This causes the
|
||||
first and required code unit optimizations to kick in where relevant.
|
||||
|
||||
34. Improve the bitmap of starting bytes for positive classes that include wide
|
||||
characters, but no property types, in UTF-8 mode. Previously, on encountering
|
||||
such a class, the bits for all bytes greater than \xc4 were set, thus
|
||||
specifying any character with codepoint >= 0x100. Now the only bits that are
|
||||
set are for the relevant bytes that start the wide characters. This can give a
|
||||
set are for the relevant bytes that start the wide characters. This can give a
|
||||
noticeable performance improvement.
|
||||
|
||||
35. If the bitmap of starting code units contains only 1 or 2 bits, replace it
|
||||
with a single starting code unit (1 bit) or a caseless single starting code
|
||||
unit if the two relevant characters are case-partners. This is particularly
|
||||
relevant to the 8-bit library, though it applies to all. It can give a
|
||||
performance boost for patterns such as [Ww]ord and (word|WORD). However, this
|
||||
optimization doesn't happen if there is a "required" code unit of the same
|
||||
value (because the search for a "required" code unit starts at the match start
|
||||
for non-unique first code unit patterns, but after a unique first code unit,
|
||||
35. If the bitmap of starting code units contains only 1 or 2 bits, replace it
|
||||
with a single starting code unit (1 bit) or a caseless single starting code
|
||||
unit if the two relevant characters are case-partners. This is particularly
|
||||
relevant to the 8-bit library, though it applies to all. It can give a
|
||||
performance boost for patterns such as [Ww]ord and (word|WORD). However, this
|
||||
optimization doesn't happen if there is a "required" code unit of the same
|
||||
value (because the search for a "required" code unit starts at the match start
|
||||
for non-unique first code unit patterns, but after a unique first code unit,
|
||||
and patterns such as a*a need the former action).
|
||||
|
||||
36. Small patch to pcre2posix.c to set the erroroffset field to -1 immediately
|
||||
after a successful compile, instead of at the start of matching to avoid a
|
||||
36. Small patch to pcre2posix.c to set the erroroffset field to -1 immediately
|
||||
after a successful compile, instead of at the start of matching to avoid a
|
||||
sanitizer complaint (regexec is supposed to be thread safe).
|
||||
|
||||
|
||||
|
|
6
NEWS
6
NEWS
|
@ -5,15 +5,15 @@ News about PCRE2 releases
|
|||
Version 10.34 15-October-2019
|
||||
-----------------------------
|
||||
|
||||
Another release with a few enhancements as well as bugfixes and tidies. The
|
||||
Another release with a few enhancements as well as bugfixes and tidies. The
|
||||
main new features are:
|
||||
|
||||
1. There is now some support for matching in invalid UTF strings.
|
||||
|
||||
2. Non-atomic positive lookarounds are implemented in the pcre2_match()
|
||||
2. Non-atomic positive lookarounds are implemented in the pcre2_match()
|
||||
interpreter, but not in JIT.
|
||||
|
||||
3. Added two new functions: pcre2_get_match_data_size() and
|
||||
3. Added two new functions: pcre2_get_match_data_size() and
|
||||
pcre2_maketables_free().
|
||||
|
||||
4. Upgraded to Unicode 12.1.0.
|
||||
|
|
|
@ -683,7 +683,7 @@ if [ $utf8 -ne 0 ] ; then
|
|||
echo "---------------------------- Test U3 ------------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets -u --newline=any '(?<=\K\x{17f})' ./testdata/grepinput8) >>testtrygrep
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
|
||||
echo "---------------------------- Test U4 ------------------------------" >>testtrygrep
|
||||
printf 'A\341\200\200\200CD\342\200\200Z\n' >testtemp1grep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -u -o '....' $builddir/testtemp1grep) >>testtrygrep 2>&1
|
||||
|
|
|
@ -65,7 +65,7 @@ The option bits are:
|
|||
PCRE2_EXTENDED Ignore white space and # comments
|
||||
PCRE2_FIRSTLINE Force matching to be before newline
|
||||
PCRE2_LITERAL Pattern characters are all literal
|
||||
PCRE2_MATCH_INVALID_UTF Enable support for matching invalid UTF
|
||||
PCRE2_MATCH_INVALID_UTF Enable support for matching invalid UTF
|
||||
PCRE2_MATCH_UNSET_BACKREF Match unset backreferences
|
||||
PCRE2_MULTILINE ^ and $ match newlines within data
|
||||
PCRE2_NEVER_BACKSLASH_C Lock out the use of \C in patterns
|
||||
|
|
|
@ -25,7 +25,7 @@ SYNOPSIS
|
|||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function returns the size, in bytes, of the match data block that is its
|
||||
This function returns the size, in bytes, of the match data block that is its
|
||||
argument.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -41,8 +41,8 @@ bits:
|
|||
PCRE2_JIT_PARTIAL_SOFT compile code for soft partial matching
|
||||
PCRE2_JIT_PARTIAL_HARD compile code for hard partial matching
|
||||
</pre>
|
||||
There is also an obsolete option called PCRE2_JIT_INVALID_UTF, which has been
|
||||
superseded by the <b>pcre2_compile()</b> option PCRE2_MATCH_INVALID_UTF. The old
|
||||
There is also an obsolete option called PCRE2_JIT_INVALID_UTF, which has been
|
||||
superseded by the <b>pcre2_compile()</b> option PCRE2_MATCH_INVALID_UTF. The old
|
||||
option is deprecated and may be removed in the future.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -2040,7 +2040,7 @@ letters), the following code could be used:
|
|||
re = pcre2_compile(..., ccontext);
|
||||
</pre>
|
||||
The locale name "fr_FR" is used on Linux and other Unix-like systems; if you
|
||||
are using Windows, the name for the French locale is "french".
|
||||
are using Windows, the name for the French locale is "french".
|
||||
</P>
|
||||
<P>
|
||||
The pointer that is passed (via the compile context) to <b>pcre2_compile()</b>
|
||||
|
@ -2282,8 +2282,8 @@ actually inspect the previous character.
|
|||
</P>
|
||||
<P>
|
||||
Note that this information is useful for multi-segment matching only
|
||||
if the pattern contains no nested lookbehinds. For example, the pattern
|
||||
(?<=a(?<=ba)c) returns a maximum lookbehind of 2, but when it is processed, the
|
||||
if the pattern contains no nested lookbehinds. For example, the pattern
|
||||
(?<=a(?<=ba)c) returns a maximum lookbehind of 2, but when it is processed, the
|
||||
first lookbehind moves back by two characters, matches one character, then the
|
||||
nested lookbehind also moves back by two characters. This puts the matching
|
||||
point three characters earlier than it was at the start.
|
||||
|
@ -2743,8 +2743,8 @@ Your program may crash or loop indefinitely or give wrong results.
|
|||
</pre>
|
||||
These options turn on the partial matching feature. A partial match occurs if
|
||||
the end of the subject string is reached successfully, but there are not enough
|
||||
subject characters to complete the match. In addition, either at least one
|
||||
character must have been inspected or the pattern must contain a lookbehind, or
|
||||
subject characters to complete the match. In addition, either at least one
|
||||
character must have been inspected or the pattern must contain a lookbehind, or
|
||||
the pattern must be one that could match an empty string.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -207,7 +207,7 @@ the start of a pattern that set overall options that cannot be changed within
|
|||
the pattern.
|
||||
<br>
|
||||
<br>
|
||||
(m) PCRE2 supports non-atomic positive lookaround assertions. This is an
|
||||
(m) PCRE2 supports non-atomic positive lookaround assertions. This is an
|
||||
extension to the lookaround facilities. The default, Perl-compatible
|
||||
lookarounds are atomic.
|
||||
</P>
|
||||
|
|
|
@ -709,7 +709,7 @@ but one option).
|
|||
</P>
|
||||
<P>
|
||||
<b>--om-capture</b>=<i>number</i>
|
||||
Set the number of capturing parentheses that can be accessed by <b>-o</b>. The
|
||||
Set the number of capturing parentheses that can be accessed by <b>-o</b>. The
|
||||
default is 50.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -189,7 +189,7 @@ code unit) at a time, for all active paths through the tree.
|
|||
supported. (*FAIL) is supported, and behaves like a failing negative assertion.
|
||||
</P>
|
||||
<P>
|
||||
10. The PCRE2_MATCH_INVALID_UTF option for <b>pcre2_compile()</b> is not
|
||||
10. The PCRE2_MATCH_INVALID_UTF option for <b>pcre2_compile()</b> is not
|
||||
supported by <b>pcre2_dfa_match()</b>.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">ADVANTAGES OF THE ALTERNATIVE ALGORITHM</a><br>
|
||||
|
@ -223,7 +223,7 @@ because it has to search for all possible matches, but is also because it is
|
|||
less susceptible to optimization.
|
||||
</P>
|
||||
<P>
|
||||
2. Capturing parentheses, backreferences, script runs, and matching within
|
||||
2. Capturing parentheses, backreferences, script runs, and matching within
|
||||
invalid UTF strings are not supported.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -275,9 +275,9 @@ Note the use of the <b>offset</b> modifier to start the new match where the
|
|||
partial match was found. In this example, the next segment was added to the one
|
||||
in which the partial match was found. This is the most straightforward
|
||||
approach, typically using a memory buffer that is twice the size of each
|
||||
segment. After a partial match, the first half of the buffer is discarded, the
|
||||
second half is moved to the start of the buffer, and a new segment is added
|
||||
before repeating the match as in the example above. After a no match, the
|
||||
segment. After a partial match, the first half of the buffer is discarded, the
|
||||
second half is moved to the start of the buffer, and a new segment is added
|
||||
before repeating the match as in the example above. After a no match, the
|
||||
entire buffer can be discarded.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -2021,8 +2021,8 @@ Earlier versions of Perl and PCRE1 used to give an error at compile time for
|
|||
such patterns. However, because there are cases where this can be useful, such
|
||||
patterns are now accepted, but whenever an iteration of such a group matches no
|
||||
characters, matching moves on to the next item in the pattern instead of
|
||||
repeatedly matching an empty string. This does not prevent backtracking into
|
||||
any of the iterations if a subsequent item fails to match.
|
||||
repeatedly matching an empty string. This does not prevent backtracking into
|
||||
any of the iterations if a subsequent item fails to match.
|
||||
</P>
|
||||
<P>
|
||||
By default, quantifiers are "greedy", that is, they match as much as possible
|
||||
|
@ -2374,7 +2374,7 @@ in the subject string reset to what it was before the assertion was processed.
|
|||
<P>
|
||||
The Perl-compatible lookaround assertions are atomic. If an assertion is true,
|
||||
but there is a subsequent matching failure, there is no backtracking into the
|
||||
assertion. However, there are some cases where non-atomic assertions can be
|
||||
assertion. However, there are some cases where non-atomic assertions can be
|
||||
useful. PCRE2 has some support for these, described in the section entitled
|
||||
<a href="#nonatomicassertions">"Non-atomic assertions"</a>
|
||||
below, but they are not Perl-compatible.
|
||||
|
@ -2621,11 +2621,11 @@ characters that are not "999".
|
|||
The traditional Perl-compatible lookaround assertions are atomic. That is, if
|
||||
an assertion is true, but there is a subsequent matching failure, there is no
|
||||
backtracking into the assertion. However, there are some cases where non-atomic
|
||||
positive assertions can be useful. PCRE2 provides these using the following
|
||||
positive assertions can be useful. PCRE2 provides these using the following
|
||||
syntax:
|
||||
<pre>
|
||||
(*non_atomic_positive_lookahead: or (*napla:
|
||||
(*non_atomic_positive_lookbehind: or (*naplb:
|
||||
(*non_atomic_positive_lookbehind: or (*naplb:
|
||||
</pre>
|
||||
Consider the problem of finding the right-most word in a string that also
|
||||
appears earlier in the string, that is, it must appear at least twice in total.
|
||||
|
@ -2633,8 +2633,8 @@ This pattern returns the required result as captured substring 1:
|
|||
<pre>
|
||||
^(?x)(*napla: .* \b(\w++)) (?> .*? \b\1\b ){2}
|
||||
</pre>
|
||||
For a subject such as "word1 word2 word3 word2 word3 word4" the result is
|
||||
"word3". How does it work? At the start, ^(?x) anchors the pattern and sets the
|
||||
For a subject such as "word1 word2 word3 word2 word3 word4" the result is
|
||||
"word3". How does it work? At the start, ^(?x) anchors the pattern and sets the
|
||||
"x" option, which causes white space (introduced for readability) to be
|
||||
ignored. Inside the assertion, the greedy .* at first consumes the entire
|
||||
string, but then has to backtrack until the rest of the assertion can match a
|
||||
|
@ -2643,9 +2643,9 @@ succeeds, it captures the right-most word in the string.
|
|||
</P>
|
||||
<P>
|
||||
The current matching point is then reset to the start of the subject, and the
|
||||
rest of the pattern match checks for two occurrences of the captured word,
|
||||
using an ungreedy .*? to scan from the left. If this succeeds, we are done, but
|
||||
if the last word in the string does not occur twice, this part of the pattern
|
||||
rest of the pattern match checks for two occurrences of the captured word,
|
||||
using an ungreedy .*? to scan from the left. If this succeeds, we are done, but
|
||||
if the last word in the string does not occur twice, this part of the pattern
|
||||
fails. If a traditional atomic lookahead (?= or (*pla: had been used, the
|
||||
assertion could not be re-entered, and the whole match would fail. The pattern
|
||||
would succeed only if the very last word in the subject was found twice.
|
||||
|
|
|
@ -553,7 +553,7 @@ Each top-level branch of a lookbehind must be of a fixed length.
|
|||
<P>
|
||||
These assertions are specific to PCRE2 and are not Perl-compatible.
|
||||
<pre>
|
||||
(*napla:...)
|
||||
(*napla:...)
|
||||
(*non_atomic_positive_lookahead:...)
|
||||
|
||||
(*naplb:...)
|
||||
|
|
|
@ -245,7 +245,7 @@ compilation, each pattern is passed to the just-in-time compiler, if available.
|
|||
<b>-jitfast</b>
|
||||
Behave as if each pattern line has the <b>jitfast</b> modifier; after
|
||||
successful compilation, each pattern is passed to the just-in-time compiler, if
|
||||
available, and each subject line is passed directly to the JIT matcher via its
|
||||
available, and each subject line is passed directly to the JIT matcher via its
|
||||
"fast path".
|
||||
</P>
|
||||
<P>
|
||||
|
@ -620,7 +620,7 @@ for a description of the effects of these options.
|
|||
firstline set PCRE2_FIRSTLINE
|
||||
literal set PCRE2_LITERAL
|
||||
match_line set PCRE2_EXTRA_MATCH_LINE
|
||||
match_invalid_utf set PCRE2_MATCH_INVALID_UTF
|
||||
match_invalid_utf set PCRE2_MATCH_INVALID_UTF
|
||||
match_unset_backref set PCRE2_MATCH_UNSET_BACKREF
|
||||
match_word set PCRE2_EXTRA_MATCH_WORD
|
||||
/m multiline set PCRE2_MULTILINE
|
||||
|
@ -746,8 +746,8 @@ options, the line is omitted. "First code unit" is where any match must start;
|
|||
if there is more than one they are listed as "starting code units". "Last code
|
||||
unit" is the last literal code unit that must be present in any match. This is
|
||||
not necessarily the last character. These lines are omitted if no starting or
|
||||
ending code units are recorded. The subject length line is omitted when
|
||||
<b>no_start_optimize</b> is set because the minimum length is not calculated
|
||||
ending code units are recorded. The subject length line is omitted when
|
||||
<b>no_start_optimize</b> is set because the minimum length is not calculated
|
||||
when it can never be used.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -1278,7 +1278,7 @@ Here is an example:
|
|||
</pre>
|
||||
The first, complete match shows that the matched string is "abc", with the
|
||||
preceding and following strings "pqr" and "xyz" having been consulted during
|
||||
the match (when processing the assertions). The partial match can indicate only
|
||||
the match (when processing the assertions). The partial match can indicate only
|
||||
the preceding string.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -24,13 +24,13 @@ width), but this is not the default. Unless specifically requested, PCRE2
|
|||
treats each code unit in a string as one character.
|
||||
</P>
|
||||
<P>
|
||||
There are two ways of telling PCRE2 to switch to UTF mode, where characters may
|
||||
consist of more than one code unit and the range of values is constrained. The
|
||||
There are two ways of telling PCRE2 to switch to UTF mode, where characters may
|
||||
consist of more than one code unit and the range of values is constrained. The
|
||||
program can call
|
||||
<a href="pcre2_compile.html"><b>pcre2_compile()</b></a>
|
||||
with the PCRE2_UTF option, or the pattern may start with the sequence (*UTF).
|
||||
However, the latter facility can be locked out by the PCRE2_NEVER_UTF option.
|
||||
That is, the programmer can prevent the supplier of the pattern from switching
|
||||
That is, the programmer can prevent the supplier of the pattern from switching
|
||||
to UTF mode.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -425,7 +425,7 @@ You can run pattern matches on subject strings that may contain invalid UTF
|
|||
sequences if you call <b>pcre2_compile()</b> with the PCRE2_MATCH_INVALID_UTF
|
||||
option. This is supported by <b>pcre2_match()</b>, including JIT matching, but
|
||||
not by <b>pcre2_dfa_match()</b>. When PCRE2_MATCH_INVALID_UTF is set, it forces
|
||||
PCRE2_UTF to be set as well. Note, however, that the pattern itself must be a
|
||||
PCRE2_UTF to be set as well. Note, however, that the pattern itself must be a
|
||||
valid UTF string.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -461,7 +461,7 @@ UTF character, or the end of the subject.
|
|||
</P>
|
||||
<P>
|
||||
At internal fragment boundaries, \b and \B behave in the same way as at the
|
||||
beginning and end of the subject. For example, a sequence such as \bWORD\b
|
||||
beginning and end of the subject. For example, a sequence such as \bWORD\b
|
||||
would match an instance of WORD that is surrounded by invalid UTF code units.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -180,8 +180,8 @@ REVISION
|
|||
Last updated: 17 September 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2API(3) Library Functions Manual PCRE2API(3)
|
||||
|
||||
|
||||
|
@ -3724,8 +3724,8 @@ REVISION
|
|||
Last updated: 02 September 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3)
|
||||
|
||||
|
||||
|
@ -4296,8 +4296,8 @@ REVISION
|
|||
Last updated: 03 March 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3)
|
||||
|
||||
|
||||
|
@ -4726,8 +4726,8 @@ REVISION
|
|||
Last updated: 03 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3)
|
||||
|
||||
|
||||
|
@ -4935,8 +4935,8 @@ REVISION
|
|||
Last updated: 13 July 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2JIT(3) Library Functions Manual PCRE2JIT(3)
|
||||
|
||||
|
||||
|
@ -5360,8 +5360,8 @@ REVISION
|
|||
Last updated: 23 May 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3)
|
||||
|
||||
|
||||
|
@ -5430,8 +5430,8 @@ REVISION
|
|||
Last updated: 02 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3)
|
||||
|
||||
|
||||
|
@ -5654,8 +5654,8 @@ REVISION
|
|||
Last updated: 23 May 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3)
|
||||
|
||||
|
||||
|
@ -6034,8 +6034,8 @@ REVISION
|
|||
Last updated: 04 September 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PATTERN(3) Library Functions Manual PCRE2PATTERN(3)
|
||||
|
||||
|
||||
|
@ -9466,8 +9466,8 @@ REVISION
|
|||
Last updated: 29 July 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PERFORM(3) Library Functions Manual PCRE2PERFORM(3)
|
||||
|
||||
|
||||
|
@ -9701,8 +9701,8 @@ REVISION
|
|||
Last updated: 03 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2POSIX(3) Library Functions Manual PCRE2POSIX(3)
|
||||
|
||||
|
||||
|
@ -10031,8 +10031,8 @@ REVISION
|
|||
Last updated: 30 January 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2SAMPLE(3) Library Functions Manual PCRE2SAMPLE(3)
|
||||
|
||||
|
||||
|
@ -10310,8 +10310,8 @@ REVISION
|
|||
Last updated: 27 June 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2SYNTAX(3) Library Functions Manual PCRE2SYNTAX(3)
|
||||
|
||||
|
||||
|
@ -10823,8 +10823,8 @@ REVISION
|
|||
Last updated: 29 July 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3)
|
||||
|
||||
|
||||
|
@ -11256,5 +11256,5 @@ REVISION
|
|||
Last updated: 24 May 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -53,7 +53,7 @@ The option bits are:
|
|||
PCRE2_EXTENDED Ignore white space and # comments
|
||||
PCRE2_FIRSTLINE Force matching to be before newline
|
||||
PCRE2_LITERAL Pattern characters are all literal
|
||||
PCRE2_MATCH_INVALID_UTF Enable support for matching invalid UTF
|
||||
PCRE2_MATCH_INVALID_UTF Enable support for matching invalid UTF
|
||||
PCRE2_MATCH_UNSET_BACKREF Match unset backreferences
|
||||
PCRE2_MULTILINE ^ and $ match newlines within data
|
||||
PCRE2_NEVER_BACKSLASH_C Lock out the use of \eC in patterns
|
||||
|
|
|
@ -13,7 +13,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
|||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function returns the size, in bytes, of the match data block that is its
|
||||
This function returns the size, in bytes, of the match data block that is its
|
||||
argument.
|
||||
.P
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -30,8 +30,8 @@ bits:
|
|||
PCRE2_JIT_PARTIAL_SOFT compile code for soft partial matching
|
||||
PCRE2_JIT_PARTIAL_HARD compile code for hard partial matching
|
||||
.sp
|
||||
There is also an obsolete option called PCRE2_JIT_INVALID_UTF, which has been
|
||||
superseded by the \fBpcre2_compile()\fP option PCRE2_MATCH_INVALID_UTF. The old
|
||||
There is also an obsolete option called PCRE2_JIT_INVALID_UTF, which has been
|
||||
superseded by the \fBpcre2_compile()\fP option PCRE2_MATCH_INVALID_UTF. The old
|
||||
option is deprecated and may be removed in the future.
|
||||
.P
|
||||
The yield of the function is 0 for success, or a negative error code otherwise.
|
||||
|
|
|
@ -247,7 +247,7 @@ document for an overview of all the PCRE2 documentation.
|
|||
.sp
|
||||
.B const uint8_t *pcre2_maketables(pcre2_general_context *\fIgcontext\fP);
|
||||
.sp
|
||||
.B void pcre2_maketables_free(pcre2_general_context *\fIgcontext\fP,
|
||||
.B void pcre2_maketables_free(pcre2_general_context *\fIgcontext\fP,
|
||||
.B " const uint8_t *\fItables\fP);"
|
||||
.sp
|
||||
.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP,
|
||||
|
@ -1949,7 +1949,7 @@ Most (but not all) patterns can be optimized by the JIT compiler.
|
|||
.nf
|
||||
.B const uint8_t *pcre2_maketables(pcre2_general_context *\fIgcontext\fP);
|
||||
.sp
|
||||
.B void pcre2_maketables_free(pcre2_general_context *\fIgcontext\fP,
|
||||
.B void pcre2_maketables_free(pcre2_general_context *\fIgcontext\fP,
|
||||
.B " const uint8_t *\fItables\fP);"
|
||||
.fi
|
||||
.P
|
||||
|
@ -1995,7 +1995,7 @@ letters), the following code could be used:
|
|||
re = pcre2_compile(..., ccontext);
|
||||
.sp
|
||||
The locale name "fr_FR" is used on Linux and other Unix-like systems; if you
|
||||
are using Windows, the name for the French locale is "french".
|
||||
are using Windows, the name for the French locale is "french".
|
||||
.P
|
||||
The pointer that is passed (via the compile context) to \fBpcre2_compile()\fP
|
||||
is saved with the compiled pattern, and the same tables are used by
|
||||
|
@ -2241,8 +2241,8 @@ longer. \eA also registers a one-character lookbehind, though it does not
|
|||
actually inspect the previous character.
|
||||
.P
|
||||
Note that this information is useful for multi-segment matching only
|
||||
if the pattern contains no nested lookbehinds. For example, the pattern
|
||||
(?<=a(?<=ba)c) returns a maximum lookbehind of 2, but when it is processed, the
|
||||
if the pattern contains no nested lookbehinds. For example, the pattern
|
||||
(?<=a(?<=ba)c) returns a maximum lookbehind of 2, but when it is processed, the
|
||||
first lookbehind moves back by two characters, matches one character, then the
|
||||
nested lookbehind also moves back by two characters. This puts the matching
|
||||
point three characters earlier than it was at the start.
|
||||
|
@ -2734,8 +2734,8 @@ Your program may crash or loop indefinitely or give wrong results.
|
|||
.sp
|
||||
These options turn on the partial matching feature. A partial match occurs if
|
||||
the end of the subject string is reached successfully, but there are not enough
|
||||
subject characters to complete the match. In addition, either at least one
|
||||
character must have been inspected or the pattern must contain a lookbehind, or
|
||||
subject characters to complete the match. In addition, either at least one
|
||||
character must have been inspected or the pattern must contain a lookbehind, or
|
||||
the pattern must be one that could match an empty string.
|
||||
.P
|
||||
If this situation arises when PCRE2_PARTIAL_SOFT (but not PCRE2_PARTIAL_HARD)
|
||||
|
|
|
@ -171,7 +171,7 @@ different way and is not Perl-compatible.
|
|||
the start of a pattern that set overall options that cannot be changed within
|
||||
the pattern.
|
||||
.sp
|
||||
(m) PCRE2 supports non-atomic positive lookaround assertions. This is an
|
||||
(m) PCRE2 supports non-atomic positive lookaround assertions. This is an
|
||||
extension to the lookaround facilities. The default, Perl-compatible
|
||||
lookarounds are atomic.
|
||||
.P
|
||||
|
|
|
@ -617,7 +617,7 @@ then 3 again to be output. By default, there is no separator (but see the next
|
|||
but one option).
|
||||
.TP
|
||||
\fB--om-capture\fP=\fInumber\fP
|
||||
Set the number of capturing parentheses that can be accessed by \fB-o\fP. The
|
||||
Set the number of capturing parentheses that can be accessed by \fB-o\fP. The
|
||||
default is 50.
|
||||
.TP
|
||||
\fB--om-separator\fP=\fItext\fP
|
||||
|
|
|
@ -158,7 +158,7 @@ code unit) at a time, for all active paths through the tree.
|
|||
9. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
|
||||
supported. (*FAIL) is supported, and behaves like a failing negative assertion.
|
||||
.P
|
||||
10. The PCRE2_MATCH_INVALID_UTF option for \fBpcre2_compile()\fP is not
|
||||
10. The PCRE2_MATCH_INVALID_UTF option for \fBpcre2_compile()\fP is not
|
||||
supported by \fBpcre2_dfa_match()\fP.
|
||||
.
|
||||
.
|
||||
|
@ -194,7 +194,7 @@ The alternative algorithm suffers from a number of disadvantages:
|
|||
because it has to search for all possible matches, but is also because it is
|
||||
less susceptible to optimization.
|
||||
.P
|
||||
2. Capturing parentheses, backreferences, script runs, and matching within
|
||||
2. Capturing parentheses, backreferences, script runs, and matching within
|
||||
invalid UTF strings are not supported.
|
||||
.P
|
||||
3. Although atomic groups are supported, their use does not provide the
|
||||
|
|
|
@ -242,9 +242,9 @@ Note the use of the \fBoffset\fP modifier to start the new match where the
|
|||
partial match was found. In this example, the next segment was added to the one
|
||||
in which the partial match was found. This is the most straightforward
|
||||
approach, typically using a memory buffer that is twice the size of each
|
||||
segment. After a partial match, the first half of the buffer is discarded, the
|
||||
second half is moved to the start of the buffer, and a new segment is added
|
||||
before repeating the match as in the example above. After a no match, the
|
||||
segment. After a partial match, the first half of the buffer is discarded, the
|
||||
second half is moved to the start of the buffer, and a new segment is added
|
||||
before repeating the match as in the example above. After a no match, the
|
||||
entire buffer can be discarded.
|
||||
.P
|
||||
If there are memory constraints, you may want to discard text that precedes a
|
||||
|
|
|
@ -2028,8 +2028,8 @@ Earlier versions of Perl and PCRE1 used to give an error at compile time for
|
|||
such patterns. However, because there are cases where this can be useful, such
|
||||
patterns are now accepted, but whenever an iteration of such a group matches no
|
||||
characters, matching moves on to the next item in the pattern instead of
|
||||
repeatedly matching an empty string. This does not prevent backtracking into
|
||||
any of the iterations if a subsequent item fails to match.
|
||||
repeatedly matching an empty string. This does not prevent backtracking into
|
||||
any of the iterations if a subsequent item fails to match.
|
||||
.P
|
||||
By default, quantifiers are "greedy", that is, they match as much as possible
|
||||
(up to the maximum number of permitted times), without causing the rest of the
|
||||
|
@ -2378,7 +2378,7 @@ in the subject string reset to what it was before the assertion was processed.
|
|||
.P
|
||||
The Perl-compatible lookaround assertions are atomic. If an assertion is true,
|
||||
but there is a subsequent matching failure, there is no backtracking into the
|
||||
assertion. However, there are some cases where non-atomic assertions can be
|
||||
assertion. However, there are some cases where non-atomic assertions can be
|
||||
useful. PCRE2 has some support for these, described in the section entitled
|
||||
.\" HTML <a href="#nonatomicassertions">
|
||||
.\" </a>
|
||||
|
@ -2634,11 +2634,11 @@ characters that are not "999".
|
|||
The traditional Perl-compatible lookaround assertions are atomic. That is, if
|
||||
an assertion is true, but there is a subsequent matching failure, there is no
|
||||
backtracking into the assertion. However, there are some cases where non-atomic
|
||||
positive assertions can be useful. PCRE2 provides these using the following
|
||||
positive assertions can be useful. PCRE2 provides these using the following
|
||||
syntax:
|
||||
.sp
|
||||
(*non_atomic_positive_lookahead: or (*napla:
|
||||
(*non_atomic_positive_lookbehind: or (*naplb:
|
||||
(*non_atomic_positive_lookbehind: or (*naplb:
|
||||
.sp
|
||||
Consider the problem of finding the right-most word in a string that also
|
||||
appears earlier in the string, that is, it must appear at least twice in total.
|
||||
|
@ -2646,8 +2646,8 @@ This pattern returns the required result as captured substring 1:
|
|||
.sp
|
||||
^(?x)(*napla: .* \eb(\ew++)) (?> .*? \eb\e1\eb ){2}
|
||||
.sp
|
||||
For a subject such as "word1 word2 word3 word2 word3 word4" the result is
|
||||
"word3". How does it work? At the start, ^(?x) anchors the pattern and sets the
|
||||
For a subject such as "word1 word2 word3 word2 word3 word4" the result is
|
||||
"word3". How does it work? At the start, ^(?x) anchors the pattern and sets the
|
||||
"x" option, which causes white space (introduced for readability) to be
|
||||
ignored. Inside the assertion, the greedy .* at first consumes the entire
|
||||
string, but then has to backtrack until the rest of the assertion can match a
|
||||
|
@ -2655,9 +2655,9 @@ word, which is captured by group 1. In other words, when the assertion first
|
|||
succeeds, it captures the right-most word in the string.
|
||||
.P
|
||||
The current matching point is then reset to the start of the subject, and the
|
||||
rest of the pattern match checks for two occurrences of the captured word,
|
||||
using an ungreedy .*? to scan from the left. If this succeeds, we are done, but
|
||||
if the last word in the string does not occur twice, this part of the pattern
|
||||
rest of the pattern match checks for two occurrences of the captured word,
|
||||
using an ungreedy .*? to scan from the left. If this succeeds, we are done, but
|
||||
if the last word in the string does not occur twice, this part of the pattern
|
||||
fails. If a traditional atomic lookahead (?= or (*pla: had been used, the
|
||||
assertion could not be re-entered, and the whole match would fail. The pattern
|
||||
would succeed only if the very last word in the subject was found twice.
|
||||
|
@ -3808,10 +3808,10 @@ assertion is not "seen" by an instance of (*SKIP:NAME) later in the pattern.
|
|||
.P
|
||||
PCRE2 now supports non-atomic positive assertions, as described in the section
|
||||
entitled
|
||||
.\" HTML <a href="#nonatomicassertions">
|
||||
.\" </a>
|
||||
"Non-atomic assertions"
|
||||
.\"
|
||||
.\" HTML <a href="#nonatomicassertions">
|
||||
.\" </a>
|
||||
"Non-atomic assertions"
|
||||
.\"
|
||||
above. These assertions must be standalone (not used as conditions). They are
|
||||
not Perl-compatible. For these assertions, a later backtrack does jump back
|
||||
into the assertion, and therefore verbs such as (*COMMIT) can be triggered by
|
||||
|
|
|
@ -531,7 +531,7 @@ Each top-level branch of a lookbehind must be of a fixed length.
|
|||
.sp
|
||||
These assertions are specific to PCRE2 and are not Perl-compatible.
|
||||
.sp
|
||||
(*napla:...)
|
||||
(*napla:...)
|
||||
(*non_atomic_positive_lookahead:...)
|
||||
.sp
|
||||
(*naplb:...)
|
||||
|
|
|
@ -205,7 +205,7 @@ compilation, each pattern is passed to the just-in-time compiler, if available.
|
|||
\fB-jitfast\fP
|
||||
Behave as if each pattern line has the \fBjitfast\fP modifier; after
|
||||
successful compilation, each pattern is passed to the just-in-time compiler, if
|
||||
available, and each subject line is passed directly to the JIT matcher via its
|
||||
available, and each subject line is passed directly to the JIT matcher via its
|
||||
"fast path".
|
||||
.TP 10
|
||||
\fB-jitverify\fP
|
||||
|
@ -578,7 +578,7 @@ for a description of the effects of these options.
|
|||
firstline set PCRE2_FIRSTLINE
|
||||
literal set PCRE2_LITERAL
|
||||
match_line set PCRE2_EXTRA_MATCH_LINE
|
||||
match_invalid_utf set PCRE2_MATCH_INVALID_UTF
|
||||
match_invalid_utf set PCRE2_MATCH_INVALID_UTF
|
||||
match_unset_backref set PCRE2_MATCH_UNSET_BACKREF
|
||||
match_word set PCRE2_EXTRA_MATCH_WORD
|
||||
/m multiline set PCRE2_MULTILINE
|
||||
|
@ -701,8 +701,8 @@ options, the line is omitted. "First code unit" is where any match must start;
|
|||
if there is more than one they are listed as "starting code units". "Last code
|
||||
unit" is the last literal code unit that must be present in any match. This is
|
||||
not necessarily the last character. These lines are omitted if no starting or
|
||||
ending code units are recorded. The subject length line is omitted when
|
||||
\fBno_start_optimize\fP is set because the minimum length is not calculated
|
||||
ending code units are recorded. The subject length line is omitted when
|
||||
\fBno_start_optimize\fP is set because the minimum length is not calculated
|
||||
when it can never be used.
|
||||
.P
|
||||
The \fBframesize\fP modifier shows the size, in bytes, of the storage frames
|
||||
|
@ -1245,7 +1245,7 @@ Here is an example:
|
|||
.sp
|
||||
The first, complete match shows that the matched string is "abc", with the
|
||||
preceding and following strings "pqr" and "xyz" having been consulted during
|
||||
the match (when processing the assertions). The partial match can indicate only
|
||||
the match (when processing the assertions). The partial match can indicate only
|
||||
the preceding string.
|
||||
.P
|
||||
The \fBstartchar\fP modifier requests that the starting character for the match
|
||||
|
|
|
@ -11,15 +11,15 @@ text strings in UTF-8, UTF-16, or UTF-32 format (depending on the code unit
|
|||
width), but this is not the default. Unless specifically requested, PCRE2
|
||||
treats each code unit in a string as one character.
|
||||
.P
|
||||
There are two ways of telling PCRE2 to switch to UTF mode, where characters may
|
||||
consist of more than one code unit and the range of values is constrained. The
|
||||
There are two ways of telling PCRE2 to switch to UTF mode, where characters may
|
||||
consist of more than one code unit and the range of values is constrained. The
|
||||
program can call
|
||||
.\" HREF
|
||||
\fBpcre2_compile()\fP
|
||||
.\"
|
||||
with the PCRE2_UTF option, or the pattern may start with the sequence (*UTF).
|
||||
However, the latter facility can be locked out by the PCRE2_NEVER_UTF option.
|
||||
That is, the programmer can prevent the supplier of the pattern from switching
|
||||
That is, the programmer can prevent the supplier of the pattern from switching
|
||||
to UTF mode.
|
||||
.P
|
||||
Note that the PCRE2_MATCH_INVALID_UTF option (see
|
||||
|
@ -403,7 +403,7 @@ You can run pattern matches on subject strings that may contain invalid UTF
|
|||
sequences if you call \fBpcre2_compile()\fP with the PCRE2_MATCH_INVALID_UTF
|
||||
option. This is supported by \fBpcre2_match()\fP, including JIT matching, but
|
||||
not by \fBpcre2_dfa_match()\fP. When PCRE2_MATCH_INVALID_UTF is set, it forces
|
||||
PCRE2_UTF to be set as well. Note, however, that the pattern itself must be a
|
||||
PCRE2_UTF to be set as well. Note, however, that the pattern itself must be a
|
||||
valid UTF string.
|
||||
.P
|
||||
Setting PCRE2_MATCH_INVALID_UTF does not affect what \fBpcre2_compile()\fP
|
||||
|
@ -433,7 +433,7 @@ UTF-sequence, that sequence is skipped, and the match starts at the next valid
|
|||
UTF character, or the end of the subject.
|
||||
.P
|
||||
At internal fragment boundaries, \eb and \eB behave in the same way as at the
|
||||
beginning and end of the subject. For example, a sequence such as \ebWORD\eb
|
||||
beginning and end of the subject. For example, a sequence such as \ebWORD\eb
|
||||
would match an instance of WORD that is surrounded by invalid UTF code units.
|
||||
.P
|
||||
Using PCRE2_MATCH_INVALID_UTF, an application can run matches on arbitrary
|
||||
|
|
|
@ -218,7 +218,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_NAME "PCRE2"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE2 10.33"
|
||||
#define PACKAGE_STRING "PCRE2 10.34-RC1"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre2"
|
||||
|
@ -227,7 +227,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "10.33"
|
||||
#define PACKAGE_VERSION "10.34-RC1"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
|
@ -352,7 +352,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#endif
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "10.33"
|
||||
#define VERSION "10.34-RC1"
|
||||
|
||||
/* Define to 1 if on MINIX. */
|
||||
/* #undef _MINIX */
|
||||
|
|
|
@ -44,7 +44,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 34
|
||||
#define PCRE2_PRERELEASE -RC1
|
||||
#define PCRE2_DATE 2019-04-22
|
||||
#define PCRE2_DATE 2019-10-15
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
|
@ -779,7 +779,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
|||
pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL const uint8_t PCRE2_CALL_CONVENTION \
|
||||
*pcre2_maketables(pcre2_general_context *); \
|
||||
|
||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_maketables_free(pcre2_general_context *, const uint8_t *);
|
||||
|
||||
/* Define macros that generate width-specific names from generic versions. The
|
||||
three-level macro scheme is necessary to get the macros expanded when we want
|
||||
|
@ -855,6 +856,7 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
|
||||
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
||||
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
||||
#define pcre2_maketables_free PCRE2_SUFFIX(pcre2_maketables_free_)
|
||||
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
|
||||
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
||||
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
|
||||
|
|
|
@ -624,13 +624,13 @@ for(;;)
|
|||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ONCE:
|
||||
return !entered_a_group;
|
||||
|
||||
/* Non-atomic assertions - don't possessify last iterator. This needs
|
||||
|
||||
/* Non-atomic assertions - don't possessify last iterator. This needs
|
||||
more thought. */
|
||||
|
||||
|
||||
case OP_ASSERT_NA:
|
||||
case OP_ASSERTBACK_NA:
|
||||
return FALSE;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Skip over the bracket and inspect what comes next. */
|
||||
|
|
|
@ -5595,16 +5595,16 @@ for (;; pptr++)
|
|||
#endif
|
||||
{
|
||||
uint32_t d;
|
||||
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && c > 127) d = UCD_OTHERCASE(c); else
|
||||
#endif
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (c > 255) d = c; else
|
||||
#endif
|
||||
#endif
|
||||
d = TABLE_GET(c, cb->fcc, c);
|
||||
}
|
||||
}
|
||||
|
||||
if (c != d && pptr[2] == d)
|
||||
{
|
||||
|
|
|
@ -3678,7 +3678,7 @@ for (;;)
|
|||
pp2 = memchr(start_match, first_cu2, cu2size);
|
||||
memchr_not_found_first_cu2 = (pp2 == NULL && pp1 == NULL);
|
||||
}
|
||||
|
||||
|
||||
if (pp1 == NULL)
|
||||
start_match = (pp2 == NULL)? end_subject : pp2;
|
||||
else
|
||||
|
|
|
@ -270,7 +270,7 @@ static const unsigned char match_error_texts[] =
|
|||
"invalid syntax\0"
|
||||
/* 65 */
|
||||
"internal error - duplicate substitution match\0"
|
||||
"PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0"
|
||||
"PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0"
|
||||
;
|
||||
|
||||
|
||||
|
|
|
@ -853,7 +853,7 @@ typedef struct match_block {
|
|||
uint32_t match_call_count; /* Number of times a new frame is created */
|
||||
BOOL hitend; /* Hit the end of the subject at some point */
|
||||
BOOL hasthen; /* Pattern contains (*THEN) */
|
||||
BOOL allowemptypartial; /* Allow empty hard partial */
|
||||
BOOL allowemptypartial; /* Allow empty hard partial */
|
||||
const uint8_t *lcc; /* Points to lower casing table */
|
||||
const uint8_t *fcc; /* Points to case-flipping table */
|
||||
const uint8_t *ctypes; /* Points to table of type maps */
|
||||
|
@ -909,7 +909,7 @@ typedef struct dfa_match_block {
|
|||
uint32_t poptions; /* Pattern options */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
BOOL allowemptypartial; /* Allow empty hard partial */
|
||||
BOOL allowemptypartial; /* Allow empty hard partial */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||
uint16_t bsr_convention; /* \R interpretation */
|
||||
pcre2_callout_block *cb; /* Points to a callout block */
|
||||
|
|
|
@ -393,7 +393,7 @@ for(;;)
|
|||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ASSERT_NA:
|
||||
case OP_ASSERTBACK_NA:
|
||||
case OP_ASSERTBACK_NA:
|
||||
case OP_ONCE:
|
||||
case OP_SCRIPT_RUN:
|
||||
case OP_COND:
|
||||
|
|
|
@ -1738,17 +1738,17 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
|
|||
}
|
||||
}
|
||||
|
||||
/* Replace the start code unit bits with a first code unit, but only if it
|
||||
/* Replace the start code unit bits with a first code unit, but only if it
|
||||
is not the same as a required later code unit. This is because a search for
|
||||
a required code unit starts after an explicit first code unit, but at a
|
||||
code unit found from the bitmap. Patterns such as /a*a/ don't work
|
||||
code unit found from the bitmap. Patterns such as /a*a/ don't work
|
||||
if both the start unit and required unit are the same. */
|
||||
|
||||
if (a >= 0 &&
|
||||
if (a >= 0 &&
|
||||
(
|
||||
(re->flags & PCRE2_LASTSET) == 0 ||
|
||||
(re->flags & PCRE2_LASTSET) == 0 ||
|
||||
(
|
||||
re->last_codeunit != (uint32_t)a &&
|
||||
re->last_codeunit != (uint32_t)a &&
|
||||
(b < 0 || re->last_codeunit != (uint32_t)b)
|
||||
)
|
||||
))
|
||||
|
|
Loading…
Reference in New Issue