File tidies for 10.36-RC1
This commit is contained in:
parent
dc426be88e
commit
000bbf2ea7
4
AUTHORS
4
AUTHORS
|
@ -2,8 +2,8 @@ THE MAIN PCRE2 LIBRARY CODE
|
|||
---------------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
|
|
@ -149,7 +149,7 @@ CHECK_C_SOURCE_COMPILES(
|
|||
set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS})
|
||||
|
||||
# Check whether Intel CET is enabled, and if so, adjust compiler flags. This
|
||||
# code was written by PH, trying to imitate the logic from the autotools
|
||||
# code was written by PH, trying to imitate the logic from the autotools
|
||||
# configuration.
|
||||
|
||||
CHECK_C_SOURCE_COMPILES(
|
||||
|
@ -162,8 +162,8 @@ CHECK_C_SOURCE_COMPILES(
|
|||
|
||||
IF (INTEL_CET_ENABLED)
|
||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk")
|
||||
ENDIF(INTEL_CET_ENABLED)
|
||||
|
||||
ENDIF(INTEL_CET_ENABLED)
|
||||
|
||||
|
||||
|
||||
# User-configurable options
|
||||
|
|
72
ChangeLog
72
ChangeLog
|
@ -1,12 +1,12 @@
|
|||
Change Log for PCRE2
|
||||
--------------------
|
||||
|
||||
Version 10.36-RC1 25-May-2020
|
||||
-----------------------------
|
||||
Version 10.36-RC1 05-November-2020
|
||||
----------------------------------
|
||||
|
||||
1. Add CET_CFLAGS so that when Intel CET is enabled, pass -mshstk to
|
||||
compiler. This fixes https://bugs.exim.org/show_bug.cgi?id=2578. Patch for
|
||||
Makefile.am and configure.ac by H.J. Lu. Equivalent patch for CMakeLists.txt
|
||||
compiler. This fixes https://bugs.exim.org/show_bug.cgi?id=2578. Patch for
|
||||
Makefile.am and configure.ac by H.J. Lu. Equivalent patch for CMakeLists.txt
|
||||
invented by PH.
|
||||
|
||||
2. Fix infinite loop when a single byte newline is searched in JIT when
|
||||
|
@ -18,12 +18,12 @@ invalid utf8 mode is enabled.
|
|||
lib. This allows differentiation between lib and lib64.
|
||||
CMAKE_INSTALL_LIBDIR is used for installation of libraries and also for
|
||||
pkgconfig file generation.
|
||||
|
||||
|
||||
- Add the version of PCRE2 to the configuration summary like ./configure
|
||||
does.
|
||||
|
||||
|
||||
- Fix typo: MACTHED_STRING->MATCHED_STRING
|
||||
|
||||
|
||||
4. Updated CMakeLists.txt with another patch from Wolfgang Stöggl (Bugzilla
|
||||
#2588):
|
||||
|
||||
|
@ -31,12 +31,12 @@ invalid utf8 mode is enabled.
|
|||
allow spaces in directory names.
|
||||
|
||||
- This fixes a cmake error, if the path of the pcre2 source contains a space.
|
||||
|
||||
5. Updated CMakeLists.txt with a patch from B. Scott Michel: CMake's
|
||||
documentation suggests using CHECK_SYMBOL_EXISTS over CHECK_FUNCTION_EXISTS.
|
||||
Moreover, these functions come from specific header files, which need to be
|
||||
specified (and, thankfully, are the same on both the Linux and WinXX
|
||||
platforms.)
|
||||
|
||||
5. Updated CMakeLists.txt with a patch from B. Scott Michel: CMake's
|
||||
documentation suggests using CHECK_SYMBOL_EXISTS over CHECK_FUNCTION_EXISTS.
|
||||
Moreover, these functions come from specific header files, which need to be
|
||||
specified (and, thankfully, are the same on both the Linux and WinXX
|
||||
platforms.)
|
||||
|
||||
6. Added a (uint32_t) cast to prevent a compiler warning in pcre2_compile.c.
|
||||
|
||||
|
@ -44,50 +44,50 @@ platforms.)
|
|||
debug Windows builds using CMake. This also updated configure so that it
|
||||
generates *.pc files and pcre2-config with the same content, as in the past.
|
||||
|
||||
8. If a pattern ended with (?(VERSION=n.d where n is any number but d is just a
|
||||
single digit, the code unit beyond d was being read (i.e. there was a read
|
||||
8. If a pattern ended with (?(VERSION=n.d where n is any number but d is just a
|
||||
single digit, the code unit beyond d was being read (i.e. there was a read
|
||||
buffer overflow). Fixes ClusterFuzz 23779.
|
||||
|
||||
9. After the rework in r1235, certain character ranges were incorrectly
|
||||
handled by an optimization in JIT. Furthermore a wrong offset was used to
|
||||
read a value from a buffer which could lead to memory overread.
|
||||
|
||||
10. Unnoticed for many years was the fact that delimiters other than / in the
|
||||
testinput1 and testinput4 files could cause incorrect behaviour when these
|
||||
files were processed by perltest.sh. There were several tests that used quotes
|
||||
10. Unnoticed for many years was the fact that delimiters other than / in the
|
||||
testinput1 and testinput4 files could cause incorrect behaviour when these
|
||||
files were processed by perltest.sh. There were several tests that used quotes
|
||||
as delimiters, and it was just luck that they didn't go wrong with perltest.sh.
|
||||
All the patterns in testinput1 and testinput4 now use / as their delimiter.
|
||||
All the patterns in testinput1 and testinput4 now use / as their delimiter.
|
||||
This fixes Bugzilla #2641.
|
||||
|
||||
11. Perl has started to give an error for \K within lookarounds (though there
|
||||
are cases where it doesn't). PCRE2 still allows this, so the tests that include
|
||||
11. Perl has started to give an error for \K within lookarounds (though there
|
||||
are cases where it doesn't). PCRE2 still allows this, so the tests that include
|
||||
this case have been moved from test 1 to test 2.
|
||||
|
||||
12. Further to 10 above, pcre2test has been updated to detect and grumble if a
|
||||
12. Further to 10 above, pcre2test has been updated to detect and grumble if a
|
||||
delimiter other than / is used after #perltest.
|
||||
|
||||
13. Fixed a bug with PCRE2_MATCH_INVALID_UTF in 8-bit mode when PCRE2_CASELESS
|
||||
was set and PCRE2_NO_START_OPTIMIZE was not set. The optimization for finding
|
||||
the start of a match was not resetting correctly after a failed match on the
|
||||
first valid fragment of the subject, possibly causing incorrect "no match"
|
||||
returns on subsequent fragments. For example, the pattern /A/ failed to match
|
||||
the subject \xe5A. Fixes Bugzilla #2642.
|
||||
13. Fixed a bug with PCRE2_MATCH_INVALID_UTF in 8-bit mode when PCRE2_CASELESS
|
||||
was set and PCRE2_NO_START_OPTIMIZE was not set. The optimization for finding
|
||||
the start of a match was not resetting correctly after a failed match on the
|
||||
first valid fragment of the subject, possibly causing incorrect "no match"
|
||||
returns on subsequent fragments. For example, the pattern /A/ failed to match
|
||||
the subject \xe5A. Fixes Bugzilla #2642.
|
||||
|
||||
14. Fixed a bug in character set matching when JIT is enabled and both unicode
|
||||
scripts and unicode classes are present at the same time.
|
||||
|
||||
15. Added GNU grep's -m (aka --max-count) option to pcre2grep.
|
||||
|
||||
16. Refactored substitution processing in pcre2grep strings, both for the -O
|
||||
option and when dealing with callouts. There is now a single function that
|
||||
handles $ expansion in all cases (instead of multiple copies of almost
|
||||
identical code). This means that the same escape sequences are available
|
||||
everywhere, which was not previously the case. At the same time, the escape
|
||||
sequences $x{...} and $o{...} have been introduced, to allow for characters
|
||||
16. Refactored substitution processing in pcre2grep strings, both for the -O
|
||||
option and when dealing with callouts. There is now a single function that
|
||||
handles $ expansion in all cases (instead of multiple copies of almost
|
||||
identical code). This means that the same escape sequences are available
|
||||
everywhere, which was not previously the case. At the same time, the escape
|
||||
sequences $x{...} and $o{...} have been introduced, to allow for characters
|
||||
whose code points are greater than 255 in Unicode mode.
|
||||
|
||||
17. Applied the patch from Bugzilla #2628 to RunGrepTest. This does an explicit
|
||||
test for a version of sed that can handle binary zero, instead of assuming that
|
||||
17. Applied the patch from Bugzilla #2628 to RunGrepTest. This does an explicit
|
||||
test for a version of sed that can handle binary zero, instead of assuming that
|
||||
any Linux version will work.
|
||||
|
||||
18. Fixed a word boundary check bug in JIT when partial matching is enabled.
|
||||
|
|
4
LICENCE
4
LICENCE
|
@ -20,8 +20,8 @@ THE BASIC LIBRARY FUNCTIONS
|
|||
---------------------------
|
||||
|
||||
Written by: Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
|
|
@ -448,15 +448,15 @@ EXTRA_DIST += \
|
|||
src/sljit/sljitNativePPC_32.c \
|
||||
src/sljit/sljitNativePPC_64.c \
|
||||
src/sljit/sljitNativePPC_common.c \
|
||||
src/sljit/sljitNativeS390X.c \
|
||||
src/sljit/sljitNativeSPARC_32.c \
|
||||
src/sljit/sljitNativeSPARC_common.c \
|
||||
src/sljit/sljitNativeTILEGX-encoder.c \
|
||||
src/sljit/sljitNativeTILEGX_64.c \
|
||||
src/sljit/sljitNativeX86_32.c \
|
||||
src/sljit/sljitNativeX86_64.c \
|
||||
src/sljit/sljitNativeX86_common.c \
|
||||
src/sljit/sljitProtExecAllocator.c \
|
||||
src/sljit/sljitUtils.c
|
||||
src/sljit/sljitUtils.c \
|
||||
src/sljit/sljitWXExecAllocator.c
|
||||
|
||||
# Some of the JIT sources are also in separate files that are #included.
|
||||
|
||||
|
|
10
NEWS
10
NEWS
|
@ -2,6 +2,16 @@ News about PCRE2 releases
|
|||
-------------------------
|
||||
|
||||
|
||||
Version 10.36 05-November-2020
|
||||
------------------------------
|
||||
|
||||
Again, mainly bug fixes and tidies. The only enhancements are the addition of
|
||||
GNU grep's -m (aka --max-count) option to pcre2grep, and also unifying the
|
||||
handling of substitution strings for both -O and callouts in pcre2grep, with
|
||||
the addition of $x{...} and $o{...} to allow for characters whose code points
|
||||
are greater than 255 in Unicode mode.
|
||||
|
||||
|
||||
Version 10.35 15-April-2020
|
||||
---------------------------
|
||||
|
||||
|
|
6
README
6
README
|
@ -297,8 +297,8 @@ library. They are also documented in the pcre2build man page.
|
|||
unaddressable. This allows it to detect invalid memory accesses, and is
|
||||
mostly useful for debugging PCRE2 itself.
|
||||
|
||||
. In environments where the gcc compiler is used and lcov version 1.6 or above
|
||||
is installed, if you specify
|
||||
. In environments where the gcc compiler is used and lcov is installed, if you
|
||||
specify
|
||||
|
||||
--enable-coverage
|
||||
|
||||
|
@ -894,4 +894,4 @@ The distribution should contain the files listed below.
|
|||
Philip Hazel
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
Last updated: 22 September 2020
|
||||
Last updated: 06 November 2020
|
||||
|
|
|
@ -674,11 +674,11 @@ echo "---------------------------- Test 131 -----------------------------" >>tes
|
|||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 132 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -A3 '^match'; echo '---'; head -1) <testdata/grepinput >>testtrygrep 2>&1
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -A3 '^match'; echo '---'; head -1) <$srcdir/testdata/grepinput >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test 133 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <testdata/grepinputv >>testtrygrep 2>&1
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
# Now compare the results.
|
||||
|
@ -715,7 +715,7 @@ if [ $utf8 -ne 0 ] ; then
|
|||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
echo "---------------------------- Test U6 -----------------------------" >>testtrygrep
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -u -m1 -O '=$x{1d3}$o{744}=' 'fox') <testdata/grepinputv >>testtrygrep 2>&1
|
||||
(cd $srcdir; $valgrind $vjs $pcre2grep -u -m1 -O '=$x{1d3}$o{744}=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1
|
||||
echo "RC=$?" >>testtrygrep
|
||||
|
||||
$cf $srcdir/testdata/grepoutput8 testtrygrep
|
||||
|
@ -759,7 +759,7 @@ $valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgr
|
|||
# systems, including Solaris (aka SunOS), where the version of sed explicitly
|
||||
# doesn't like them, and also MacOS (Darwin), OpenBSD, FreeBSD, NetBSD, and
|
||||
# some Linux distributions like Alpine, even when using GNU sed, so test for
|
||||
# a usable sed and fudge the output so that the comparison works when sed
|
||||
# a usable sed and fudge the output so that the comparison works when sed
|
||||
# doesn't.
|
||||
|
||||
printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep
|
||||
|
|
|
@ -11,12 +11,12 @@ dnl be defined as -RC2, for example. For real releases, it should be empty.
|
|||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [36])
|
||||
m4_define(pcre2_prerelease, [-RC1])
|
||||
m4_define(pcre2_date, [2020-06-01])
|
||||
m4_define(pcre2_date, [2020-11-05])
|
||||
|
||||
# Libtool shared library interface versions (current:revision:age)
|
||||
m4_define(libpcre2_8_version, [10:0:10])
|
||||
m4_define(libpcre2_16_version, [10:0:10])
|
||||
m4_define(libpcre2_32_version, [10:0:10])
|
||||
m4_define(libpcre2_8_version, [10:1:10])
|
||||
m4_define(libpcre2_16_version, [10:1:10])
|
||||
m4_define(libpcre2_32_version, [10:1:10])
|
||||
m4_define(libpcre2_posix_version, [2:3:0])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
|
|
|
@ -297,8 +297,8 @@ library. They are also documented in the pcre2build man page.
|
|||
unaddressable. This allows it to detect invalid memory accesses, and is
|
||||
mostly useful for debugging PCRE2 itself.
|
||||
|
||||
. In environments where the gcc compiler is used and lcov version 1.6 or above
|
||||
is installed, if you specify
|
||||
. In environments where the gcc compiler is used and lcov is installed, if you
|
||||
specify
|
||||
|
||||
--enable-coverage
|
||||
|
||||
|
@ -894,4 +894,4 @@ The distribution should contain the files listed below.
|
|||
Philip Hazel
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
Last updated: 22 September 2020
|
||||
Last updated: 06 November 2020
|
||||
|
|
|
@ -662,11 +662,11 @@ is valid" flag (that can be updated atomically) must be used:
|
|||
if (!pointer_is_valid)
|
||||
{
|
||||
Get a write (unique) lock for pointer
|
||||
if (!pointer_is_valid)
|
||||
if (!pointer_is_valid)
|
||||
{
|
||||
pointer = pcre2_compile(...
|
||||
pointer_is_valid = TRUE
|
||||
}
|
||||
}
|
||||
}
|
||||
Release the lock
|
||||
Use pointer in pcre2_match()
|
||||
|
|
|
@ -34,13 +34,13 @@ that the next three characters are not "a". It just asserts that the next
|
|||
character is not "a" three times (in principle; PCRE2 optimizes this to run the
|
||||
assertion just once). Perl allows some repeat quantifiers on other assertions,
|
||||
for example, \b* (but not \b{3}, though oddly it does allow ^{3}), but these
|
||||
do not seem to have any use. PCRE2 does not allow any kind of quantifier on
|
||||
do not seem to have any use. PCRE2 does not allow any kind of quantifier on
|
||||
non-lookaround assertions.
|
||||
</P>
|
||||
<P>
|
||||
3. Capture groups that occur inside negative lookaround assertions are counted,
|
||||
but their entries in the offsets vector are set only when a negative assertion
|
||||
is a condition that has a matching branch (that is, the condition is false).
|
||||
is a condition that has a matching branch (that is, the condition is false).
|
||||
Perl may set such capture groups in other circumstances.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -84,7 +84,7 @@ other character. Note the following examples:
|
|||
\QA\B\E A\B A\B
|
||||
\Q\\E \ \\E
|
||||
</pre>
|
||||
The \Q...\E sequence is recognized both inside and outside character classes
|
||||
The \Q...\E sequence is recognized both inside and outside character classes
|
||||
by both PCRE2 and Perl.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -152,8 +152,8 @@ in the release at the time of writing (5.32), \p{Lu} and \p{Ll} match all
|
|||
letters, regardless of case, when case independence is specified.
|
||||
</P>
|
||||
<P>
|
||||
16. From release 5.32.0, Perl locks out the use of \K in lookaround
|
||||
assertions. In PCRE2, \K is acted on when it occurs in positive assertions,
|
||||
16. From release 5.32.0, Perl locks out the use of \K in lookaround
|
||||
assertions. In PCRE2, \K is acted on when it occurs in positive assertions,
|
||||
but is ignored in negative assertions.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -111,7 +111,7 @@ matching substrings, or if <b>--only-matching</b>, <b>--file-offsets</b>, or
|
|||
(either shown literally, or as an offset), scanning resumes immediately
|
||||
following the match, so that further matches on the same line can be found. If
|
||||
there are multiple patterns, they are all tried on the remainder of the line,
|
||||
but patterns that follow the one that matched are not tried on the earlier
|
||||
but patterns that follow the one that matched are not tried on the earlier
|
||||
matched part of the line.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -486,7 +486,7 @@ a separate line. Searching normally stops as soon as a matching line is found
|
|||
in a file. However, if the <b>-c</b> (count) option is also used, matching
|
||||
continues in order to obtain the correct count, and those files that have at
|
||||
least one match are listed along with their counts. Using this option with
|
||||
<b>-c</b> is a way of suppressing the listing of files with no matches that
|
||||
<b>-c</b> is a way of suppressing the listing of files with no matches that
|
||||
occurs with <b>-c</b> on its own. This option overrides any previous <b>-H</b>,
|
||||
<b>-h</b>, or <b>-L</b> options.
|
||||
</P>
|
||||
|
@ -561,12 +561,12 @@ does not work when input is read line by line (see <b>--line-buffered</b>.)
|
|||
</P>
|
||||
<P>
|
||||
<b>-m</b> <i>number</i>, <b>--max-count</b>=<i>number</i>
|
||||
Stop processing after finding <i>number</i> matching lines, or non-matching
|
||||
Stop processing after finding <i>number</i> matching lines, or non-matching
|
||||
lines if <b>-v</b> is also set. Any trailing context lines are output after the
|
||||
final match. In multiline mode, each multiline match counts as just one line
|
||||
for this purpose. If this limit is reached when reading the standard input from
|
||||
a regular file, the file is left positioned just after the last matching line.
|
||||
If <b>-c</b> is also set, the count that is output is never greater than
|
||||
If <b>-c</b> is also set, the count that is output is never greater than
|
||||
<i>number</i>. This option has no effect if used with <b>-L</b>, <b>-l</b>, or
|
||||
<b>-q</b>, or when just checking for a match in a binary file.
|
||||
</P>
|
||||
|
@ -686,7 +686,7 @@ newline; $r by carriage return; $t by tab; $v by vertical tab.
|
|||
<br>
|
||||
$o<digits> or $o{<digits>} is replaced by the character whose code point is the
|
||||
given octal number. In the first form, up to three octal digits are processed.
|
||||
When more digits are needed in Unicode mode to specify a wide character, the
|
||||
When more digits are needed in Unicode mode to specify a wide character, the
|
||||
second form must be used.
|
||||
<br>
|
||||
<br>
|
||||
|
@ -788,13 +788,13 @@ total would always be zero.
|
|||
Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
|
||||
with UTF-8 support. All patterns (including those for any <b>--exclude</b> and
|
||||
<b>--include</b> options) and all lines that are scanned must be valid strings
|
||||
of UTF-8 characters. If an invalid UTF-8 string is encountered, an error
|
||||
of UTF-8 characters. If an invalid UTF-8 string is encountered, an error
|
||||
occurs.
|
||||
</P>
|
||||
<P>
|
||||
<b>-U</b>, <b>--utf-allow-invalid</b>
|
||||
As <b>--utf</b>, but in addition subject lines may contain invalid UTF-8 code
|
||||
unit sequences. These can never form part of any pattern match. Patterns
|
||||
unit sequences. These can never form part of any pattern match. Patterns
|
||||
themselves, however, must still be valid UTF-8 strings. This facility allows
|
||||
valid UTF-8 strings to be sought within arbitrary byte sequences in executable
|
||||
or other binary files. For more details about matching in non-valid UTF-8
|
||||
|
@ -811,7 +811,7 @@ ignored.
|
|||
<P>
|
||||
<b>-v</b>, <b>--invert-match</b>
|
||||
Invert the sense of the match, so that lines which do <i>not</i> match any of
|
||||
the patterns are the ones that are found. When this option is set, options such
|
||||
the patterns are the ones that are found. When this option is set, options such
|
||||
as <b>--only-matching</b> and <b>--output</b>, which specify parts of a match
|
||||
that are to be output, are ignored.
|
||||
</P>
|
||||
|
|
|
@ -337,7 +337,7 @@ part of the pattern. If the PCRE2_EXTENDED_MORE option is set, the same
|
|||
applies, but in addition unescaped space and horizontal tab characters are
|
||||
ignored inside a character class. Note: only these two characters are ignored,
|
||||
not the full set of pattern white space characters that are ignored outside a
|
||||
character class. Option settings can be changed within a pattern; see the
|
||||
character class. Option settings can be changed within a pattern; see the
|
||||
section entitled
|
||||
<a href="#internaloptions">"Internal Option Setting"</a>
|
||||
below.
|
||||
|
|
|
@ -423,7 +423,7 @@ patterns. Modifiers on a pattern can change these settings.
|
|||
This line is used in test files that can also be processed by <b>perltest.sh</b>
|
||||
to confirm that Perl gives the same results as PCRE2. Subsequent tests are
|
||||
checked for the use of <b>pcre2test</b> features that are incompatible with the
|
||||
<b>perltest.sh</b> script.
|
||||
<b>perltest.sh</b> script.
|
||||
</P>
|
||||
<P>
|
||||
Patterns must use '/' as their delimiter, and only certain modifiers are
|
||||
|
|
|
@ -180,8 +180,8 @@ REVISION
|
|||
Last updated: 17 September 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2API(3) Library Functions Manual PCRE2API(3)
|
||||
|
||||
|
||||
|
@ -3829,8 +3829,8 @@ REVISION
|
|||
Last updated: 04 November 2020
|
||||
Copyright (c) 1997-2020 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3)
|
||||
|
||||
|
||||
|
@ -4423,8 +4423,8 @@ REVISION
|
|||
Last updated: 20 March 2020
|
||||
Copyright (c) 1997-2020 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3)
|
||||
|
||||
|
||||
|
@ -4853,8 +4853,8 @@ REVISION
|
|||
Last updated: 03 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3)
|
||||
|
||||
|
||||
|
@ -5066,8 +5066,8 @@ REVISION
|
|||
Last updated: 06 October 2020
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2JIT(3) Library Functions Manual PCRE2JIT(3)
|
||||
|
||||
|
||||
|
@ -5491,8 +5491,8 @@ REVISION
|
|||
Last updated: 23 May 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3)
|
||||
|
||||
|
||||
|
@ -5561,8 +5561,8 @@ REVISION
|
|||
Last updated: 02 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3)
|
||||
|
||||
|
||||
|
@ -5785,8 +5785,8 @@ REVISION
|
|||
Last updated: 23 May 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3)
|
||||
|
||||
|
||||
|
@ -6165,8 +6165,8 @@ REVISION
|
|||
Last updated: 04 September 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PATTERN(3) Library Functions Manual PCRE2PATTERN(3)
|
||||
|
||||
|
||||
|
@ -9613,8 +9613,8 @@ REVISION
|
|||
Last updated: 06 October 2020
|
||||
Copyright (c) 1997-2020 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PERFORM(3) Library Functions Manual PCRE2PERFORM(3)
|
||||
|
||||
|
||||
|
@ -9848,8 +9848,8 @@ REVISION
|
|||
Last updated: 03 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2POSIX(3) Library Functions Manual PCRE2POSIX(3)
|
||||
|
||||
|
||||
|
@ -10178,8 +10178,8 @@ REVISION
|
|||
Last updated: 30 January 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2SAMPLE(3) Library Functions Manual PCRE2SAMPLE(3)
|
||||
|
||||
|
||||
|
@ -10457,8 +10457,8 @@ REVISION
|
|||
Last updated: 27 June 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2SYNTAX(3) Library Functions Manual PCRE2SYNTAX(3)
|
||||
|
||||
|
||||
|
@ -10973,8 +10973,8 @@ REVISION
|
|||
Last updated: 28 December 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3)
|
||||
|
||||
|
||||
|
@ -11408,5 +11408,5 @@ REVISION
|
|||
Last updated: 23 February 2020
|
||||
Copyright (c) 1997-2020 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -598,11 +598,11 @@ is valid" flag (that can be updated atomically) must be used:
|
|||
if (!pointer_is_valid)
|
||||
{
|
||||
Get a write (unique) lock for pointer
|
||||
if (!pointer_is_valid)
|
||||
if (!pointer_is_valid)
|
||||
{
|
||||
pointer = pcre2_compile(...
|
||||
pointer_is_valid = TRUE
|
||||
}
|
||||
}
|
||||
}
|
||||
Release the lock
|
||||
Use pointer in pcre2_match()
|
||||
|
|
|
@ -22,12 +22,12 @@ that the next three characters are not "a". It just asserts that the next
|
|||
character is not "a" three times (in principle; PCRE2 optimizes this to run the
|
||||
assertion just once). Perl allows some repeat quantifiers on other assertions,
|
||||
for example, \eb* (but not \eb{3}, though oddly it does allow ^{3}), but these
|
||||
do not seem to have any use. PCRE2 does not allow any kind of quantifier on
|
||||
do not seem to have any use. PCRE2 does not allow any kind of quantifier on
|
||||
non-lookaround assertions.
|
||||
.P
|
||||
3. Capture groups that occur inside negative lookaround assertions are counted,
|
||||
but their entries in the offsets vector are set only when a negative assertion
|
||||
is a condition that has a matching branch (that is, the condition is false).
|
||||
is a condition that has a matching branch (that is, the condition is false).
|
||||
Perl may set such capture groups in other circumstances.
|
||||
.P
|
||||
4. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu,
|
||||
|
@ -72,7 +72,7 @@ other character. Note the following examples:
|
|||
\eQA\eB\eE A\eB A\eB
|
||||
\eQ\e\eE \e \e\eE
|
||||
.sp
|
||||
The \eQ...\eE sequence is recognized both inside and outside character classes
|
||||
The \eQ...\eE sequence is recognized both inside and outside character classes
|
||||
by both PCRE2 and Perl.
|
||||
.P
|
||||
7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
|
||||
|
@ -132,8 +132,8 @@ always matches an upper case letter. I think Perl has changed in this respect;
|
|||
in the release at the time of writing (5.32), \ep{Lu} and \ep{Ll} match all
|
||||
letters, regardless of case, when case independence is specified.
|
||||
.P
|
||||
16. From release 5.32.0, Perl locks out the use of \eK in lookaround
|
||||
assertions. In PCRE2, \eK is acted on when it occurs in positive assertions,
|
||||
16. From release 5.32.0, Perl locks out the use of \eK in lookaround
|
||||
assertions. In PCRE2, \eK is acted on when it occurs in positive assertions,
|
||||
but is ignored in negative assertions.
|
||||
.P
|
||||
17. PCRE2 provides some extensions to the Perl regular expression facilities.
|
||||
|
|
|
@ -79,7 +79,7 @@ matching substrings, or if \fB--only-matching\fP, \fB--file-offsets\fP, or
|
|||
(either shown literally, or as an offset), scanning resumes immediately
|
||||
following the match, so that further matches on the same line can be found. If
|
||||
there are multiple patterns, they are all tried on the remainder of the line,
|
||||
but patterns that follow the one that matched are not tried on the earlier
|
||||
but patterns that follow the one that matched are not tried on the earlier
|
||||
matched part of the line.
|
||||
.P
|
||||
This behaviour means that the order in which multiple patterns are specified
|
||||
|
@ -422,7 +422,7 @@ a separate line. Searching normally stops as soon as a matching line is found
|
|||
in a file. However, if the \fB-c\fP (count) option is also used, matching
|
||||
continues in order to obtain the correct count, and those files that have at
|
||||
least one match are listed along with their counts. Using this option with
|
||||
\fB-c\fP is a way of suppressing the listing of files with no matches that
|
||||
\fB-c\fP is a way of suppressing the listing of files with no matches that
|
||||
occurs with \fB-c\fP on its own. This option overrides any previous \fB-H\fP,
|
||||
\fB-h\fP, or \fB-L\fP options.
|
||||
.TP
|
||||
|
@ -489,12 +489,12 @@ large processing buffer, this should not be a problem, but the \fB-M\fP option
|
|||
does not work when input is read line by line (see \fB--line-buffered\fP.)
|
||||
.TP
|
||||
\fB-m\fP \fInumber\fP, \fB--max-count\fP=\fInumber\fP
|
||||
Stop processing after finding \fInumber\fP matching lines, or non-matching
|
||||
Stop processing after finding \fInumber\fP matching lines, or non-matching
|
||||
lines if \fB-v\fP is also set. Any trailing context lines are output after the
|
||||
final match. In multiline mode, each multiline match counts as just one line
|
||||
for this purpose. If this limit is reached when reading the standard input from
|
||||
a regular file, the file is left positioned just after the last matching line.
|
||||
If \fB-c\fP is also set, the count that is output is never greater than
|
||||
If \fB-c\fP is also set, the count that is output is never greater than
|
||||
\fInumber\fP. This option has no effect if used with \fB-L\fP, \fB-l\fP, or
|
||||
\fB-q\fP, or when just checking for a match in a binary file.
|
||||
.TP
|
||||
|
@ -598,7 +598,7 @@ newline; $r by carriage return; $t by tab; $v by vertical tab.
|
|||
.sp
|
||||
$o<digits> or $o{<digits>} is replaced by the character whose code point is the
|
||||
given octal number. In the first form, up to three octal digits are processed.
|
||||
When more digits are needed in Unicode mode to specify a wide character, the
|
||||
When more digits are needed in Unicode mode to specify a wide character, the
|
||||
second form must be used.
|
||||
.sp
|
||||
$x<digits> or $x{<digits>} is replaced by the character represented by the
|
||||
|
@ -686,12 +686,12 @@ total would always be zero.
|
|||
Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
|
||||
with UTF-8 support. All patterns (including those for any \fB--exclude\fP and
|
||||
\fB--include\fP options) and all lines that are scanned must be valid strings
|
||||
of UTF-8 characters. If an invalid UTF-8 string is encountered, an error
|
||||
of UTF-8 characters. If an invalid UTF-8 string is encountered, an error
|
||||
occurs.
|
||||
.TP
|
||||
\fB-U\fP, \fB--utf-allow-invalid\fP
|
||||
As \fB--utf\fP, but in addition subject lines may contain invalid UTF-8 code
|
||||
unit sequences. These can never form part of any pattern match. Patterns
|
||||
unit sequences. These can never form part of any pattern match. Patterns
|
||||
themselves, however, must still be valid UTF-8 strings. This facility allows
|
||||
valid UTF-8 strings to be sought within arbitrary byte sequences in executable
|
||||
or other binary files. For more details about matching in non-valid UTF-8
|
||||
|
@ -708,7 +708,7 @@ ignored.
|
|||
.TP
|
||||
\fB-v\fP, \fB--invert-match\fP
|
||||
Invert the sense of the match, so that lines which do \fInot\fP match any of
|
||||
the patterns are the ones that are found. When this option is set, options such
|
||||
the patterns are the ones that are found. When this option is set, options such
|
||||
as \fB--only-matching\fP and \fB--output\fP, which specify parts of a match
|
||||
that are to be output, are ignored.
|
||||
.TP
|
||||
|
@ -855,7 +855,7 @@ output string, so if you want a newline, you must include it explicitly using
|
|||
the escape $n. For example:
|
||||
.sp
|
||||
pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' <some file>
|
||||
.sp
|
||||
.sp
|
||||
Matching continues normally after the string is output. If you want to see only
|
||||
the callout output but not any output from an actual match, you should end the
|
||||
pattern with (*FAIL).
|
||||
|
|
|
@ -309,7 +309,7 @@ part of the pattern. If the PCRE2_EXTENDED_MORE option is set, the same
|
|||
applies, but in addition unescaped space and horizontal tab characters are
|
||||
ignored inside a character class. Note: only these two characters are ignored,
|
||||
not the full set of pattern white space characters that are ignored outside a
|
||||
character class. Option settings can be changed within a pattern; see the
|
||||
character class. Option settings can be changed within a pattern; see the
|
||||
section entitled
|
||||
.\" HTML <a href="#internaloptions">
|
||||
.\" </a>
|
||||
|
|
|
@ -372,7 +372,7 @@ patterns. Modifiers on a pattern can change these settings.
|
|||
This line is used in test files that can also be processed by \fBperltest.sh\fP
|
||||
to confirm that Perl gives the same results as PCRE2. Subsequent tests are
|
||||
checked for the use of \fBpcre2test\fP features that are incompatible with the
|
||||
\fBperltest.sh\fP script.
|
||||
\fBperltest.sh\fP script.
|
||||
.P
|
||||
Patterns must use '/' as their delimiter, and only certain modifiers are
|
||||
supported. Comment lines, #pattern commands, and #subject commands that set or
|
||||
|
|
|
@ -224,7 +224,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_NAME "PCRE2"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE2 10.35"
|
||||
#define PACKAGE_STRING "PCRE2 10.36-RC1"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre2"
|
||||
|
@ -233,7 +233,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "10.35"
|
||||
#define PACKAGE_VERSION "10.36-RC1"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
|
@ -358,7 +358,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#endif
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "10.35"
|
||||
#define VERSION "10.36-RC1"
|
||||
|
||||
/* Define to 1 if on MINIX. */
|
||||
/* #undef _MINIX */
|
||||
|
|
|
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 35
|
||||
#define PCRE2_PRERELEASE
|
||||
#define PCRE2_DATE 2020-05-09
|
||||
#define PCRE2_MINOR 36
|
||||
#define PCRE2_PRERELEASE -RC1
|
||||
#define PCRE2_DATE 2020-11-05
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
|
|
|
@ -63,7 +63,7 @@ given, they are written in binary. */
|
|||
#include "pcre2_maketables.c"
|
||||
|
||||
|
||||
static char *classlist[] =
|
||||
static const char *classlist[] =
|
||||
{
|
||||
"space", "xdigit", "digit", "upper", "lower",
|
||||
"word", "graph", "print", "punct", "cntrl"
|
||||
|
@ -97,7 +97,7 @@ FILE *f;
|
|||
int i;
|
||||
int nclass = 0;
|
||||
BOOL binary = FALSE;
|
||||
char *env = "C";
|
||||
char *env = (char *)"C";
|
||||
const unsigned char *tables;
|
||||
const unsigned char *base_of_tables;
|
||||
|
||||
|
@ -105,7 +105,7 @@ const unsigned char *base_of_tables;
|
|||
|
||||
for (i = 1; i < argc; i++)
|
||||
{
|
||||
unsigned char *arg = (unsigned char *)argv[i];
|
||||
char *arg = argv[i];
|
||||
if (*arg != '-') break;
|
||||
|
||||
if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0)
|
||||
|
|
|
@ -7192,7 +7192,7 @@ if (utf && end_subject != true_end_subject &&
|
|||
starting code units in 8-bit and 16-bit modes. */
|
||||
|
||||
start_match = end_subject + 1;
|
||||
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
|
||||
start_match++;
|
||||
|
|
|
@ -2070,7 +2070,7 @@ if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
|
|||
uint32_t max = utf? 0x0010ffffu : 0xffu;
|
||||
if (*value > max)
|
||||
{
|
||||
if (!callout)
|
||||
if (!callout)
|
||||
fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
|
||||
"code point greater than 0x%x is invalid\n", (int)(string - begin), max);
|
||||
rc = DDE_ERROR;
|
||||
|
@ -2376,8 +2376,8 @@ while (length > 0)
|
|||
if (value == STDOUT_NL_CODE)
|
||||
{
|
||||
memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
|
||||
argsptr += STDOUT_NL_LEN;
|
||||
}
|
||||
argsptr += STDOUT_NL_LEN;
|
||||
}
|
||||
else if (utf && value > 127)
|
||||
{
|
||||
int n = ord2utf8(value);
|
||||
|
|
|
@ -5141,8 +5141,8 @@ PCRE2_SIZE erroroffset;
|
|||
if (restrict_for_perl_test && delimiter != '/')
|
||||
{
|
||||
fprintf(outfile, "** The only allowed delimiter after #perltest is '/'\n");
|
||||
return PR_ABEND;
|
||||
}
|
||||
return PR_ABEND;
|
||||
}
|
||||
|
||||
/* Initialize the context and pattern/data controls for this test from the
|
||||
defaults. */
|
||||
|
|
Loading…
Reference in New Issue