File tidies for 10.36-RC1
This commit is contained in:
parent
dc426be88e
commit
000bbf2ea7
4
AUTHORS
4
AUTHORS
|
@ -2,8 +2,8 @@ THE MAIN PCRE2 LIBRARY CODE
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
Written by: Philip Hazel
|
Written by: Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: Philip.Hazel
|
||||||
Email domain: cam.ac.uk
|
Email domain: gmail.com
|
||||||
|
|
||||||
University of Cambridge Computing Service,
|
University of Cambridge Computing Service,
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
|
|
|
@ -149,7 +149,7 @@ CHECK_C_SOURCE_COMPILES(
|
||||||
set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS})
|
set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS})
|
||||||
|
|
||||||
# Check whether Intel CET is enabled, and if so, adjust compiler flags. This
|
# Check whether Intel CET is enabled, and if so, adjust compiler flags. This
|
||||||
# code was written by PH, trying to imitate the logic from the autotools
|
# code was written by PH, trying to imitate the logic from the autotools
|
||||||
# configuration.
|
# configuration.
|
||||||
|
|
||||||
CHECK_C_SOURCE_COMPILES(
|
CHECK_C_SOURCE_COMPILES(
|
||||||
|
@ -162,8 +162,8 @@ CHECK_C_SOURCE_COMPILES(
|
||||||
|
|
||||||
IF (INTEL_CET_ENABLED)
|
IF (INTEL_CET_ENABLED)
|
||||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk")
|
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk")
|
||||||
ENDIF(INTEL_CET_ENABLED)
|
ENDIF(INTEL_CET_ENABLED)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# User-configurable options
|
# User-configurable options
|
||||||
|
|
72
ChangeLog
72
ChangeLog
|
@ -1,12 +1,12 @@
|
||||||
Change Log for PCRE2
|
Change Log for PCRE2
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
Version 10.36-RC1 25-May-2020
|
Version 10.36-RC1 05-November-2020
|
||||||
-----------------------------
|
----------------------------------
|
||||||
|
|
||||||
1. Add CET_CFLAGS so that when Intel CET is enabled, pass -mshstk to
|
1. Add CET_CFLAGS so that when Intel CET is enabled, pass -mshstk to
|
||||||
compiler. This fixes https://bugs.exim.org/show_bug.cgi?id=2578. Patch for
|
compiler. This fixes https://bugs.exim.org/show_bug.cgi?id=2578. Patch for
|
||||||
Makefile.am and configure.ac by H.J. Lu. Equivalent patch for CMakeLists.txt
|
Makefile.am and configure.ac by H.J. Lu. Equivalent patch for CMakeLists.txt
|
||||||
invented by PH.
|
invented by PH.
|
||||||
|
|
||||||
2. Fix infinite loop when a single byte newline is searched in JIT when
|
2. Fix infinite loop when a single byte newline is searched in JIT when
|
||||||
|
@ -18,12 +18,12 @@ invalid utf8 mode is enabled.
|
||||||
lib. This allows differentiation between lib and lib64.
|
lib. This allows differentiation between lib and lib64.
|
||||||
CMAKE_INSTALL_LIBDIR is used for installation of libraries and also for
|
CMAKE_INSTALL_LIBDIR is used for installation of libraries and also for
|
||||||
pkgconfig file generation.
|
pkgconfig file generation.
|
||||||
|
|
||||||
- Add the version of PCRE2 to the configuration summary like ./configure
|
- Add the version of PCRE2 to the configuration summary like ./configure
|
||||||
does.
|
does.
|
||||||
|
|
||||||
- Fix typo: MACTHED_STRING->MATCHED_STRING
|
- Fix typo: MACTHED_STRING->MATCHED_STRING
|
||||||
|
|
||||||
4. Updated CMakeLists.txt with another patch from Wolfgang Stöggl (Bugzilla
|
4. Updated CMakeLists.txt with another patch from Wolfgang Stöggl (Bugzilla
|
||||||
#2588):
|
#2588):
|
||||||
|
|
||||||
|
@ -31,12 +31,12 @@ invalid utf8 mode is enabled.
|
||||||
allow spaces in directory names.
|
allow spaces in directory names.
|
||||||
|
|
||||||
- This fixes a cmake error, if the path of the pcre2 source contains a space.
|
- This fixes a cmake error, if the path of the pcre2 source contains a space.
|
||||||
|
|
||||||
5. Updated CMakeLists.txt with a patch from B. Scott Michel: CMake's
|
5. Updated CMakeLists.txt with a patch from B. Scott Michel: CMake's
|
||||||
documentation suggests using CHECK_SYMBOL_EXISTS over CHECK_FUNCTION_EXISTS.
|
documentation suggests using CHECK_SYMBOL_EXISTS over CHECK_FUNCTION_EXISTS.
|
||||||
Moreover, these functions come from specific header files, which need to be
|
Moreover, these functions come from specific header files, which need to be
|
||||||
specified (and, thankfully, are the same on both the Linux and WinXX
|
specified (and, thankfully, are the same on both the Linux and WinXX
|
||||||
platforms.)
|
platforms.)
|
||||||
|
|
||||||
6. Added a (uint32_t) cast to prevent a compiler warning in pcre2_compile.c.
|
6. Added a (uint32_t) cast to prevent a compiler warning in pcre2_compile.c.
|
||||||
|
|
||||||
|
@ -44,50 +44,50 @@ platforms.)
|
||||||
debug Windows builds using CMake. This also updated configure so that it
|
debug Windows builds using CMake. This also updated configure so that it
|
||||||
generates *.pc files and pcre2-config with the same content, as in the past.
|
generates *.pc files and pcre2-config with the same content, as in the past.
|
||||||
|
|
||||||
8. If a pattern ended with (?(VERSION=n.d where n is any number but d is just a
|
8. If a pattern ended with (?(VERSION=n.d where n is any number but d is just a
|
||||||
single digit, the code unit beyond d was being read (i.e. there was a read
|
single digit, the code unit beyond d was being read (i.e. there was a read
|
||||||
buffer overflow). Fixes ClusterFuzz 23779.
|
buffer overflow). Fixes ClusterFuzz 23779.
|
||||||
|
|
||||||
9. After the rework in r1235, certain character ranges were incorrectly
|
9. After the rework in r1235, certain character ranges were incorrectly
|
||||||
handled by an optimization in JIT. Furthermore a wrong offset was used to
|
handled by an optimization in JIT. Furthermore a wrong offset was used to
|
||||||
read a value from a buffer which could lead to memory overread.
|
read a value from a buffer which could lead to memory overread.
|
||||||
|
|
||||||
10. Unnoticed for many years was the fact that delimiters other than / in the
|
10. Unnoticed for many years was the fact that delimiters other than / in the
|
||||||
testinput1 and testinput4 files could cause incorrect behaviour when these
|
testinput1 and testinput4 files could cause incorrect behaviour when these
|
||||||
files were processed by perltest.sh. There were several tests that used quotes
|
files were processed by perltest.sh. There were several tests that used quotes
|
||||||
as delimiters, and it was just luck that they didn't go wrong with perltest.sh.
|
as delimiters, and it was just luck that they didn't go wrong with perltest.sh.
|
||||||
All the patterns in testinput1 and testinput4 now use / as their delimiter.
|
All the patterns in testinput1 and testinput4 now use / as their delimiter.
|
||||||
This fixes Bugzilla #2641.
|
This fixes Bugzilla #2641.
|
||||||
|
|
||||||
11. Perl has started to give an error for \K within lookarounds (though there
|
11. Perl has started to give an error for \K within lookarounds (though there
|
||||||
are cases where it doesn't). PCRE2 still allows this, so the tests that include
|
are cases where it doesn't). PCRE2 still allows this, so the tests that include
|
||||||
this case have been moved from test 1 to test 2.
|
this case have been moved from test 1 to test 2.
|
||||||
|
|
||||||
12. Further to 10 above, pcre2test has been updated to detect and grumble if a
|
12. Further to 10 above, pcre2test has been updated to detect and grumble if a
|
||||||
delimiter other than / is used after #perltest.
|
delimiter other than / is used after #perltest.
|
||||||
|
|
||||||
13. Fixed a bug with PCRE2_MATCH_INVALID_UTF in 8-bit mode when PCRE2_CASELESS
|
13. Fixed a bug with PCRE2_MATCH_INVALID_UTF in 8-bit mode when PCRE2_CASELESS
|
||||||
was set and PCRE2_NO_START_OPTIMIZE was not set. The optimization for finding
|
was set and PCRE2_NO_START_OPTIMIZE was not set. The optimization for finding
|
||||||
the start of a match was not resetting correctly after a failed match on the
|
the start of a match was not resetting correctly after a failed match on the
|
||||||
first valid fragment of the subject, possibly causing incorrect "no match"
|
first valid fragment of the subject, possibly causing incorrect "no match"
|
||||||
returns on subsequent fragments. For example, the pattern /A/ failed to match
|
returns on subsequent fragments. For example, the pattern /A/ failed to match
|
||||||
the subject \xe5A. Fixes Bugzilla #2642.
|
the subject \xe5A. Fixes Bugzilla #2642.
|
||||||
|
|
||||||
14. Fixed a bug in character set matching when JIT is enabled and both unicode
|
14. Fixed a bug in character set matching when JIT is enabled and both unicode
|
||||||
scripts and unicode classes are present at the same time.
|
scripts and unicode classes are present at the same time.
|
||||||
|
|
||||||
15. Added GNU grep's -m (aka --max-count) option to pcre2grep.
|
15. Added GNU grep's -m (aka --max-count) option to pcre2grep.
|
||||||
|
|
||||||
16. Refactored substitution processing in pcre2grep strings, both for the -O
|
16. Refactored substitution processing in pcre2grep strings, both for the -O
|
||||||
option and when dealing with callouts. There is now a single function that
|
option and when dealing with callouts. There is now a single function that
|
||||||
handles $ expansion in all cases (instead of multiple copies of almost
|
handles $ expansion in all cases (instead of multiple copies of almost
|
||||||
identical code). This means that the same escape sequences are available
|
identical code). This means that the same escape sequences are available
|
||||||
everywhere, which was not previously the case. At the same time, the escape
|
everywhere, which was not previously the case. At the same time, the escape
|
||||||
sequences $x{...} and $o{...} have been introduced, to allow for characters
|
sequences $x{...} and $o{...} have been introduced, to allow for characters
|
||||||
whose code points are greater than 255 in Unicode mode.
|
whose code points are greater than 255 in Unicode mode.
|
||||||
|
|
||||||
17. Applied the patch from Bugzilla #2628 to RunGrepTest. This does an explicit
|
17. Applied the patch from Bugzilla #2628 to RunGrepTest. This does an explicit
|
||||||
test for a version of sed that can handle binary zero, instead of assuming that
|
test for a version of sed that can handle binary zero, instead of assuming that
|
||||||
any Linux version will work.
|
any Linux version will work.
|
||||||
|
|
||||||
18. Fixed a word boundary check bug in JIT when partial matching is enabled.
|
18. Fixed a word boundary check bug in JIT when partial matching is enabled.
|
||||||
|
|
4
LICENCE
4
LICENCE
|
@ -20,8 +20,8 @@ THE BASIC LIBRARY FUNCTIONS
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
Written by: Philip Hazel
|
Written by: Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: Philip.Hazel
|
||||||
Email domain: cam.ac.uk
|
Email domain: gmail.com
|
||||||
|
|
||||||
University of Cambridge Computing Service,
|
University of Cambridge Computing Service,
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
|
|
|
@ -448,15 +448,15 @@ EXTRA_DIST += \
|
||||||
src/sljit/sljitNativePPC_32.c \
|
src/sljit/sljitNativePPC_32.c \
|
||||||
src/sljit/sljitNativePPC_64.c \
|
src/sljit/sljitNativePPC_64.c \
|
||||||
src/sljit/sljitNativePPC_common.c \
|
src/sljit/sljitNativePPC_common.c \
|
||||||
|
src/sljit/sljitNativeS390X.c \
|
||||||
src/sljit/sljitNativeSPARC_32.c \
|
src/sljit/sljitNativeSPARC_32.c \
|
||||||
src/sljit/sljitNativeSPARC_common.c \
|
src/sljit/sljitNativeSPARC_common.c \
|
||||||
src/sljit/sljitNativeTILEGX-encoder.c \
|
|
||||||
src/sljit/sljitNativeTILEGX_64.c \
|
|
||||||
src/sljit/sljitNativeX86_32.c \
|
src/sljit/sljitNativeX86_32.c \
|
||||||
src/sljit/sljitNativeX86_64.c \
|
src/sljit/sljitNativeX86_64.c \
|
||||||
src/sljit/sljitNativeX86_common.c \
|
src/sljit/sljitNativeX86_common.c \
|
||||||
src/sljit/sljitProtExecAllocator.c \
|
src/sljit/sljitProtExecAllocator.c \
|
||||||
src/sljit/sljitUtils.c
|
src/sljit/sljitUtils.c \
|
||||||
|
src/sljit/sljitWXExecAllocator.c
|
||||||
|
|
||||||
# Some of the JIT sources are also in separate files that are #included.
|
# Some of the JIT sources are also in separate files that are #included.
|
||||||
|
|
||||||
|
|
10
NEWS
10
NEWS
|
@ -2,6 +2,16 @@ News about PCRE2 releases
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
||||||
|
|
||||||
|
Version 10.36 05-November-2020
|
||||||
|
------------------------------
|
||||||
|
|
||||||
|
Again, mainly bug fixes and tidies. The only enhancements are the addition of
|
||||||
|
GNU grep's -m (aka --max-count) option to pcre2grep, and also unifying the
|
||||||
|
handling of substitution strings for both -O and callouts in pcre2grep, with
|
||||||
|
the addition of $x{...} and $o{...} to allow for characters whose code points
|
||||||
|
are greater than 255 in Unicode mode.
|
||||||
|
|
||||||
|
|
||||||
Version 10.35 15-April-2020
|
Version 10.35 15-April-2020
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
|
|
6
README
6
README
|
@ -297,8 +297,8 @@ library. They are also documented in the pcre2build man page.
|
||||||
unaddressable. This allows it to detect invalid memory accesses, and is
|
unaddressable. This allows it to detect invalid memory accesses, and is
|
||||||
mostly useful for debugging PCRE2 itself.
|
mostly useful for debugging PCRE2 itself.
|
||||||
|
|
||||||
. In environments where the gcc compiler is used and lcov version 1.6 or above
|
. In environments where the gcc compiler is used and lcov is installed, if you
|
||||||
is installed, if you specify
|
specify
|
||||||
|
|
||||||
--enable-coverage
|
--enable-coverage
|
||||||
|
|
||||||
|
@ -894,4 +894,4 @@ The distribution should contain the files listed below.
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: Philip.Hazel
|
Email local part: Philip.Hazel
|
||||||
Email domain: gmail.com
|
Email domain: gmail.com
|
||||||
Last updated: 22 September 2020
|
Last updated: 06 November 2020
|
||||||
|
|
|
@ -674,11 +674,11 @@ echo "---------------------------- Test 131 -----------------------------" >>tes
|
||||||
echo "RC=$?" >>testtrygrep
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 132 -----------------------------" >>testtrygrep
|
echo "---------------------------- Test 132 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -A3 '^match'; echo '---'; head -1) <testdata/grepinput >>testtrygrep 2>&1
|
(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -A3 '^match'; echo '---'; head -1) <$srcdir/testdata/grepinput >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtrygrep
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test 133 -----------------------------" >>testtrygrep
|
echo "---------------------------- Test 133 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <testdata/grepinputv >>testtrygrep 2>&1
|
(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtrygrep
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
# Now compare the results.
|
# Now compare the results.
|
||||||
|
@ -715,7 +715,7 @@ if [ $utf8 -ne 0 ] ; then
|
||||||
echo "RC=$?" >>testtrygrep
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
echo "---------------------------- Test U6 -----------------------------" >>testtrygrep
|
echo "---------------------------- Test U6 -----------------------------" >>testtrygrep
|
||||||
(cd $srcdir; $valgrind $vjs $pcre2grep -u -m1 -O '=$x{1d3}$o{744}=' 'fox') <testdata/grepinputv >>testtrygrep 2>&1
|
(cd $srcdir; $valgrind $vjs $pcre2grep -u -m1 -O '=$x{1d3}$o{744}=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1
|
||||||
echo "RC=$?" >>testtrygrep
|
echo "RC=$?" >>testtrygrep
|
||||||
|
|
||||||
$cf $srcdir/testdata/grepoutput8 testtrygrep
|
$cf $srcdir/testdata/grepoutput8 testtrygrep
|
||||||
|
@ -759,7 +759,7 @@ $valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgr
|
||||||
# systems, including Solaris (aka SunOS), where the version of sed explicitly
|
# systems, including Solaris (aka SunOS), where the version of sed explicitly
|
||||||
# doesn't like them, and also MacOS (Darwin), OpenBSD, FreeBSD, NetBSD, and
|
# doesn't like them, and also MacOS (Darwin), OpenBSD, FreeBSD, NetBSD, and
|
||||||
# some Linux distributions like Alpine, even when using GNU sed, so test for
|
# some Linux distributions like Alpine, even when using GNU sed, so test for
|
||||||
# a usable sed and fudge the output so that the comparison works when sed
|
# a usable sed and fudge the output so that the comparison works when sed
|
||||||
# doesn't.
|
# doesn't.
|
||||||
|
|
||||||
printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep
|
printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep
|
||||||
|
|
|
@ -11,12 +11,12 @@ dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||||
m4_define(pcre2_major, [10])
|
m4_define(pcre2_major, [10])
|
||||||
m4_define(pcre2_minor, [36])
|
m4_define(pcre2_minor, [36])
|
||||||
m4_define(pcre2_prerelease, [-RC1])
|
m4_define(pcre2_prerelease, [-RC1])
|
||||||
m4_define(pcre2_date, [2020-06-01])
|
m4_define(pcre2_date, [2020-11-05])
|
||||||
|
|
||||||
# Libtool shared library interface versions (current:revision:age)
|
# Libtool shared library interface versions (current:revision:age)
|
||||||
m4_define(libpcre2_8_version, [10:0:10])
|
m4_define(libpcre2_8_version, [10:1:10])
|
||||||
m4_define(libpcre2_16_version, [10:0:10])
|
m4_define(libpcre2_16_version, [10:1:10])
|
||||||
m4_define(libpcre2_32_version, [10:0:10])
|
m4_define(libpcre2_32_version, [10:1:10])
|
||||||
m4_define(libpcre2_posix_version, [2:3:0])
|
m4_define(libpcre2_posix_version, [2:3:0])
|
||||||
|
|
||||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||||
|
|
|
@ -297,8 +297,8 @@ library. They are also documented in the pcre2build man page.
|
||||||
unaddressable. This allows it to detect invalid memory accesses, and is
|
unaddressable. This allows it to detect invalid memory accesses, and is
|
||||||
mostly useful for debugging PCRE2 itself.
|
mostly useful for debugging PCRE2 itself.
|
||||||
|
|
||||||
. In environments where the gcc compiler is used and lcov version 1.6 or above
|
. In environments where the gcc compiler is used and lcov is installed, if you
|
||||||
is installed, if you specify
|
specify
|
||||||
|
|
||||||
--enable-coverage
|
--enable-coverage
|
||||||
|
|
||||||
|
@ -894,4 +894,4 @@ The distribution should contain the files listed below.
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: Philip.Hazel
|
Email local part: Philip.Hazel
|
||||||
Email domain: gmail.com
|
Email domain: gmail.com
|
||||||
Last updated: 22 September 2020
|
Last updated: 06 November 2020
|
||||||
|
|
|
@ -662,11 +662,11 @@ is valid" flag (that can be updated atomically) must be used:
|
||||||
if (!pointer_is_valid)
|
if (!pointer_is_valid)
|
||||||
{
|
{
|
||||||
Get a write (unique) lock for pointer
|
Get a write (unique) lock for pointer
|
||||||
if (!pointer_is_valid)
|
if (!pointer_is_valid)
|
||||||
{
|
{
|
||||||
pointer = pcre2_compile(...
|
pointer = pcre2_compile(...
|
||||||
pointer_is_valid = TRUE
|
pointer_is_valid = TRUE
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Release the lock
|
Release the lock
|
||||||
Use pointer in pcre2_match()
|
Use pointer in pcre2_match()
|
||||||
|
|
|
@ -34,13 +34,13 @@ that the next three characters are not "a". It just asserts that the next
|
||||||
character is not "a" three times (in principle; PCRE2 optimizes this to run the
|
character is not "a" three times (in principle; PCRE2 optimizes this to run the
|
||||||
assertion just once). Perl allows some repeat quantifiers on other assertions,
|
assertion just once). Perl allows some repeat quantifiers on other assertions,
|
||||||
for example, \b* (but not \b{3}, though oddly it does allow ^{3}), but these
|
for example, \b* (but not \b{3}, though oddly it does allow ^{3}), but these
|
||||||
do not seem to have any use. PCRE2 does not allow any kind of quantifier on
|
do not seem to have any use. PCRE2 does not allow any kind of quantifier on
|
||||||
non-lookaround assertions.
|
non-lookaround assertions.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
3. Capture groups that occur inside negative lookaround assertions are counted,
|
3. Capture groups that occur inside negative lookaround assertions are counted,
|
||||||
but their entries in the offsets vector are set only when a negative assertion
|
but their entries in the offsets vector are set only when a negative assertion
|
||||||
is a condition that has a matching branch (that is, the condition is false).
|
is a condition that has a matching branch (that is, the condition is false).
|
||||||
Perl may set such capture groups in other circumstances.
|
Perl may set such capture groups in other circumstances.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -84,7 +84,7 @@ other character. Note the following examples:
|
||||||
\QA\B\E A\B A\B
|
\QA\B\E A\B A\B
|
||||||
\Q\\E \ \\E
|
\Q\\E \ \\E
|
||||||
</pre>
|
</pre>
|
||||||
The \Q...\E sequence is recognized both inside and outside character classes
|
The \Q...\E sequence is recognized both inside and outside character classes
|
||||||
by both PCRE2 and Perl.
|
by both PCRE2 and Perl.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -152,8 +152,8 @@ in the release at the time of writing (5.32), \p{Lu} and \p{Ll} match all
|
||||||
letters, regardless of case, when case independence is specified.
|
letters, regardless of case, when case independence is specified.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
16. From release 5.32.0, Perl locks out the use of \K in lookaround
|
16. From release 5.32.0, Perl locks out the use of \K in lookaround
|
||||||
assertions. In PCRE2, \K is acted on when it occurs in positive assertions,
|
assertions. In PCRE2, \K is acted on when it occurs in positive assertions,
|
||||||
but is ignored in negative assertions.
|
but is ignored in negative assertions.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
|
|
@ -111,7 +111,7 @@ matching substrings, or if <b>--only-matching</b>, <b>--file-offsets</b>, or
|
||||||
(either shown literally, or as an offset), scanning resumes immediately
|
(either shown literally, or as an offset), scanning resumes immediately
|
||||||
following the match, so that further matches on the same line can be found. If
|
following the match, so that further matches on the same line can be found. If
|
||||||
there are multiple patterns, they are all tried on the remainder of the line,
|
there are multiple patterns, they are all tried on the remainder of the line,
|
||||||
but patterns that follow the one that matched are not tried on the earlier
|
but patterns that follow the one that matched are not tried on the earlier
|
||||||
matched part of the line.
|
matched part of the line.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -486,7 +486,7 @@ a separate line. Searching normally stops as soon as a matching line is found
|
||||||
in a file. However, if the <b>-c</b> (count) option is also used, matching
|
in a file. However, if the <b>-c</b> (count) option is also used, matching
|
||||||
continues in order to obtain the correct count, and those files that have at
|
continues in order to obtain the correct count, and those files that have at
|
||||||
least one match are listed along with their counts. Using this option with
|
least one match are listed along with their counts. Using this option with
|
||||||
<b>-c</b> is a way of suppressing the listing of files with no matches that
|
<b>-c</b> is a way of suppressing the listing of files with no matches that
|
||||||
occurs with <b>-c</b> on its own. This option overrides any previous <b>-H</b>,
|
occurs with <b>-c</b> on its own. This option overrides any previous <b>-H</b>,
|
||||||
<b>-h</b>, or <b>-L</b> options.
|
<b>-h</b>, or <b>-L</b> options.
|
||||||
</P>
|
</P>
|
||||||
|
@ -561,12 +561,12 @@ does not work when input is read line by line (see <b>--line-buffered</b>.)
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-m</b> <i>number</i>, <b>--max-count</b>=<i>number</i>
|
<b>-m</b> <i>number</i>, <b>--max-count</b>=<i>number</i>
|
||||||
Stop processing after finding <i>number</i> matching lines, or non-matching
|
Stop processing after finding <i>number</i> matching lines, or non-matching
|
||||||
lines if <b>-v</b> is also set. Any trailing context lines are output after the
|
lines if <b>-v</b> is also set. Any trailing context lines are output after the
|
||||||
final match. In multiline mode, each multiline match counts as just one line
|
final match. In multiline mode, each multiline match counts as just one line
|
||||||
for this purpose. If this limit is reached when reading the standard input from
|
for this purpose. If this limit is reached when reading the standard input from
|
||||||
a regular file, the file is left positioned just after the last matching line.
|
a regular file, the file is left positioned just after the last matching line.
|
||||||
If <b>-c</b> is also set, the count that is output is never greater than
|
If <b>-c</b> is also set, the count that is output is never greater than
|
||||||
<i>number</i>. This option has no effect if used with <b>-L</b>, <b>-l</b>, or
|
<i>number</i>. This option has no effect if used with <b>-L</b>, <b>-l</b>, or
|
||||||
<b>-q</b>, or when just checking for a match in a binary file.
|
<b>-q</b>, or when just checking for a match in a binary file.
|
||||||
</P>
|
</P>
|
||||||
|
@ -686,7 +686,7 @@ newline; $r by carriage return; $t by tab; $v by vertical tab.
|
||||||
<br>
|
<br>
|
||||||
$o<digits> or $o{<digits>} is replaced by the character whose code point is the
|
$o<digits> or $o{<digits>} is replaced by the character whose code point is the
|
||||||
given octal number. In the first form, up to three octal digits are processed.
|
given octal number. In the first form, up to three octal digits are processed.
|
||||||
When more digits are needed in Unicode mode to specify a wide character, the
|
When more digits are needed in Unicode mode to specify a wide character, the
|
||||||
second form must be used.
|
second form must be used.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
|
@ -788,13 +788,13 @@ total would always be zero.
|
||||||
Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
|
Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
|
||||||
with UTF-8 support. All patterns (including those for any <b>--exclude</b> and
|
with UTF-8 support. All patterns (including those for any <b>--exclude</b> and
|
||||||
<b>--include</b> options) and all lines that are scanned must be valid strings
|
<b>--include</b> options) and all lines that are scanned must be valid strings
|
||||||
of UTF-8 characters. If an invalid UTF-8 string is encountered, an error
|
of UTF-8 characters. If an invalid UTF-8 string is encountered, an error
|
||||||
occurs.
|
occurs.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-U</b>, <b>--utf-allow-invalid</b>
|
<b>-U</b>, <b>--utf-allow-invalid</b>
|
||||||
As <b>--utf</b>, but in addition subject lines may contain invalid UTF-8 code
|
As <b>--utf</b>, but in addition subject lines may contain invalid UTF-8 code
|
||||||
unit sequences. These can never form part of any pattern match. Patterns
|
unit sequences. These can never form part of any pattern match. Patterns
|
||||||
themselves, however, must still be valid UTF-8 strings. This facility allows
|
themselves, however, must still be valid UTF-8 strings. This facility allows
|
||||||
valid UTF-8 strings to be sought within arbitrary byte sequences in executable
|
valid UTF-8 strings to be sought within arbitrary byte sequences in executable
|
||||||
or other binary files. For more details about matching in non-valid UTF-8
|
or other binary files. For more details about matching in non-valid UTF-8
|
||||||
|
@ -811,7 +811,7 @@ ignored.
|
||||||
<P>
|
<P>
|
||||||
<b>-v</b>, <b>--invert-match</b>
|
<b>-v</b>, <b>--invert-match</b>
|
||||||
Invert the sense of the match, so that lines which do <i>not</i> match any of
|
Invert the sense of the match, so that lines which do <i>not</i> match any of
|
||||||
the patterns are the ones that are found. When this option is set, options such
|
the patterns are the ones that are found. When this option is set, options such
|
||||||
as <b>--only-matching</b> and <b>--output</b>, which specify parts of a match
|
as <b>--only-matching</b> and <b>--output</b>, which specify parts of a match
|
||||||
that are to be output, are ignored.
|
that are to be output, are ignored.
|
||||||
</P>
|
</P>
|
||||||
|
|
|
@ -337,7 +337,7 @@ part of the pattern. If the PCRE2_EXTENDED_MORE option is set, the same
|
||||||
applies, but in addition unescaped space and horizontal tab characters are
|
applies, but in addition unescaped space and horizontal tab characters are
|
||||||
ignored inside a character class. Note: only these two characters are ignored,
|
ignored inside a character class. Note: only these two characters are ignored,
|
||||||
not the full set of pattern white space characters that are ignored outside a
|
not the full set of pattern white space characters that are ignored outside a
|
||||||
character class. Option settings can be changed within a pattern; see the
|
character class. Option settings can be changed within a pattern; see the
|
||||||
section entitled
|
section entitled
|
||||||
<a href="#internaloptions">"Internal Option Setting"</a>
|
<a href="#internaloptions">"Internal Option Setting"</a>
|
||||||
below.
|
below.
|
||||||
|
|
|
@ -423,7 +423,7 @@ patterns. Modifiers on a pattern can change these settings.
|
||||||
This line is used in test files that can also be processed by <b>perltest.sh</b>
|
This line is used in test files that can also be processed by <b>perltest.sh</b>
|
||||||
to confirm that Perl gives the same results as PCRE2. Subsequent tests are
|
to confirm that Perl gives the same results as PCRE2. Subsequent tests are
|
||||||
checked for the use of <b>pcre2test</b> features that are incompatible with the
|
checked for the use of <b>pcre2test</b> features that are incompatible with the
|
||||||
<b>perltest.sh</b> script.
|
<b>perltest.sh</b> script.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Patterns must use '/' as their delimiter, and only certain modifiers are
|
Patterns must use '/' as their delimiter, and only certain modifiers are
|
||||||
|
|
|
@ -180,8 +180,8 @@ REVISION
|
||||||
Last updated: 17 September 2018
|
Last updated: 17 September 2018
|
||||||
Copyright (c) 1997-2018 University of Cambridge.
|
Copyright (c) 1997-2018 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2API(3) Library Functions Manual PCRE2API(3)
|
PCRE2API(3) Library Functions Manual PCRE2API(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -3829,8 +3829,8 @@ REVISION
|
||||||
Last updated: 04 November 2020
|
Last updated: 04 November 2020
|
||||||
Copyright (c) 1997-2020 University of Cambridge.
|
Copyright (c) 1997-2020 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3)
|
PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -4423,8 +4423,8 @@ REVISION
|
||||||
Last updated: 20 March 2020
|
Last updated: 20 March 2020
|
||||||
Copyright (c) 1997-2020 University of Cambridge.
|
Copyright (c) 1997-2020 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3)
|
PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -4853,8 +4853,8 @@ REVISION
|
||||||
Last updated: 03 February 2019
|
Last updated: 03 February 2019
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3)
|
PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -5066,8 +5066,8 @@ REVISION
|
||||||
Last updated: 06 October 2020
|
Last updated: 06 October 2020
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2JIT(3) Library Functions Manual PCRE2JIT(3)
|
PCRE2JIT(3) Library Functions Manual PCRE2JIT(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -5491,8 +5491,8 @@ REVISION
|
||||||
Last updated: 23 May 2019
|
Last updated: 23 May 2019
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3)
|
PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -5561,8 +5561,8 @@ REVISION
|
||||||
Last updated: 02 February 2019
|
Last updated: 02 February 2019
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3)
|
PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -5785,8 +5785,8 @@ REVISION
|
||||||
Last updated: 23 May 2019
|
Last updated: 23 May 2019
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3)
|
PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -6165,8 +6165,8 @@ REVISION
|
||||||
Last updated: 04 September 2019
|
Last updated: 04 September 2019
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2PATTERN(3) Library Functions Manual PCRE2PATTERN(3)
|
PCRE2PATTERN(3) Library Functions Manual PCRE2PATTERN(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -9613,8 +9613,8 @@ REVISION
|
||||||
Last updated: 06 October 2020
|
Last updated: 06 October 2020
|
||||||
Copyright (c) 1997-2020 University of Cambridge.
|
Copyright (c) 1997-2020 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2PERFORM(3) Library Functions Manual PCRE2PERFORM(3)
|
PCRE2PERFORM(3) Library Functions Manual PCRE2PERFORM(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -9848,8 +9848,8 @@ REVISION
|
||||||
Last updated: 03 February 2019
|
Last updated: 03 February 2019
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2POSIX(3) Library Functions Manual PCRE2POSIX(3)
|
PCRE2POSIX(3) Library Functions Manual PCRE2POSIX(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -10178,8 +10178,8 @@ REVISION
|
||||||
Last updated: 30 January 2019
|
Last updated: 30 January 2019
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2SAMPLE(3) Library Functions Manual PCRE2SAMPLE(3)
|
PCRE2SAMPLE(3) Library Functions Manual PCRE2SAMPLE(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -10457,8 +10457,8 @@ REVISION
|
||||||
Last updated: 27 June 2018
|
Last updated: 27 June 2018
|
||||||
Copyright (c) 1997-2018 University of Cambridge.
|
Copyright (c) 1997-2018 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2SYNTAX(3) Library Functions Manual PCRE2SYNTAX(3)
|
PCRE2SYNTAX(3) Library Functions Manual PCRE2SYNTAX(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -10973,8 +10973,8 @@ REVISION
|
||||||
Last updated: 28 December 2019
|
Last updated: 28 December 2019
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3)
|
PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -11408,5 +11408,5 @@ REVISION
|
||||||
Last updated: 23 February 2020
|
Last updated: 23 February 2020
|
||||||
Copyright (c) 1997-2020 University of Cambridge.
|
Copyright (c) 1997-2020 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -598,11 +598,11 @@ is valid" flag (that can be updated atomically) must be used:
|
||||||
if (!pointer_is_valid)
|
if (!pointer_is_valid)
|
||||||
{
|
{
|
||||||
Get a write (unique) lock for pointer
|
Get a write (unique) lock for pointer
|
||||||
if (!pointer_is_valid)
|
if (!pointer_is_valid)
|
||||||
{
|
{
|
||||||
pointer = pcre2_compile(...
|
pointer = pcre2_compile(...
|
||||||
pointer_is_valid = TRUE
|
pointer_is_valid = TRUE
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Release the lock
|
Release the lock
|
||||||
Use pointer in pcre2_match()
|
Use pointer in pcre2_match()
|
||||||
|
|
|
@ -22,12 +22,12 @@ that the next three characters are not "a". It just asserts that the next
|
||||||
character is not "a" three times (in principle; PCRE2 optimizes this to run the
|
character is not "a" three times (in principle; PCRE2 optimizes this to run the
|
||||||
assertion just once). Perl allows some repeat quantifiers on other assertions,
|
assertion just once). Perl allows some repeat quantifiers on other assertions,
|
||||||
for example, \eb* (but not \eb{3}, though oddly it does allow ^{3}), but these
|
for example, \eb* (but not \eb{3}, though oddly it does allow ^{3}), but these
|
||||||
do not seem to have any use. PCRE2 does not allow any kind of quantifier on
|
do not seem to have any use. PCRE2 does not allow any kind of quantifier on
|
||||||
non-lookaround assertions.
|
non-lookaround assertions.
|
||||||
.P
|
.P
|
||||||
3. Capture groups that occur inside negative lookaround assertions are counted,
|
3. Capture groups that occur inside negative lookaround assertions are counted,
|
||||||
but their entries in the offsets vector are set only when a negative assertion
|
but their entries in the offsets vector are set only when a negative assertion
|
||||||
is a condition that has a matching branch (that is, the condition is false).
|
is a condition that has a matching branch (that is, the condition is false).
|
||||||
Perl may set such capture groups in other circumstances.
|
Perl may set such capture groups in other circumstances.
|
||||||
.P
|
.P
|
||||||
4. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu,
|
4. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu,
|
||||||
|
@ -72,7 +72,7 @@ other character. Note the following examples:
|
||||||
\eQA\eB\eE A\eB A\eB
|
\eQA\eB\eE A\eB A\eB
|
||||||
\eQ\e\eE \e \e\eE
|
\eQ\e\eE \e \e\eE
|
||||||
.sp
|
.sp
|
||||||
The \eQ...\eE sequence is recognized both inside and outside character classes
|
The \eQ...\eE sequence is recognized both inside and outside character classes
|
||||||
by both PCRE2 and Perl.
|
by both PCRE2 and Perl.
|
||||||
.P
|
.P
|
||||||
7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
|
7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
|
||||||
|
@ -132,8 +132,8 @@ always matches an upper case letter. I think Perl has changed in this respect;
|
||||||
in the release at the time of writing (5.32), \ep{Lu} and \ep{Ll} match all
|
in the release at the time of writing (5.32), \ep{Lu} and \ep{Ll} match all
|
||||||
letters, regardless of case, when case independence is specified.
|
letters, regardless of case, when case independence is specified.
|
||||||
.P
|
.P
|
||||||
16. From release 5.32.0, Perl locks out the use of \eK in lookaround
|
16. From release 5.32.0, Perl locks out the use of \eK in lookaround
|
||||||
assertions. In PCRE2, \eK is acted on when it occurs in positive assertions,
|
assertions. In PCRE2, \eK is acted on when it occurs in positive assertions,
|
||||||
but is ignored in negative assertions.
|
but is ignored in negative assertions.
|
||||||
.P
|
.P
|
||||||
17. PCRE2 provides some extensions to the Perl regular expression facilities.
|
17. PCRE2 provides some extensions to the Perl regular expression facilities.
|
||||||
|
|
|
@ -79,7 +79,7 @@ matching substrings, or if \fB--only-matching\fP, \fB--file-offsets\fP, or
|
||||||
(either shown literally, or as an offset), scanning resumes immediately
|
(either shown literally, or as an offset), scanning resumes immediately
|
||||||
following the match, so that further matches on the same line can be found. If
|
following the match, so that further matches on the same line can be found. If
|
||||||
there are multiple patterns, they are all tried on the remainder of the line,
|
there are multiple patterns, they are all tried on the remainder of the line,
|
||||||
but patterns that follow the one that matched are not tried on the earlier
|
but patterns that follow the one that matched are not tried on the earlier
|
||||||
matched part of the line.
|
matched part of the line.
|
||||||
.P
|
.P
|
||||||
This behaviour means that the order in which multiple patterns are specified
|
This behaviour means that the order in which multiple patterns are specified
|
||||||
|
@ -422,7 +422,7 @@ a separate line. Searching normally stops as soon as a matching line is found
|
||||||
in a file. However, if the \fB-c\fP (count) option is also used, matching
|
in a file. However, if the \fB-c\fP (count) option is also used, matching
|
||||||
continues in order to obtain the correct count, and those files that have at
|
continues in order to obtain the correct count, and those files that have at
|
||||||
least one match are listed along with their counts. Using this option with
|
least one match are listed along with their counts. Using this option with
|
||||||
\fB-c\fP is a way of suppressing the listing of files with no matches that
|
\fB-c\fP is a way of suppressing the listing of files with no matches that
|
||||||
occurs with \fB-c\fP on its own. This option overrides any previous \fB-H\fP,
|
occurs with \fB-c\fP on its own. This option overrides any previous \fB-H\fP,
|
||||||
\fB-h\fP, or \fB-L\fP options.
|
\fB-h\fP, or \fB-L\fP options.
|
||||||
.TP
|
.TP
|
||||||
|
@ -489,12 +489,12 @@ large processing buffer, this should not be a problem, but the \fB-M\fP option
|
||||||
does not work when input is read line by line (see \fB--line-buffered\fP).
|
does not work when input is read line by line (see \fB--line-buffered\fP).
|
||||||
.TP
|
.TP
|
||||||
\fB-m\fP \fInumber\fP, \fB--max-count\fP=\fInumber\fP
|
\fB-m\fP \fInumber\fP, \fB--max-count\fP=\fInumber\fP
|
||||||
Stop processing after finding \fInumber\fP matching lines, or non-matching
|
Stop processing after finding \fInumber\fP matching lines, or non-matching
|
||||||
lines if \fB-v\fP is also set. Any trailing context lines are output after the
|
lines if \fB-v\fP is also set. Any trailing context lines are output after the
|
||||||
final match. In multiline mode, each multiline match counts as just one line
|
final match. In multiline mode, each multiline match counts as just one line
|
||||||
for this purpose. If this limit is reached when reading the standard input from
|
for this purpose. If this limit is reached when reading the standard input from
|
||||||
a regular file, the file is left positioned just after the last matching line.
|
a regular file, the file is left positioned just after the last matching line.
|
||||||
If \fB-c\fP is also set, the count that is output is never greater than
|
If \fB-c\fP is also set, the count that is output is never greater than
|
||||||
\fInumber\fP. This option has no effect if used with \fB-L\fP, \fB-l\fP, or
|
\fInumber\fP. This option has no effect if used with \fB-L\fP, \fB-l\fP, or
|
||||||
\fB-q\fP, or when just checking for a match in a binary file.
|
\fB-q\fP, or when just checking for a match in a binary file.
|
||||||
.TP
|
.TP
|
||||||
|
@ -598,7 +598,7 @@ newline; $r by carriage return; $t by tab; $v by vertical tab.
|
||||||
.sp
|
.sp
|
||||||
$o<digits> or $o{<digits>} is replaced by the character whose code point is the
|
$o<digits> or $o{<digits>} is replaced by the character whose code point is the
|
||||||
given octal number. In the first form, up to three octal digits are processed.
|
given octal number. In the first form, up to three octal digits are processed.
|
||||||
When more digits are needed in Unicode mode to specify a wide character, the
|
When more digits are needed in Unicode mode to specify a wide character, the
|
||||||
second form must be used.
|
second form must be used.
|
||||||
.sp
|
.sp
|
||||||
$x<digits> or $x{<digits>} is replaced by the character represented by the
|
$x<digits> or $x{<digits>} is replaced by the character represented by the
|
||||||
|
@ -686,12 +686,12 @@ total would always be zero.
|
||||||
Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
|
Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
|
||||||
with UTF-8 support. All patterns (including those for any \fB--exclude\fP and
|
with UTF-8 support. All patterns (including those for any \fB--exclude\fP and
|
||||||
\fB--include\fP options) and all lines that are scanned must be valid strings
|
\fB--include\fP options) and all lines that are scanned must be valid strings
|
||||||
of UTF-8 characters. If an invalid UTF-8 string is encountered, an error
|
of UTF-8 characters. If an invalid UTF-8 string is encountered, an error
|
||||||
occurs.
|
occurs.
|
||||||
.TP
|
.TP
|
||||||
\fB-U\fP, \fB--utf-allow-invalid\fP
|
\fB-U\fP, \fB--utf-allow-invalid\fP
|
||||||
As \fB--utf\fP, but in addition subject lines may contain invalid UTF-8 code
|
As \fB--utf\fP, but in addition subject lines may contain invalid UTF-8 code
|
||||||
unit sequences. These can never form part of any pattern match. Patterns
|
unit sequences. These can never form part of any pattern match. Patterns
|
||||||
themselves, however, must still be valid UTF-8 strings. This facility allows
|
themselves, however, must still be valid UTF-8 strings. This facility allows
|
||||||
valid UTF-8 strings to be sought within arbitrary byte sequences in executable
|
valid UTF-8 strings to be sought within arbitrary byte sequences in executable
|
||||||
or other binary files. For more details about matching in non-valid UTF-8
|
or other binary files. For more details about matching in non-valid UTF-8
|
||||||
|
@ -708,7 +708,7 @@ ignored.
|
||||||
.TP
|
.TP
|
||||||
\fB-v\fP, \fB--invert-match\fP
|
\fB-v\fP, \fB--invert-match\fP
|
||||||
Invert the sense of the match, so that lines which do \fInot\fP match any of
|
Invert the sense of the match, so that lines which do \fInot\fP match any of
|
||||||
the patterns are the ones that are found. When this option is set, options such
|
the patterns are the ones that are found. When this option is set, options such
|
||||||
as \fB--only-matching\fP and \fB--output\fP, which specify parts of a match
|
as \fB--only-matching\fP and \fB--output\fP, which specify parts of a match
|
||||||
that are to be output, are ignored.
|
that are to be output, are ignored.
|
||||||
.TP
|
.TP
|
||||||
|
@ -855,7 +855,7 @@ output string, so if you want a newline, you must include it explicitly using
|
||||||
the escape $n. For example:
|
the escape $n. For example:
|
||||||
.sp
|
.sp
|
||||||
pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' <some file>
|
pcre2grep '(.)(..(.))(?C"|[$1] [$2] [$3]$n")' <some file>
|
||||||
.sp
|
.sp
|
||||||
Matching continues normally after the string is output. If you want to see only
|
Matching continues normally after the string is output. If you want to see only
|
||||||
the callout output but not any output from an actual match, you should end the
|
the callout output but not any output from an actual match, you should end the
|
||||||
pattern with (*FAIL).
|
pattern with (*FAIL).
|
||||||
|
|
|
@ -309,7 +309,7 @@ part of the pattern. If the PCRE2_EXTENDED_MORE option is set, the same
|
||||||
applies, but in addition unescaped space and horizontal tab characters are
|
applies, but in addition unescaped space and horizontal tab characters are
|
||||||
ignored inside a character class. Note: only these two characters are ignored,
|
ignored inside a character class. Note: only these two characters are ignored,
|
||||||
not the full set of pattern white space characters that are ignored outside a
|
not the full set of pattern white space characters that are ignored outside a
|
||||||
character class. Option settings can be changed within a pattern; see the
|
character class. Option settings can be changed within a pattern; see the
|
||||||
section entitled
|
section entitled
|
||||||
.\" HTML <a href="#internaloptions">
|
.\" HTML <a href="#internaloptions">
|
||||||
.\" </a>
|
.\" </a>
|
||||||
|
|
|
@ -372,7 +372,7 @@ patterns. Modifiers on a pattern can change these settings.
|
||||||
This line is used in test files that can also be processed by \fBperltest.sh\fP
|
This line is used in test files that can also be processed by \fBperltest.sh\fP
|
||||||
to confirm that Perl gives the same results as PCRE2. Subsequent tests are
|
to confirm that Perl gives the same results as PCRE2. Subsequent tests are
|
||||||
checked for the use of \fBpcre2test\fP features that are incompatible with the
|
checked for the use of \fBpcre2test\fP features that are incompatible with the
|
||||||
\fBperltest.sh\fP script.
|
\fBperltest.sh\fP script.
|
||||||
.P
|
.P
|
||||||
Patterns must use '/' as their delimiter, and only certain modifiers are
|
Patterns must use '/' as their delimiter, and only certain modifiers are
|
||||||
supported. Comment lines, #pattern commands, and #subject commands that set or
|
supported. Comment lines, #pattern commands, and #subject commands that set or
|
||||||
|
|
|
@ -224,7 +224,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
#define PACKAGE_NAME "PCRE2"
|
#define PACKAGE_NAME "PCRE2"
|
||||||
|
|
||||||
/* Define to the full name and version of this package. */
|
/* Define to the full name and version of this package. */
|
||||||
#define PACKAGE_STRING "PCRE2 10.35"
|
#define PACKAGE_STRING "PCRE2 10.36-RC1"
|
||||||
|
|
||||||
/* Define to the one symbol short name of this package. */
|
/* Define to the one symbol short name of this package. */
|
||||||
#define PACKAGE_TARNAME "pcre2"
|
#define PACKAGE_TARNAME "pcre2"
|
||||||
|
@ -233,7 +233,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
#define PACKAGE_URL ""
|
#define PACKAGE_URL ""
|
||||||
|
|
||||||
/* Define to the version of this package. */
|
/* Define to the version of this package. */
|
||||||
#define PACKAGE_VERSION "10.35"
|
#define PACKAGE_VERSION "10.36-RC1"
|
||||||
|
|
||||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||||
|
@ -358,7 +358,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Version number of package */
|
/* Version number of package */
|
||||||
#define VERSION "10.35"
|
#define VERSION "10.36-RC1"
|
||||||
|
|
||||||
/* Define to 1 if on MINIX. */
|
/* Define to 1 if on MINIX. */
|
||||||
/* #undef _MINIX */
|
/* #undef _MINIX */
|
||||||
|
|
|
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
/* The current PCRE version information. */
|
/* The current PCRE version information. */
|
||||||
|
|
||||||
#define PCRE2_MAJOR 10
|
#define PCRE2_MAJOR 10
|
||||||
#define PCRE2_MINOR 35
|
#define PCRE2_MINOR 36
|
||||||
#define PCRE2_PRERELEASE
|
#define PCRE2_PRERELEASE -RC1
|
||||||
#define PCRE2_DATE 2020-05-09
|
#define PCRE2_DATE 2020-11-05
|
||||||
|
|
||||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||||
imported have to be identified as such. When building PCRE2, the appropriate
|
imported have to be identified as such. When building PCRE2, the appropriate
|
||||||
|
|
|
@ -63,7 +63,7 @@ given, they are written in binary. */
|
||||||
#include "pcre2_maketables.c"
|
#include "pcre2_maketables.c"
|
||||||
|
|
||||||
|
|
||||||
static char *classlist[] =
|
static const char *classlist[] =
|
||||||
{
|
{
|
||||||
"space", "xdigit", "digit", "upper", "lower",
|
"space", "xdigit", "digit", "upper", "lower",
|
||||||
"word", "graph", "print", "punct", "cntrl"
|
"word", "graph", "print", "punct", "cntrl"
|
||||||
|
@ -97,7 +97,7 @@ FILE *f;
|
||||||
int i;
|
int i;
|
||||||
int nclass = 0;
|
int nclass = 0;
|
||||||
BOOL binary = FALSE;
|
BOOL binary = FALSE;
|
||||||
char *env = "C";
|
char *env = (char *)"C";
|
||||||
const unsigned char *tables;
|
const unsigned char *tables;
|
||||||
const unsigned char *base_of_tables;
|
const unsigned char *base_of_tables;
|
||||||
|
|
||||||
|
@ -105,7 +105,7 @@ const unsigned char *base_of_tables;
|
||||||
|
|
||||||
for (i = 1; i < argc; i++)
|
for (i = 1; i < argc; i++)
|
||||||
{
|
{
|
||||||
unsigned char *arg = (unsigned char *)argv[i];
|
char *arg = argv[i];
|
||||||
if (*arg != '-') break;
|
if (*arg != '-') break;
|
||||||
|
|
||||||
if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0)
|
if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0)
|
||||||
|
|
|
@ -7192,7 +7192,7 @@ if (utf && end_subject != true_end_subject &&
|
||||||
starting code units in 8-bit and 16-bit modes. */
|
starting code units in 8-bit and 16-bit modes. */
|
||||||
|
|
||||||
start_match = end_subject + 1;
|
start_match = end_subject + 1;
|
||||||
|
|
||||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
|
while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
|
||||||
start_match++;
|
start_match++;
|
||||||
|
|
|
@ -2070,7 +2070,7 @@ if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
|
||||||
uint32_t max = utf? 0x0010ffffu : 0xffu;
|
uint32_t max = utf? 0x0010ffffu : 0xffu;
|
||||||
if (*value > max)
|
if (*value > max)
|
||||||
{
|
{
|
||||||
if (!callout)
|
if (!callout)
|
||||||
fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
|
fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
|
||||||
"code point greater than 0x%x is invalid\n", (int)(string - begin), max);
|
"code point greater than 0x%x is invalid\n", (int)(string - begin), max);
|
||||||
rc = DDE_ERROR;
|
rc = DDE_ERROR;
|
||||||
|
@ -2376,8 +2376,8 @@ while (length > 0)
|
||||||
if (value == STDOUT_NL_CODE)
|
if (value == STDOUT_NL_CODE)
|
||||||
{
|
{
|
||||||
memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
|
memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
|
||||||
argsptr += STDOUT_NL_LEN;
|
argsptr += STDOUT_NL_LEN;
|
||||||
}
|
}
|
||||||
else if (utf && value > 127)
|
else if (utf && value > 127)
|
||||||
{
|
{
|
||||||
int n = ord2utf8(value);
|
int n = ord2utf8(value);
|
||||||
|
|
|
@ -5141,8 +5141,8 @@ PCRE2_SIZE erroroffset;
|
||||||
if (restrict_for_perl_test && delimiter != '/')
|
if (restrict_for_perl_test && delimiter != '/')
|
||||||
{
|
{
|
||||||
fprintf(outfile, "** The only allowed delimiter after #perltest is '/'\n");
|
fprintf(outfile, "** The only allowed delimiter after #perltest is '/'\n");
|
||||||
return PR_ABEND;
|
return PR_ABEND;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Initialize the context and pattern/data controls for this test from the
|
/* Initialize the context and pattern/data controls for this test from the
|
||||||
defaults. */
|
defaults. */
|
||||||
|
|
Loading…
Reference in New Issue