File tidies and documentation update for 10.32-RC1 Release Candidate.
This commit is contained in:
parent
6fe70cda7f
commit
392974a0cb
|
@ -117,7 +117,7 @@ CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H)
|
|||
|
||||
IF(HAVE_INTTYPES_H)
|
||||
SET(PCRE2_HAVE_INTTYPES_H 1)
|
||||
ELSE(HAVE_INTTYPES_H)
|
||||
ELSE(HAVE_INTTYPES_H)
|
||||
SET(PCRE2_HAVE_INTTYPES_H 0)
|
||||
ENDIF(HAVE_INTTYPES_H)
|
||||
|
||||
|
|
122
ChangeLog
122
ChangeLog
|
@ -2,30 +2,30 @@ Change Log for PCRE2
|
|||
--------------------
|
||||
|
||||
|
||||
Version 10.32-RC1 19-February-2018
|
||||
----------------------------------
|
||||
Version 10.32-RC1 13-August-2018
|
||||
--------------------------------
|
||||
|
||||
1. When matching using the REG_STARTEND feature of the POSIX API with a
|
||||
1. When matching using the REG_STARTEND feature of the POSIX API with a
|
||||
non-zero starting offset, unset capturing groups with lower numbers than a
|
||||
group that did capture something were not being correctly returned as "unset"
|
||||
group that did capture something were not being correctly returned as "unset"
|
||||
(that is, with offset values of -1).
|
||||
|
||||
2. When matching using the POSIX API, pcre2test used to omit listing unset
|
||||
2. When matching using the POSIX API, pcre2test used to omit listing unset
|
||||
groups altogether. Now it shows those that come before any actual captures as
|
||||
"<unset>", as happens for non-POSIX matching.
|
||||
|
||||
3. Running "pcre2test -C" always stated "\R matches CR, LF, or CRLF only",
|
||||
whatever the build configuration was. It now correctly says "\R matches all
|
||||
Unicode newlines" in the default case when --enable-bsr-anycrlf has not been
|
||||
specified. Similarly, running "pcre2test -C bsr" never produced the result
|
||||
3. Running "pcre2test -C" always stated "\R matches CR, LF, or CRLF only",
|
||||
whatever the build configuration was. It now correctly says "\R matches all
|
||||
Unicode newlines" in the default case when --enable-bsr-anycrlf has not been
|
||||
specified. Similarly, running "pcre2test -C bsr" never produced the result
|
||||
ANY.
|
||||
|
||||
4. Matching the pattern /(*UTF)\C[^\v]+\x80/ against an 8-bit string containing
|
||||
multi-code-unit characters caused bad behaviour and possibly a crash. This
|
||||
4. Matching the pattern /(*UTF)\C[^\v]+\x80/ against an 8-bit string containing
|
||||
multi-code-unit characters caused bad behaviour and possibly a crash. This
|
||||
issue was fixed for other kinds of repeat in release 10.20 by change 19, but
|
||||
repeating character classes were overlooked.
|
||||
|
||||
5. pcre2grep now supports the inclusion of binary zeros in patterns that are
|
||||
5. pcre2grep now supports the inclusion of binary zeros in patterns that are
|
||||
read from files via the -f option.
|
||||
|
||||
6. A small fix to pcre2grep to avoid compiler warnings for -Wformat-overflow=2.
|
||||
|
@ -43,79 +43,79 @@ offset is set zero for early errors.
|
|||
|
||||
10. A number of patches for Windows support from Daniel Richard G:
|
||||
|
||||
(a) List of error numbers in Runtest.bat corrected (it was not the same as in
|
||||
Runtest).
|
||||
|
||||
(b) pcre2grep snprintf() workaround as used elsewhere in the tree.
|
||||
|
||||
(c) Support for non-C99 snprintf() that returns -1 in the overflow case.
|
||||
|
||||
11. Minor tidy of pcre2_dfa_match() code.
|
||||
(a) List of error numbers in Runtest.bat corrected (it was not the same as in
|
||||
Runtest).
|
||||
|
||||
12. Refactored pcre2_dfa_match() so that the internal recursive calls no longer
|
||||
use the stack for local workspace and local ovectors. Instead, an initial block
|
||||
of stack is reserved, but if this is insufficient, heap memory is used. The
|
||||
(b) pcre2grep snprintf() workaround as used elsewhere in the tree.
|
||||
|
||||
(c) Support for non-C99 snprintf() that returns -1 in the overflow case.
|
||||
|
||||
11. Minor tidy of pcre2_dfa_match() code.
|
||||
|
||||
12. Refactored pcre2_dfa_match() so that the internal recursive calls no longer
|
||||
use the stack for local workspace and local ovectors. Instead, an initial block
|
||||
of stack is reserved, but if this is insufficient, heap memory is used. The
|
||||
heap limit parameter now applies to pcre2_dfa_match().
|
||||
|
||||
13. If a "find limits" test of DFA matching in pcre2test resulted in too many
|
||||
13. If a "find limits" test of DFA matching in pcre2test resulted in too many
|
||||
matches for the ovector, no matches were displayed.
|
||||
|
||||
14. Removed an occurrence of ctrl/Z from test 6 because Windows treats it as
|
||||
14. Removed an occurrence of ctrl/Z from test 6 because Windows treats it as
|
||||
EOF. The test looks to have come from a fuzzer.
|
||||
|
||||
15. If PCRE2 was built with a default match limit a lot greater than the
|
||||
15. If PCRE2 was built with a default match limit a lot greater than the
|
||||
default default of 10 000 000, some JIT tests of the match limit no longer
|
||||
failed. All such tests now set 10 000 000 as the upper limit.
|
||||
|
||||
16. Another Windows related patch for pcre2grep to ensure that WIN32 is
|
||||
undefiined under Cygwin.
|
||||
16. Another Windows related patch for pcre2grep to ensure that WIN32 is
|
||||
undefined under Cygwin.
|
||||
|
||||
17. Test for the presence of stdint.h and inttypes.h in configure and CMake and
|
||||
include whichever exists (stdint preferred) instead of unconditionally
|
||||
17. Test for the presence of stdint.h and inttypes.h in configure and CMake and
|
||||
include whichever exists (stdint preferred) instead of unconditionally
|
||||
including stdint. This makes life easier for old and non-standard systems.
|
||||
|
||||
18. Further changes to improve portability, especially to old and/or non-
|
||||
standard systems:
|
||||
|
||||
(a) Put all printf arguments in RunGrepTest into single, not double, quotes,
|
||||
and use \0 not \x00 for binary zero.
|
||||
|
||||
and use \0 not \x00 for binary zero.
|
||||
|
||||
(b) Avoid the use of C++ (i.e. BCPL) // comments.
|
||||
|
||||
|
||||
(c) Parameterize the use of %zu in pcre2test to make it like %td. For both of
|
||||
these now, if using MSVC or a standard C before C99, %lu is used with a
|
||||
cast if necessary.
|
||||
|
||||
19. Applied a contributed patch to CMakeLists.txt to increase the stack size
|
||||
these now, if using MSVC or a standard C before C99, %lu is used with a
|
||||
cast if necessary.
|
||||
|
||||
19. Applied a contributed patch to CMakeLists.txt to increase the stack size
|
||||
when linking pcre2test with MSVC. This gets rid of a stack overflow error in
|
||||
the standard set of tests.
|
||||
|
||||
20. Output a warning in pcre2test when ignoring the "altglobal" modifier when
|
||||
it is given with the "replace" modifier.
|
||||
|
||||
21. In both pcre2test and pcre2_substitute(), with global matching, a pattern
|
||||
that matched an empty string, but never at the starting match offset, was not
|
||||
handled in a Perl-compatible way. The pattern /(?<=\G.)/ is an example of such
|
||||
a pattern. Because \G is in a lookbehind assertion, there has to be a
|
||||
"bumpalong" before there can be a match. The automatic "advance by one
|
||||
character after an empty string match" rule is therefore inappropriate. A more
|
||||
21. In both pcre2test and pcre2_substitute(), with global matching, a pattern
|
||||
that matched an empty string, but never at the starting match offset, was not
|
||||
handled in a Perl-compatible way. The pattern /(?<=\G.)/ is an example of such
|
||||
a pattern. Because \G is in a lookbehind assertion, there has to be a
|
||||
"bumpalong" before there can be a match. The automatic "advance by one
|
||||
character after an empty string match" rule is therefore inappropriate. A more
|
||||
complicated algorithm has now been implemented.
|
||||
|
||||
22. When checking to see if a lookbehind is of fixed length, lookaheads were
|
||||
correctly ignored, but qualifiers on lookaheads were not being ignored, leading
|
||||
22. When checking to see if a lookbehind is of fixed length, lookaheads were
|
||||
correctly ignored, but qualifiers on lookaheads were not being ignored, leading
|
||||
to an incorrect "lookbehind assertion is not fixed length" error.
|
||||
|
||||
23. The VERSION condition test was reading fractional PCRE2 version numbers
|
||||
23. The VERSION condition test was reading fractional PCRE2 version numbers
|
||||
such as the 04 in 10.04 incorrectly and hence giving wrong results.
|
||||
|
||||
24. Updated to Unicode version 11.0.0. As well as the usual addition of new
|
||||
24. Updated to Unicode version 11.0.0. As well as the usual addition of new
|
||||
scripts and characters, this involved re-jigging the grapheme break property
|
||||
algorithm because Unicode has changed the way emojis are handled.
|
||||
|
||||
25. Fixed an obscure bug that struck when there were two atomic groups not
|
||||
separated by something with a backtracking point. There could be an incorrect
|
||||
25. Fixed an obscure bug that struck when there were two atomic groups not
|
||||
separated by something with a backtracking point. There could be an incorrect
|
||||
backtrack into the first of the atomic groups. A complicated example is
|
||||
/(?>a(*:1))(?>b)(*SKIP:1)x|.*/ matched against "abc", where the *SKIP
|
||||
/(?>a(*:1))(?>b)(*SKIP:1)x|.*/ matched against "abc", where the *SKIP
|
||||
shouldn't find a MARK (because it is in an atomic group), but it did.
|
||||
|
||||
26. Upgraded the perltest.sh script: (1) #pattern lines can now be used to set
|
||||
|
@ -125,9 +125,9 @@ default "mark" modifier; (3) Unsupported #command lines give a warning when
|
|||
they are ignored; (4) Mark data is output only if the "mark" modifier is
|
||||
present.
|
||||
|
||||
27. (*ACCEPT:ARG), (*FAIL:ARG), and (*COMMIT:ARG) are now supported.
|
||||
27. (*ACCEPT:ARG), (*FAIL:ARG), and (*COMMIT:ARG) are now supported.
|
||||
|
||||
28. A (*MARK) name was not being passed back for positive assertions that were
|
||||
28. A (*MARK) name was not being passed back for positive assertions that were
|
||||
terminated by (*ACCEPT).
|
||||
|
||||
29. Add support for \N{U+dddd}, but not in EBCDIC environments.
|
||||
|
@ -141,20 +141,20 @@ Now, when Unicode support is compiled, PCRE2_EXTENDED also discards U+0085,
|
|||
U+200E, U+200F, U+2028, and U+2029, which are additional characters defined by
|
||||
Unicode as "Pattern White Space". This makes PCRE2 compatible with Perl.
|
||||
|
||||
32. In certain circumstances, option settings within patterns were not being
|
||||
correctly processed. For example, the pattern /((?i)A)(?m)B/ incorrectly
|
||||
matched "ab". (The (?m) setting lost the fact that (?i) should be reset at the
|
||||
end of its group during the parse process, but without another setting such as
|
||||
32. In certain circumstances, option settings within patterns were not being
|
||||
correctly processed. For example, the pattern /((?i)A)(?m)B/ incorrectly
|
||||
matched "ab". (The (?m) setting lost the fact that (?i) should be reset at the
|
||||
end of its group during the parse process, but without another setting such as
|
||||
(?m) the compile phase got it right.) This bug was introduced by the
|
||||
refactoring in release 10.23.
|
||||
|
||||
33. PCRE2 uses bcopy() if available when memmove() is not, and it used just to
|
||||
33. PCRE2 uses bcopy() if available when memmove() is not, and it used just to
|
||||
define memmove() as a function call to bcopy(). This hasn't been tested for a
|
||||
long time because in pcre2test the result of memmove() was being used, whereas
|
||||
bcopy() doesn't return a result. This feature is now refactored always to call
|
||||
an emulation function when there is no memmove(). The emulation makes use of
|
||||
long time because in pcre2test the result of memmove() was being used, whereas
|
||||
bcopy() doesn't return a result. This feature is now refactored always to call
|
||||
an emulation function when there is no memmove(). The emulation makes use of
|
||||
bcopy() when available.
|
||||
|
||||
|
||||
|
||||
Version 10.31 12-February-2018
|
||||
------------------------------
|
||||
|
|
26
NEWS
26
NEWS
|
@ -1,6 +1,32 @@
|
|||
News about PCRE2 releases
|
||||
-------------------------
|
||||
|
||||
Version 10.32 13-August-2018
|
||||
----------------------------
|
||||
|
||||
This is another mainly bugfix and tidying release with a few minor
|
||||
enhancements.
|
||||
|
||||
1. pcre2grep now supports the inclusion of binary zeros in patterns that are
|
||||
read from files via the -f option.
|
||||
|
||||
2. ./configure now supports --enable-jit=auto, which automatically enables JIT
|
||||
if the hardware supports it.
|
||||
|
||||
3. In pcre2_dfa_match(), internal recursive calls no longer use the stack for
|
||||
local workspace and local ovectors. Instead, an initial block of stack is
|
||||
reserved, but if this is insufficient, heap memory is used. The heap limit
|
||||
parameter now applies to pcre2_dfa_match().
|
||||
|
||||
4. Updated to Unicode version 11.0.0.
|
||||
|
||||
5. (*ACCEPT:ARG), (*FAIL:ARG), and (*COMMIT:ARG) are now supported.
|
||||
|
||||
6. Added support for \N{U+dddd}, but not in EBCDIC environments.
|
||||
|
||||
7. Added support for (?^) to unset all imnsx options.
|
||||
|
||||
|
||||
Version 10.31 12-February-2018
|
||||
------------------------------
|
||||
|
||||
|
|
|
@ -329,7 +329,7 @@ cache can be deleted by selecting "File > Delete Cache".
|
|||
most recent build configuration is targeted by the tests. A summary of
|
||||
test results is presented. Complete test output is subsequently
|
||||
available for review in Testing\Temporary under your build dir.
|
||||
|
||||
|
||||
|
||||
BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO
|
||||
|
||||
|
|
|
@ -4,10 +4,10 @@
|
|||
# itself. What we are checking here is the file handling and options that are
|
||||
# supported by pcre2grep. This script must be run in the build directory.
|
||||
|
||||
# CODING CONVENTIONS:
|
||||
# CODING CONVENTIONS:
|
||||
# * Put printf arguments in single, not double quotes to avoid unwanted
|
||||
# escaping.
|
||||
# * Use \0 for binary zero in printf, not \x0, for the benefit of older
|
||||
# * Use \0 for binary zero in printf, not \x0, for the benefit of older
|
||||
# versions.
|
||||
|
||||
# Set the C locale, so that sort(1) behaves predictably.
|
||||
|
|
10
configure.ac
10
configure.ac
|
@ -11,16 +11,16 @@ dnl be defined as -RC2, for example. For real releases, it should be empty.
|
|||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [32])
|
||||
m4_define(pcre2_prerelease, [-RC1])
|
||||
m4_define(pcre2_date, [2018-02-19])
|
||||
m4_define(pcre2_date, [2018-08-13])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
||||
# Libtool shared library interface versions (current:revision:age)
|
||||
m4_define(libpcre2_8_version, [7:0:7])
|
||||
m4_define(libpcre2_16_version, [7:0:7])
|
||||
m4_define(libpcre2_32_version, [7:0:7])
|
||||
m4_define(libpcre2_posix_version, [2:0:0])
|
||||
m4_define(libpcre2_8_version, [7:1:7])
|
||||
m4_define(libpcre2_16_version, [7:1:7])
|
||||
m4_define(libpcre2_32_version, [7:1:7])
|
||||
m4_define(libpcre2_posix_version, [2:1:0])
|
||||
|
||||
AC_PREREQ(2.57)
|
||||
AC_INIT(PCRE2, pcre2_major.pcre2_minor[]pcre2_prerelease, , pcre2)
|
||||
|
|
|
@ -329,7 +329,7 @@ cache can be deleted by selecting "File > Delete Cache".
|
|||
most recent build configuration is targeted by the tests. A summary of
|
||||
test results is presented. Complete test output is subsequently
|
||||
available for review in Testing\Temporary under your build dir.
|
||||
|
||||
|
||||
|
||||
BUILDING PCRE2 ON WINDOWS WITH VISUAL STUDIO
|
||||
|
||||
|
|
|
@ -23,11 +23,11 @@ please consult the man page, in case the conversion went wrong.
|
|||
<P>
|
||||
PCRE2 is the name used for a revised API for the PCRE library, which is a set
|
||||
of functions, written in C, that implement regular expression pattern matching
|
||||
using the same syntax and semantics as Perl, with just a few differences. After
|
||||
nearly two decades, the limitations of the original API were making development
|
||||
using the same syntax and semantics as Perl, with just a few differences. After
|
||||
nearly two decades, the limitations of the original API were making development
|
||||
increasingly difficult. The new API is more extensible, and it was simplified
|
||||
by abolishing the separate "study" optimizing function; in PCRE2, patterns are
|
||||
automatically optimized where possible. Since forking from PCRE1, the code has
|
||||
automatically optimized where possible. Since forking from PCRE1, the code has
|
||||
been extensively refactored and new features introduced.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -164,7 +164,7 @@ listing), and the short pages for individual functions, are concatenated in
|
|||
pcre2build building PCRE2
|
||||
pcre2callout details of the callout feature
|
||||
pcre2compat discussion of Perl compatibility
|
||||
pcre2convert details of pattern conversion functions
|
||||
pcre2convert details of pattern conversion functions
|
||||
pcre2demo a demonstration C program that uses PCRE2
|
||||
pcre2grep description of the <b>pcre2grep</b> command (8-bit only)
|
||||
pcre2jit discussion of just-in-time optimization support
|
||||
|
@ -175,7 +175,7 @@ listing), and the short pages for individual functions, are concatenated in
|
|||
pcre2perform discussion of performance issues
|
||||
pcre2posix the POSIX-compatible C API for the 8-bit library
|
||||
pcre2sample discussion of the pcre2demo program
|
||||
pcre2serialize details of pattern serialization
|
||||
pcre2serialize details of pattern serialization
|
||||
pcre2syntax quick syntax reference
|
||||
pcre2test description of the <b>pcre2test</b> command
|
||||
pcre2unicode discussion of Unicode and UTF support
|
||||
|
|
|
@ -28,7 +28,7 @@ DESCRIPTION
|
|||
This function is part of an experimental set of pattern conversion functions.
|
||||
It frees the memory occupied by a convert context, using the memory
|
||||
freeing function from the general context with which it was created, or
|
||||
<b>free()</b> if that was not set. If the argument is NULL, the function returns
|
||||
<b>free()</b> if that was not set. If the argument is NULL, the function returns
|
||||
immediately without doing anything.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -28,7 +28,7 @@ DESCRIPTION
|
|||
This function is part of an experimental set of pattern conversion functions.
|
||||
It frees the memory occupied by a converted pattern that was obtained by
|
||||
calling <b>pcre2_pattern_convert()</b> with arguments that caused it to place
|
||||
the converted pattern into newly obtained heap memory. If the argument is NULL,
|
||||
the converted pattern into newly obtained heap memory. If the argument is NULL,
|
||||
the function returns immediately without doing anything.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -38,8 +38,8 @@ passed to a matching function. The arguments of this function are:
|
|||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
If <i>mcontext</i> is NULL, the function returns immediately, without doing
|
||||
anything.
|
||||
If <i>mcontext</i> is NULL, the function returns immediately, without doing
|
||||
anything.
|
||||
</P>
|
||||
<P>
|
||||
If <i>callback</i> is NULL and <i>callback_data</i> is NULL, an internal 32KiB
|
||||
|
|
|
@ -26,7 +26,7 @@ DESCRIPTION
|
|||
</b><br>
|
||||
<P>
|
||||
This function is used to free a JIT stack that was created by
|
||||
<b>pcre2_jit_stack_create()</b> when it is no longer needed. If the argument is
|
||||
<b>pcre2_jit_stack_create()</b> when it is no longer needed. If the argument is
|
||||
NULL, the function returns immediately without doing anything. For more
|
||||
details, see the
|
||||
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||
|
|
|
@ -28,7 +28,7 @@ DESCRIPTION
|
|||
</b><br>
|
||||
<P>
|
||||
This function encodes a list of compiled patterns into a byte stream that can
|
||||
be saved on disc or elsewhere. Note that this is not an abstract format like
|
||||
be saved on disc or elsewhere. Note that this is not an abstract format like
|
||||
Java or .NET. Conversion of the byte stream back into usable compiled patterns
|
||||
can only happen on a host that is running the same version of PCRE2, with the
|
||||
same code unit width, and the host must also have the same endianness, pointer
|
||||
|
|
|
@ -27,7 +27,7 @@ DESCRIPTION
|
|||
<P>
|
||||
This function frees the memory that was obtained by
|
||||
<b>pcre2_serialize_encode()</b> to hold a serialized byte stream. The argument
|
||||
must point to such a byte stream or be NULL, in which case the function returns
|
||||
must point to such a byte stream or be NULL, in which case the function returns
|
||||
without doing anything.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -27,7 +27,7 @@ DESCRIPTION
|
|||
<P>
|
||||
This is a convenience function for freeing the store obtained by a previous
|
||||
call to <b>pcre2_substring_list_get()</b>. Its only argument is a pointer to
|
||||
the list of string pointers. If the argument is NULL, the function returns
|
||||
the list of string pointers. If the argument is NULL, the function returns
|
||||
immediately, without doing anything.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -518,8 +518,8 @@ been matched by <b>pcre2_match()</b>. They are:
|
|||
<b>pcre2_substring_number_from_name()</b>
|
||||
</pre>
|
||||
<b>pcre2_substring_free()</b> and <b>pcre2_substring_list_free()</b> are also
|
||||
provided, to free memory used for extracted strings. If either of these
|
||||
functions is called with a NULL argument, the function returns immediately
|
||||
provided, to free memory used for extracted strings. If either of these
|
||||
functions is called with a NULL argument, the function returns immediately
|
||||
without doing anything.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -1255,7 +1255,7 @@ If the compile context argument <i>ccontext</i> is NULL, memory for the compiled
|
|||
pattern is obtained by calling <b>malloc()</b>. Otherwise, it is obtained from
|
||||
the same memory function that was used for the compile context. The caller must
|
||||
free the memory by calling <b>pcre2_code_free()</b> when it is no longer needed.
|
||||
If <b>pcre2_code_free()</b> is called with a NULL argument, it returns
|
||||
If <b>pcre2_code_free()</b> is called with a NULL argument, it returns
|
||||
immediately, without doing anything.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -1265,7 +1265,7 @@ if the code has been processed by the JIT compiler (see
|
|||
<a href="#jitcompiling">below),</a>
|
||||
the JIT information cannot be copied (because it is position-dependent).
|
||||
The new copy can initially be used only for non-JIT matching, though it can be
|
||||
passed to <b>pcre2_jit_compile()</b> if required. If <b>pcre2_code_copy()</b> is
|
||||
passed to <b>pcre2_jit_compile()</b> if required. If <b>pcre2_code_copy()</b> is
|
||||
called with a NULL argument, it returns NULL.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -1514,15 +1514,15 @@ PCRE2_EXTENDED is equivalent to Perl's /x option, and it can be changed within
|
|||
a pattern by a (?x) option setting.
|
||||
</P>
|
||||
<P>
|
||||
When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recognizes as
|
||||
white space only those characters with code points less than 256 that are
|
||||
flagged as white space in its low-character table. The table is normally
|
||||
created by
|
||||
When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recognizes as
|
||||
white space only those characters with code points less than 256 that are
|
||||
flagged as white space in its low-character table. The table is normally
|
||||
created by
|
||||
<a href="pcre2_maketables.html"><b>pcre2_maketables()</b>,</a>
|
||||
which uses the <b>isspace()</b> function to identify space characters. In most
|
||||
ASCII environments, the relevant characters are those with code points 0x0009
|
||||
(tab), 0x000A (linefeed), 0x000B (vertical tab), 0x000C (formfeed), 0x000D
|
||||
(carriage return), and 0x0020 (space).
|
||||
(carriage return), and 0x0020 (space).
|
||||
</P>
|
||||
<P>
|
||||
When PCRE2 is compiled with Unicode support, in addition to these characters,
|
||||
|
@ -1552,8 +1552,8 @@ built.
|
|||
PCRE2_EXTENDED_MORE
|
||||
</pre>
|
||||
This option has the effect of PCRE2_EXTENDED, but, in addition, unescaped space
|
||||
and horizontal tab characters are ignored inside a character class. Note: only
|
||||
these two characters are ignored, not the full set of pattern white space
|
||||
and horizontal tab characters are ignored inside a character class. Note: only
|
||||
these two characters are ignored, not the full set of pattern white space
|
||||
characters that are ignored outside a character class. PCRE2_EXTENDED_MORE is
|
||||
equivalent to Perl's /xx option, and it can be changed within a pattern by a
|
||||
(?xx) option setting.
|
||||
|
@ -2323,7 +2323,7 @@ reloaded must be running the same version of PCRE2, with the same code unit
|
|||
width, and must also have the same endianness, pointer width, and PCRE2_SIZE
|
||||
type. Before compiled patterns can be saved, they must be converted to a
|
||||
"serialized" form, which in the case of PCRE2 is really just a bytecode dump.
|
||||
The functions whose names begin with <b>pcre2_serialize_</b> are used for
|
||||
The functions whose names begin with <b>pcre2_serialize_</b> are used for
|
||||
converting to and from the serialized form. They are described in the
|
||||
<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
|
||||
documentation. Note that PCRE2 serialization does not convert compiled patterns
|
||||
|
@ -2398,7 +2398,7 @@ match data block (for that match) have taken place.
|
|||
</P>
|
||||
<P>
|
||||
When a match data block itself is no longer needed, it should be freed by
|
||||
calling <b>pcre2_match_data_free()</b>. If this function is called with a NULL
|
||||
calling <b>pcre2_match_data_free()</b>. If this function is called with a NULL
|
||||
argument, it returns immediately, without doing anything.
|
||||
</P>
|
||||
<br><a name="SEC27" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
|
||||
|
@ -3383,7 +3383,7 @@ replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE
|
|||
(invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket
|
||||
not found), PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group
|
||||
substitution), and PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before
|
||||
it started or the match started earlier than the current position in the
|
||||
it started or the match started earlier than the current position in the
|
||||
subject, which can happen if \K is used in an assertion).
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -82,7 +82,7 @@ The following sections include descriptions of "on/off" options whose names
|
|||
begin with --enable or --disable. Because of the way that <b>configure</b>
|
||||
works, --enable and --disable always come in pairs, so the complementary option
|
||||
always exists as well, but as it specifies the default, it is not described.
|
||||
Options that specify values have names that start with --with. At the end of a
|
||||
Options that specify values have names that start with --with. At the end of a
|
||||
<b>configure</b> run, a summary of the configuration is output.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">BUILDING 8-BIT, 16-BIT AND 32-BIT LIBRARIES</a><br>
|
||||
|
@ -171,12 +171,12 @@ Just-in-time (JIT) compiler support is included in the build by specifying
|
|||
--enable-jit
|
||||
</pre>
|
||||
This support is available only for certain hardware architectures. If this
|
||||
option is set for an unsupported architecture, a building error occurs.
|
||||
If in doubt, use
|
||||
option is set for an unsupported architecture, a building error occurs.
|
||||
If in doubt, use
|
||||
<pre>
|
||||
--enable-jit=auto
|
||||
</pre>
|
||||
which enables JIT only if the current hardware is supported. You can check
|
||||
which enables JIT only if the current hardware is supported. You can check
|
||||
if JIT is enabled in the configuration summary that is output at the end of a
|
||||
<b>configure</b> run. If you are enabling JIT under SELinux you may also want to
|
||||
add
|
||||
|
|
|
@ -42,7 +42,7 @@ assertion is a condition that has a matching branch (that is, the condition is
|
|||
false).
|
||||
</P>
|
||||
<P>
|
||||
4. The following Perl escape sequences are not supported: \F, \l, \L, \u,
|
||||
4. The following Perl escape sequences are not supported: \F, \l, \L, \u,
|
||||
\U, and \N when followed by a character name. \N on its own, matching a
|
||||
non-newline character, and \N{U+dd..}, matching a Unicode code point, are
|
||||
supported. The escapes that modify the case of following letters are
|
||||
|
|
|
@ -105,7 +105,7 @@ If <b>buffer</b> points to a NULL pointer, an output buffer is obtained using
|
|||
the allocator in the context or <b>malloc()</b> if no context is supplied. A
|
||||
pointer to this buffer is placed in the variable to which <b>buffer</b> points.
|
||||
When no longer needed the output buffer must be freed by calling
|
||||
<b>pcre2_converted_pattern_free()</b>. If this function is called with a NULL
|
||||
<b>pcre2_converted_pattern_free()</b>. If this function is called with a NULL
|
||||
argument, it returns immediately without doing anything.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -155,7 +155,7 @@ of changing the way binary files are handled.
|
|||
<br><a name="SEC5" href="#TOC1">BINARY ZEROS IN PATTERNS</a><br>
|
||||
<P>
|
||||
Patterns passed from the command line are strings that are terminated by a
|
||||
binary zero, so cannot contain internal zeros. However, patterns that are read
|
||||
binary zero, so cannot contain internal zeros. However, patterns that are read
|
||||
from a file via the <b>-f</b> option may contain binary zeros.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">OPTIONS</a><br>
|
||||
|
@ -542,7 +542,7 @@ counter that is incremented each time around its main processing loop. If the
|
|||
value set by <b>--match-limit</b> is reached, an error occurs.
|
||||
<br>
|
||||
<br>
|
||||
The <b>--heap-limit</b> option specifies, as a number of kibibytes (units of
|
||||
The <b>--heap-limit</b> option specifies, as a number of kibibytes (units of
|
||||
1024 bytes), the amount of heap memory that may be used for matching. Heap
|
||||
memory is needed only if matching the pattern requires a significant number of
|
||||
nested backtracking points to be remembered. This parameter can be set to zero
|
||||
|
|
|
@ -193,7 +193,7 @@ are a starting size, a maximum size, and a general context (for memory
|
|||
allocation functions, or NULL for standard memory allocation). It returns a
|
||||
pointer to an opaque structure of type <b>pcre2_jit_stack</b>, or NULL if there
|
||||
is an error. The <b>pcre2_jit_stack_free()</b> function is used to free a stack
|
||||
that is no longer needed. If its argument is NULL, this function returns
|
||||
that is no longer needed. If its argument is NULL, this function returns
|
||||
immediately, without doing anything. (For the technically minded: the address
|
||||
space is allocated by mmap or VirtualAlloc.) A maximum stack size of 512KiB to
|
||||
1MiB should be more than enough for any pattern.
|
||||
|
|
|
@ -94,7 +94,7 @@ may also reduce the memory requirements.
|
|||
In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive
|
||||
function calls, but only for processing atomic groups, lookaround assertions,
|
||||
and recursion within the pattern. The original version of the code used to
|
||||
allocate quite large internal workspace vectors on the stack, which caused some
|
||||
allocate quite large internal workspace vectors on the stack, which caused some
|
||||
problems for some patterns in environments with small stacks. From release
|
||||
10.32 the code for <b>pcre2_dfa_match()</b> has been re-factored to use heap
|
||||
memory when necessary for internal workspace when recursing, though recursive
|
||||
|
|
|
@ -50,13 +50,13 @@ PCRE2's 16-bit library cannot be reloaded on a 64-bit system, nor can they be
|
|||
reloaded using the 8-bit library.
|
||||
</P>
|
||||
<P>
|
||||
Note that "serialization" in PCRE2 does not convert compiled patterns to an
|
||||
abstract format like Java or .NET serialization. The serialized output is
|
||||
really just a bytecode dump, which is why it can only be reloaded in the same
|
||||
Note that "serialization" in PCRE2 does not convert compiled patterns to an
|
||||
abstract format like Java or .NET serialization. The serialized output is
|
||||
really just a bytecode dump, which is why it can only be reloaded in the same
|
||||
environment as the one that created it. Hence the restrictions mentioned above.
|
||||
Applications that are not statically linked with a fixed version of PCRE2 must
|
||||
be prepared to recompile patterns from their sources, in order to be immune to
|
||||
PCRE2 upgrades.
|
||||
be prepared to recompile patterns from their sources, in order to be immune to
|
||||
PCRE2 upgrades.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">SECURITY CONCERNS</a><br>
|
||||
<P>
|
||||
|
|
|
@ -70,7 +70,7 @@ This table applies to ASCII and Unicode environments.
|
|||
\ddd character with octal code ddd, or backreference
|
||||
\o{ddd..} character with octal code ddd..
|
||||
\U "U" if PCRE2_ALT_BSUX is set (otherwise is an error)
|
||||
\N{U+hh..} character with Unicode code point hh..
|
||||
\N{U+hh..} character with Unicode code point hh..
|
||||
\uhhhh character with hex code hhhh (if PCRE2_ALT_BSUX is set)
|
||||
\xhh character with hex code hh
|
||||
\x{hh..} character with hex code hh..
|
||||
|
@ -446,7 +446,7 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
|
|||
</P>
|
||||
<br><a name="SEC16" href="#TOC1">OPTION SETTING</a><br>
|
||||
<P>
|
||||
Changes of these options within a group are automatically cancelled at the end
|
||||
Changes of these options within a group are automatically cancelled at the end
|
||||
of the group.
|
||||
<pre>
|
||||
(?i) caseless
|
||||
|
@ -458,12 +458,12 @@ of the group.
|
|||
(?x) extended: ignore white space except in classes
|
||||
(?xx) as (?x) but also ignore space and tab in classes
|
||||
(?-...) unset option(s)
|
||||
(?^) unset imnsx options
|
||||
(?^) unset imnsx options
|
||||
</pre>
|
||||
Unsetting x or xx unsets both. Several options may be set at once, and a
|
||||
mixture of setting and unsetting such as (?i-x) is allowed, but there may be
|
||||
only one hyphen. Setting (but not unsetting) is allowed after (?^, for example
|
||||
(?^in). An option setting may appear at the start of a non-capturing group, for
|
||||
(?^in). An option setting may appear at the start of a non-capturing group, for
|
||||
example (?i:...).
|
||||
</P>
|
||||
<P>
|
||||
|
@ -604,8 +604,8 @@ pattern is not anchored.
|
|||
(*MARK:NAME); if not found, the (*SKIP) is ignored
|
||||
(*THEN) local failure, backtrack to next alternation
|
||||
</pre>
|
||||
The effect of one of these verbs in a group called as a subroutine is confined
|
||||
to the subroutine call.
|
||||
The effect of one of these verbs in a group called as a subroutine is confined
|
||||
to the subroutine call.
|
||||
</P>
|
||||
<br><a name="SEC24" href="#TOC1">CALLOUTS</a><br>
|
||||
<P>
|
||||
|
|
|
@ -315,8 +315,8 @@ number of subject lines to be matched against that pattern. In between sets of
|
|||
test data, command lines that begin with # may appear. This file format, with
|
||||
some restrictions, can also be processed by the <b>perltest.sh</b> script that
|
||||
is distributed with PCRE2 as a means of checking that the behaviour of PCRE2
|
||||
and Perl is the same. For a specification of <b>perltest.sh</b>, see the
|
||||
comments near its beginning.
|
||||
and Perl is the same. For a specification of <b>perltest.sh</b>, see the
|
||||
comments near its beginning.
|
||||
</P>
|
||||
<P>
|
||||
When the input is a terminal, <b>pcre2test</b> prompts for each line of input,
|
||||
|
@ -1446,10 +1446,10 @@ the minimum values for each parameter that allows the match to complete without
|
|||
error. If JIT is being used, only the match limit is relevant.
|
||||
</P>
|
||||
<P>
|
||||
When using this modifier, the pattern should not contain any limit settings
|
||||
such as (*LIMIT_MATCH=...) within it. If such a setting is present and is
|
||||
lower than the minimum matching value, the minimum value cannot be found
|
||||
because <b>pcre2_set_match_limit()</b> etc. are only able to reduce the value of
|
||||
When using this modifier, the pattern should not contain any limit settings
|
||||
such as (*LIMIT_MATCH=...) within it. If such a setting is present and is
|
||||
lower than the minimum matching value, the minimum value cannot be found
|
||||
because <b>pcre2_set_match_limit()</b> etc. are only able to reduce the value of
|
||||
an in-pattern limit; they cannot increase it.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -1464,13 +1464,13 @@ For non-DFA matching, the <i>match_limit</i> number is a measure of the amount
|
|||
of backtracking that takes place, and learning the minimum value can be
|
||||
instructive. For most simple matches, the number is quite small, but for
|
||||
patterns with very large numbers of matching possibilities, it can become large
|
||||
very quickly with increasing length of subject string. In the case of DFA
|
||||
matching, <i>match_limit</i> controls the total number of calls, both recursive
|
||||
and non-recursive, to the internal matching function, thus controlling the
|
||||
very quickly with increasing length of subject string. In the case of DFA
|
||||
matching, <i>match_limit</i> controls the total number of calls, both recursive
|
||||
and non-recursive, to the internal matching function, thus controlling the
|
||||
overall amount of computing resource that is used.
|
||||
</P>
|
||||
<P>
|
||||
For both kinds of matching, the <i>heap_limit</i> number, which is in kibibytes
|
||||
For both kinds of matching, the <i>heap_limit</i> number, which is in kibibytes
|
||||
(units of 1024 bytes), limits the amount of heap memory used for matching. A
|
||||
value of zero disables the use of any heap memory; many simple pattern matches
|
||||
can be done without using the heap, so zero is not an unreasonable setting.
|
||||
|
@ -1929,7 +1929,7 @@ documentation. In this section we describe the features of <b>pcre2test</b> that
|
|||
can be used to test these functions.
|
||||
</P>
|
||||
<P>
|
||||
Note that "serialization" in PCRE2 does not convert compiled patterns to an
|
||||
Note that "serialization" in PCRE2 does not convert compiled patterns to an
|
||||
abstract format like Java or .NET. It just makes a reloadable byte code stream.
|
||||
Hence the restrictions on reloading mentioned above.
|
||||
</P>
|
||||
|
|
10
doc/pcre2.3
10
doc/pcre2.3
|
@ -6,11 +6,11 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
|||
.sp
|
||||
PCRE2 is the name used for a revised API for the PCRE library, which is a set
|
||||
of functions, written in C, that implement regular expression pattern matching
|
||||
using the same syntax and semantics as Perl, with just a few differences. After
|
||||
nearly two decades, the limitations of the original API were making development
|
||||
using the same syntax and semantics as Perl, with just a few differences. After
|
||||
nearly two decades, the limitations of the original API were making development
|
||||
increasingly difficult. The new API is more extensible, and it was simplified
|
||||
by abolishing the separate "study" optimizing function; in PCRE2, patterns are
|
||||
automatically optimized where possible. Since forking from PCRE1, the code has
|
||||
automatically optimized where possible. Since forking from PCRE1, the code has
|
||||
been extensively refactored and new features introduced.
|
||||
.P
|
||||
As well as Perl-style regular expression patterns, some features that appeared
|
||||
|
@ -158,7 +158,7 @@ listing), and the short pages for individual functions, are concatenated in
|
|||
pcre2build building PCRE2
|
||||
pcre2callout details of the callout feature
|
||||
pcre2compat discussion of Perl compatibility
|
||||
pcre2convert details of pattern conversion functions
|
||||
pcre2convert details of pattern conversion functions
|
||||
pcre2demo a demonstration C program that uses PCRE2
|
||||
pcre2grep description of the \fBpcre2grep\fP command (8-bit only)
|
||||
pcre2jit discussion of just-in-time optimization support
|
||||
|
@ -171,7 +171,7 @@ listing), and the short pages for individual functions, are concatenated in
|
|||
pcre2perform discussion of performance issues
|
||||
pcre2posix the POSIX-compatible C API for the 8-bit library
|
||||
pcre2sample discussion of the pcre2demo program
|
||||
pcre2serialize details of pattern serialization
|
||||
pcre2serialize details of pattern serialization
|
||||
pcre2syntax quick syntax reference
|
||||
pcre2test description of the \fBpcre2test\fP command
|
||||
pcre2unicode discussion of Unicode and UTF support
|
||||
|
|
|
@ -180,8 +180,8 @@ REVISION
|
|||
Last updated: 11 July 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2API(3) Library Functions Manual PCRE2API(3)
|
||||
|
||||
|
||||
|
@ -3545,8 +3545,8 @@ REVISION
|
|||
Last updated: 03 August 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3)
|
||||
|
||||
|
||||
|
@ -4100,8 +4100,8 @@ REVISION
|
|||
Last updated: 26 April 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3)
|
||||
|
||||
|
||||
|
@ -4526,8 +4526,8 @@ REVISION
|
|||
Last updated: 26 April 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3)
|
||||
|
||||
|
||||
|
@ -4731,8 +4731,8 @@ REVISION
|
|||
Last updated: 28 July 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2JIT(3) Library Functions Manual PCRE2JIT(3)
|
||||
|
||||
|
||||
|
@ -5128,8 +5128,8 @@ REVISION
|
|||
Last updated: 28 June 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3)
|
||||
|
||||
|
||||
|
@ -5199,8 +5199,8 @@ REVISION
|
|||
Last updated: 30 March 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3)
|
||||
|
||||
|
||||
|
@ -5418,8 +5418,8 @@ REVISION
|
|||
Last updated: 29 September 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3)
|
||||
|
||||
|
||||
|
@ -5858,8 +5858,8 @@ REVISION
|
|||
Last updated: 22 December 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PATTERN(3) Library Functions Manual PCRE2PATTERN(3)
|
||||
|
||||
|
||||
|
@ -9100,8 +9100,8 @@ REVISION
|
|||
Last updated: 03 August 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PERFORM(3) Library Functions Manual PCRE2PERFORM(3)
|
||||
|
||||
|
||||
|
@ -9335,8 +9335,8 @@ REVISION
|
|||
Last updated: 25 April 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2POSIX(3) Library Functions Manual PCRE2POSIX(3)
|
||||
|
||||
|
||||
|
@ -9642,8 +9642,8 @@ REVISION
|
|||
Last updated: 15 June 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2SAMPLE(3) Library Functions Manual PCRE2SAMPLE(3)
|
||||
|
||||
|
||||
|
@ -9921,8 +9921,8 @@ REVISION
|
|||
Last updated: 27 June 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2SYNTAX(3) Library Functions Manual PCRE2SYNTAX(3)
|
||||
|
||||
|
||||
|
@ -10390,8 +10390,8 @@ REVISION
|
|||
Last updated: 01 August 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3)
|
||||
|
||||
|
||||
|
@ -10647,5 +10647,5 @@ REVISION
|
|||
Last updated: 17 May 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
|||
This function is part of an experimental set of pattern conversion functions.
|
||||
It frees the memory occupied by a convert context, using the memory
|
||||
freeing function from the general context with which it was created, or
|
||||
\fBfree()\fP if that was not set. If the argument is NULL, the function returns
|
||||
\fBfree()\fP if that was not set. If the argument is NULL, the function returns
|
||||
immediately without doing anything.
|
||||
.P
|
||||
The pattern conversion functions are described in the
|
||||
|
|
|
@ -16,7 +16,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
|||
This function is part of an experimental set of pattern conversion functions.
|
||||
It frees the memory occupied by a converted pattern that was obtained by
|
||||
calling \fBpcre2_pattern_convert()\fP with arguments that caused it to place
|
||||
the converted pattern into newly obtained heap memory. If the argument is NULL,
|
||||
the converted pattern into newly obtained heap memory. If the argument is NULL,
|
||||
the function returns immediately without doing anything.
|
||||
.P
|
||||
The pattern conversion functions are described in the
|
||||
|
|
|
@ -24,8 +24,8 @@ passed to a matching function. The arguments of this function are:
|
|||
callback a callback function
|
||||
callback_data a JIT stack or a value to be passed to the callback
|
||||
.P
|
||||
If \fImcontext\fP is NULL, the function returns immediately, without doing
|
||||
anything.
|
||||
If \fImcontext\fP is NULL, the function returns immediately, without doing
|
||||
anything.
|
||||
.P
|
||||
If \fIcallback\fP is NULL and \fIcallback_data\fP is NULL, an internal 32KiB
|
||||
block on the machine stack is used.
|
||||
|
|
|
@ -13,7 +13,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
|||
.rs
|
||||
.sp
|
||||
This function is used to free a JIT stack that was created by
|
||||
\fBpcre2_jit_stack_create()\fP when it is no longer needed. If the argument is
|
||||
\fBpcre2_jit_stack_create()\fP when it is no longer needed. If the argument is
|
||||
NULL, the function returns immediately without doing anything. For more
|
||||
details, see the
|
||||
.\" HREF
|
||||
|
|
|
@ -48,6 +48,6 @@ There is a complete description of the PCRE2 native API in the
|
|||
.\"
|
||||
page and a description of the serialization functions in the
|
||||
.\" HREF
|
||||
\fBpcre2serialize\fP
|
||||
.\"
|
||||
\fBpcre2serialize\fP
|
||||
.\"
|
||||
page.
|
||||
|
|
|
@ -16,7 +16,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
|||
.rs
|
||||
.sp
|
||||
This function encodes a list of compiled patterns into a byte stream that can
|
||||
be saved on disc or elsewhere. Note that this is not an abstract format like
|
||||
be saved on disc or elsewhere. Note that this is not an abstract format like
|
||||
Java or .NET. Conversion of the byte stream back into usable compiled patterns
|
||||
can only happen on a host that is running the same version of PCRE2, with the
|
||||
same code unit width, and the host must also have the same endianness, pointer
|
||||
|
@ -49,6 +49,6 @@ There is a complete description of the PCRE2 native API in the
|
|||
.\"
|
||||
page and a description of the serialization functions in the
|
||||
.\" HREF
|
||||
\fBpcre2serialize\fP
|
||||
.\"
|
||||
\fBpcre2serialize\fP
|
||||
.\"
|
||||
page.
|
||||
|
|
|
@ -15,7 +15,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
|||
.sp
|
||||
This function frees the memory that was obtained by
|
||||
\fBpcre2_serialize_encode()\fP to hold a serialized byte stream. The argument
|
||||
must point to such a byte stream or be NULL, in which case the function returns
|
||||
must point to such a byte stream or be NULL, in which case the function returns
|
||||
without doing anything.
|
||||
.P
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
@ -24,6 +24,6 @@ There is a complete description of the PCRE2 native API in the
|
|||
.\"
|
||||
page and a description of the serialization functions in the
|
||||
.\" HREF
|
||||
\fBpcre2serialize\fP
|
||||
.\"
|
||||
\fBpcre2serialize\fP
|
||||
.\"
|
||||
page.
|
||||
|
|
|
@ -32,6 +32,6 @@ There is a complete description of the PCRE2 native API in the
|
|||
.\"
|
||||
page and a description of the serialization functions in the
|
||||
.\" HREF
|
||||
\fBpcre2serialize\fP
|
||||
.\"
|
||||
\fBpcre2serialize\fP
|
||||
.\"
|
||||
page.
|
||||
|
|
|
@ -14,7 +14,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
|||
.sp
|
||||
This is a convenience function for freeing the store obtained by a previous
|
||||
call to \fBpcre2_substring_list_get()\fP. Its only argument is a pointer to
|
||||
the list of string pointers. If the argument is NULL, the function returns
|
||||
the list of string pointers. If the argument is NULL, the function returns
|
||||
immediately, without doing anything.
|
||||
.P
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -453,8 +453,8 @@ been matched by \fBpcre2_match()\fP. They are:
|
|||
\fBpcre2_substring_number_from_name()\fP
|
||||
.sp
|
||||
\fBpcre2_substring_free()\fP and \fBpcre2_substring_list_free()\fP are also
|
||||
provided, to free memory used for extracted strings. If either of these
|
||||
functions is called with a NULL argument, the function returns immediately
|
||||
provided, to free memory used for extracted strings. If either of these
|
||||
functions is called with a NULL argument, the function returns immediately
|
||||
without doing anything.
|
||||
.P
|
||||
The function \fBpcre2_substitute()\fP can be called to match a pattern and
|
||||
|
@ -1182,7 +1182,7 @@ If the compile context argument \fIccontext\fP is NULL, memory for the compiled
|
|||
pattern is obtained by calling \fBmalloc()\fP. Otherwise, it is obtained from
|
||||
the same memory function that was used for the compile context. The caller must
|
||||
free the memory by calling \fBpcre2_code_free()\fP when it is no longer needed.
|
||||
If \fBpcre2_code_free()\fP is called with a NULL argument, it returns
|
||||
If \fBpcre2_code_free()\fP is called with a NULL argument, it returns
|
||||
immediately, without doing anything.
|
||||
.P
|
||||
The function \fBpcre2_code_copy()\fP makes a copy of the compiled code in new
|
||||
|
@ -1194,7 +1194,7 @@ below),
|
|||
.\"
|
||||
the JIT information cannot be copied (because it is position-dependent).
|
||||
The new copy can initially be used only for non-JIT matching, though it can be
|
||||
passed to \fBpcre2_jit_compile()\fP if required. If \fBpcre2_code_copy()\fP is
|
||||
passed to \fBpcre2_jit_compile()\fP if required. If \fBpcre2_code_copy()\fP is
|
||||
called with a NULL argument, it returns NULL.
|
||||
.P
|
||||
The \fBpcre2_code_copy()\fP function provides a way for individual threads in a
|
||||
|
@ -1448,17 +1448,17 @@ and between a quantifier and a following + that indicates possessiveness.
|
|||
PCRE2_EXTENDED is equivalent to Perl's /x option, and it can be changed within
|
||||
a pattern by a (?x) option setting.
|
||||
.P
|
||||
When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recognizes as
|
||||
white space only those characters with code points less than 256 that are
|
||||
flagged as white space in its low-character table. The table is normally
|
||||
created by
|
||||
When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recognizes as
|
||||
white space only those characters with code points less than 256 that are
|
||||
flagged as white space in its low-character table. The table is normally
|
||||
created by
|
||||
.\" HREF
|
||||
\fBpcre2_maketables()\fP,
|
||||
\fBpcre2_maketables()\fP,
|
||||
.\"
|
||||
which uses the \fBisspace()\fP function to identify space characters. In most
|
||||
ASCII environments, the relevant characters are those with code points 0x0009
|
||||
(tab), 0x000A (linefeed), 0x000B (vertical tab), 0x000C (formfeed), 0x000D
|
||||
(carriage return), and 0x0020 (space).
|
||||
(carriage return), and 0x0020 (space).
|
||||
.P
|
||||
When PCRE2 is compiled with Unicode support, in addition to these characters,
|
||||
five more Unicode "Pattern White Space" characters are recognized by
|
||||
|
@ -1488,8 +1488,8 @@ built.
|
|||
PCRE2_EXTENDED_MORE
|
||||
.sp
|
||||
This option has the effect of PCRE2_EXTENDED, but, in addition, unescaped space
|
||||
and horizontal tab characters are ignored inside a character class. Note: only
|
||||
these two characters are ignored, not the full set of pattern white space
|
||||
and horizontal tab characters are ignored inside a character class. Note: only
|
||||
these two characters are ignored, not the full set of pattern white space
|
||||
characters that are ignored outside a character class. PCRE2_EXTENDED_MORE is
|
||||
equivalent to Perl's /xx option, and it can be changed within a pattern by a
|
||||
(?xx) option setting.
|
||||
|
@ -2288,7 +2288,7 @@ reloaded must be running the same version of PCRE2, with the same code unit
|
|||
width, and must also have the same endianness, pointer width, and PCRE2_SIZE
|
||||
type. Before compiled patterns can be saved, they must be converted to a
|
||||
"serialized" form, which in the case of PCRE2 is really just a bytecode dump.
|
||||
The functions whose names begin with \fBpcre2_serialize_\fP are used for
|
||||
The functions whose names begin with \fBpcre2_serialize_\fP are used for
|
||||
converting to and from the serialized form. They are described in the
|
||||
.\" HREF
|
||||
\fBpcre2serialize\fP
|
||||
|
@ -2366,7 +2366,7 @@ free a compiled pattern or a subject string until after all operations on the
|
|||
match data block (for that match) have taken place.
|
||||
.P
|
||||
When a match data block itself is no longer needed, it should be freed by
|
||||
calling \fBpcre2_match_data_free()\fP. If this function is called with a NULL
|
||||
calling \fBpcre2_match_data_free()\fP. If this function is called with a NULL
|
||||
argument, it returns immediately, without doing anything.
|
||||
.
|
||||
.
|
||||
|
@ -3370,7 +3370,7 @@ replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE
|
|||
(invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket
|
||||
not found), PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group
|
||||
substitution), and PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before
|
||||
it started or the match started earlier than the current position in the
|
||||
it started or the match started earlier than the current position in the
|
||||
subject, which can happen if \eK is used in an assertion).
|
||||
.P
|
||||
As for all PCRE2 errors, a text message that describes the error can be
|
||||
|
|
|
@ -59,7 +59,7 @@ The following sections include descriptions of "on/off" options whose names
|
|||
begin with --enable or --disable. Because of the way that \fBconfigure\fP
|
||||
works, --enable and --disable always come in pairs, so the complementary option
|
||||
always exists as well, but as it specifies the default, it is not described.
|
||||
Options that specify values have names that start with --with. At the end of a
|
||||
Options that specify values have names that start with --with. At the end of a
|
||||
\fBconfigure\fP run, a summary of the configuration is output.
|
||||
.
|
||||
.
|
||||
|
@ -157,12 +157,12 @@ Just-in-time (JIT) compiler support is included in the build by specifying
|
|||
--enable-jit
|
||||
.sp
|
||||
This support is available only for certain hardware architectures. If this
|
||||
option is set for an unsupported architecture, a building error occurs.
|
||||
If in doubt, use
|
||||
option is set for an unsupported architecture, a building error occurs.
|
||||
If in doubt, use
|
||||
.sp
|
||||
--enable-jit=auto
|
||||
.sp
|
||||
which enables JIT only if the current hardware is supported. You can check
|
||||
which enables JIT only if the current hardware is supported. You can check
|
||||
if JIT is enabled in the configuration summary that is output at the end of a
|
||||
\fBconfigure\fP run. If you are enabling JIT under SELinux you may also want to
|
||||
add
|
||||
|
|
|
@ -28,7 +28,7 @@ counted, but their entries in the offsets vector are set only when a negative
|
|||
assertion is a condition that has a matching branch (that is, the condition is
|
||||
false).
|
||||
.P
|
||||
4. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu,
|
||||
4. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu,
|
||||
\eU, and \eN when followed by a character name. \eN on its own, matching a
|
||||
non-newline character, and \eN{U+dd..}, matching a Unicode code point, are
|
||||
supported. The escapes that modify the case of following letters are
|
||||
|
|
|
@ -83,7 +83,7 @@ If \fBbuffer\fP points to a NULL pointer, an output buffer is obtained using
|
|||
the allocator in the context or \fBmalloc()\fP if no context is supplied. A
|
||||
pointer to this buffer is placed in the variable to which \fBbuffer\fP points.
|
||||
When no longer needed the output buffer must be freed by calling
|
||||
\fBpcre2_converted_pattern_free()\fP. If this function is called with a NULL
|
||||
\fBpcre2_converted_pattern_free()\fP. If this function is called with a NULL
|
||||
argument, it returns immediately without doing anything.
|
||||
.P
|
||||
If \fBbuffer\fP points to a non-NULL pointer, \fBblength\fP must be set to the
|
||||
|
|
|
@ -126,7 +126,7 @@ of changing the way binary files are handled.
|
|||
.rs
|
||||
.sp
|
||||
Patterns passed from the command line are strings that are terminated by a
|
||||
binary zero, so cannot contain internal zeros. However, patterns that are read
|
||||
binary zero, so cannot contain internal zeros. However, patterns that are read
|
||||
from a file via the \fB-f\fP option may contain binary zeros.
|
||||
.
|
||||
.
|
||||
|
@ -471,7 +471,7 @@ is a pattern that uses nested unlimited repeats. Internally, PCRE2 has a
|
|||
counter that is incremented each time around its main processing loop. If the
|
||||
value set by \fB--match-limit\fP is reached, an error occurs.
|
||||
.sp
|
||||
The \fB--heap-limit\fP option specifies, as a number of kibibytes (units of
|
||||
The \fB--heap-limit\fP option specifies, as a number of kibibytes (units of
|
||||
1024 bytes), the amount of heap memory that may be used for matching. Heap
|
||||
memory is needed only if matching the pattern requires a significant number of
|
||||
nested backtracking points to be remembered. This parameter can be set to zero
|
||||
|
|
|
@ -177,7 +177,7 @@ are a starting size, a maximum size, and a general context (for memory
|
|||
allocation functions, or NULL for standard memory allocation). It returns a
|
||||
pointer to an opaque structure of type \fBpcre2_jit_stack\fP, or NULL if there
|
||||
is an error. The \fBpcre2_jit_stack_free()\fP function is used to free a stack
|
||||
that is no longer needed. If its argument is NULL, this function returns
|
||||
that is no longer needed. If its argument is NULL, this function returns
|
||||
immediately, without doing anything. (For the technically minded: the address
|
||||
space is allocated by mmap or VirtualAlloc.) A maximum stack size of 512KiB to
|
||||
1MiB should be more than enough for any pattern.
|
||||
|
|
|
@ -79,7 +79,7 @@ may also reduce the memory requirements.
|
|||
In contrast to \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP does use recursive
|
||||
function calls, but only for processing atomic groups, lookaround assertions,
|
||||
and recursion within the pattern. The original version of the code used to
|
||||
allocate quite large internal workspace vectors on the stack, which caused some
|
||||
allocate quite large internal workspace vectors on the stack, which caused some
|
||||
problems for some patterns in environments with small stacks. From release
|
||||
10.32 the code for \fBpcre2_dfa_match()\fP has been re-factored to use heap
|
||||
memory when necessary for internal workspace when recursing, though recursive
|
||||
|
|
|
@ -29,13 +29,13 @@ and PCRE2_SIZE type. For example, patterns compiled on a 32-bit system using
|
|||
PCRE2's 16-bit library cannot be reloaded on a 64-bit system, nor can they be
|
||||
reloaded using the 8-bit library.
|
||||
.P
|
||||
Note that "serialization" in PCRE2 does not convert compiled patterns to an
|
||||
abstract format like Java or .NET serialization. The serialized output is
|
||||
really just a bytecode dump, which is why it can only be reloaded in the same
|
||||
Note that "serialization" in PCRE2 does not convert compiled patterns to an
|
||||
abstract format like Java or .NET serialization. The serialized output is
|
||||
really just a bytecode dump, which is why it can only be reloaded in the same
|
||||
environment as the one that created it. Hence the restrictions mentioned above.
|
||||
Applications that are not statically linked with a fixed version of PCRE2 must
|
||||
be prepared to recompile patterns from their sources, in order to be immune to
|
||||
PCRE2 upgrades.
|
||||
be prepared to recompile patterns from their sources, in order to be immune to
|
||||
PCRE2 upgrades.
|
||||
.
|
||||
.
|
||||
.SH "SECURITY CONCERNS"
|
||||
|
|
|
@ -35,7 +35,7 @@ This table applies to ASCII and Unicode environments.
|
|||
\eddd character with octal code ddd, or backreference
|
||||
\eo{ddd..} character with octal code ddd..
|
||||
\eU "U" if PCRE2_ALT_BSUX is set (otherwise is an error)
|
||||
\eN{U+hh..} character with Unicode code point hh..
|
||||
\eN{U+hh..} character with Unicode code point hh..
|
||||
\euhhhh character with hex code hhhh (if PCRE2_ALT_BSUX is set)
|
||||
\exhh character with hex code hh
|
||||
\ex{hh..} character with hex code hh..
|
||||
|
@ -421,7 +421,7 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
|
|||
.
|
||||
.SH "OPTION SETTING"
|
||||
.rs
|
||||
Changes of these options within a group are automatically cancelled at the end
|
||||
Changes of these options within a group are automatically cancelled at the end
|
||||
of the group.
|
||||
.sp
|
||||
(?i) caseless
|
||||
|
@ -433,12 +433,12 @@ of the group.
|
|||
(?x) extended: ignore white space except in classes
|
||||
(?xx) as (?x) but also ignore space and tab in classes
|
||||
(?-...) unset option(s)
|
||||
(?^) unset imnsx options
|
||||
(?^) unset imnsx options
|
||||
.sp
|
||||
Unsetting x or xx unsets both. Several options may be set at once, and a
|
||||
mixture of setting and unsetting such as (?i-x) is allowed, but there may be
|
||||
only one hyphen. Setting (but no unsetting) is allowed after (?^ for example
|
||||
(?^in). An option setting may appear at the start of a non-capturing group, for
|
||||
(?^in). An option setting may appear at the start of a non-capturing group, for
|
||||
example (?i:...).
|
||||
.P
|
||||
The following are recognized only at the very start of a pattern or after one
|
||||
|
@ -584,8 +584,8 @@ pattern is not anchored.
|
|||
(*MARK:NAME); if not found, the (*SKIP) is ignored
|
||||
(*THEN) local failure, backtrack to next alternation
|
||||
.sp
|
||||
The effect of one of these verbs in a group called as a subroutine is confined
|
||||
to the subroutine call.
|
||||
The effect of one of these verbs in a group called as a subroutine is confined
|
||||
to the subroutine call.
|
||||
.
|
||||
.
|
||||
.SH "CALLOUTS"
|
||||
|
|
|
@ -266,8 +266,8 @@ number of subject lines to be matched against that pattern. In between sets of
|
|||
test data, command lines that begin with # may appear. This file format, with
|
||||
some restrictions, can also be processed by the \fBperltest.sh\fP script that
|
||||
is distributed with PCRE2 as a means of checking that the behaviour of PCRE2
|
||||
and Perl is the same. For a specification of \fBperltest.sh\fP, see the
|
||||
comments near its beginning.
|
||||
and Perl is the same. For a specification of \fBperltest.sh\fP, see the
|
||||
comments near its beginning.
|
||||
.P
|
||||
When the input is a terminal, \fBpcre2test\fP prompts for each line of input,
|
||||
using "re>" to prompt for regular expression patterns, and "data>" to prompt
|
||||
|
@ -1408,10 +1408,10 @@ the match context via \fBpcre2_set_heap_limit()\fP,
|
|||
the minimum values for each parameter that allows the match to complete without
|
||||
error. If JIT is being used, only the match limit is relevant.
|
||||
.P
|
||||
When using this modifier, the pattern should not contain any limit settings
|
||||
such as (*LIMIT_MATCH=...) within it. If such a setting is present and is
|
||||
lower than the minimum matching value, the minimum value cannot be found
|
||||
because \fBpcre2_set_match_limit()\fP etc. are only able to reduce the value of
|
||||
When using this modifier, the pattern should not contain any limit settings
|
||||
such as (*LIMIT_MATCH=...) within it. If such a setting is present and is
|
||||
lower than the minimum matching value, the minimum value cannot be found
|
||||
because \fBpcre2_set_match_limit()\fP etc. are only able to reduce the value of
|
||||
an in-pattern limit; they cannot increase it.
|
||||
.P
|
||||
For non-DFA matching, the minimum \fIdepth_limit\fP number is a measure of how
|
||||
|
@ -1424,12 +1424,12 @@ For non-DFA matching, the \fImatch_limit\fP number is a measure of the amount
|
|||
of backtracking that takes place, and learning the minimum value can be
|
||||
instructive. For most simple matches, the number is quite small, but for
|
||||
patterns with very large numbers of matching possibilities, it can become large
|
||||
very quickly with increasing length of subject string. In the case of DFA
|
||||
matching, \fImatch_limit\fP controls the total number of calls, both recursive
|
||||
and non-recursive, to the internal matching function, thus controlling the
|
||||
very quickly with increasing length of subject string. In the case of DFA
|
||||
matching, \fImatch_limit\fP controls the total number of calls, both recursive
|
||||
and non-recursive, to the internal matching function, thus controlling the
|
||||
overall amount of computing resource that is used.
|
||||
.P
|
||||
For both kinds of matching, the \fIheap_limit\fP number, which is in kibibytes
|
||||
For both kinds of matching, the \fIheap_limit\fP number, which is in kibibytes
|
||||
(units of 1024 bytes), limits the amount of heap memory used for matching. A
|
||||
value of zero disables the use of any heap memory; many simple pattern matches
|
||||
can be done without using the heap, so zero is not an unreasonable setting.
|
||||
|
@ -1897,7 +1897,7 @@ for serializing and de-serializing. They are described in the
|
|||
documentation. In this section we describe the features of \fBpcre2test\fP that
|
||||
can be used to test these functions.
|
||||
.P
|
||||
Note that "serialization" in PCRE2 does not convert compiled patterns to an
|
||||
Note that "serialization" in PCRE2 does not convert compiled patterns to an
|
||||
abstract format like Java or .NET. It just makes a reloadable byte code stream.
|
||||
Hence the restrictions on reloading mentioned above.
|
||||
.P
|
||||
|
|
|
@ -233,8 +233,8 @@ for (;;)
|
|||
|
||||
# Use no_start_optimize (disable PCRE2 start-up optimization) to disable Perl
|
||||
# optimization by inserting (??{""}) at the start of the pattern. We may
|
||||
# also encounter -no_start_optimize from a #pattern setting.
|
||||
|
||||
# also encounter -no_start_optimize from a #pattern setting.
|
||||
|
||||
$mod =~ s/-no_start_optimize,?//;
|
||||
if ($mod =~ s/no_start_optimize,?//) { $pat =~ s/$del/$del(??{""})/; }
|
||||
|
||||
|
|
|
@ -144,8 +144,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
instead. This allows for longer patterns in extreme cases. */
|
||||
#undef LINK_SIZE
|
||||
|
||||
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
||||
*/
|
||||
/* Define to the sub-directory where libtool stores uninstalled libraries. */
|
||||
#undef LT_OBJDIR
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
|
|
|
@ -46,7 +46,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#define PCRE2_PRERELEASE @PCRE2_PRERELEASE@
|
||||
#define PCRE2_DATE @PCRE2_DATE@
|
||||
|
||||
/* For the benefit of systems without stdint.h, an alternative is to use
|
||||
/* For the benefit of systems without stdint.h, an alternative is to use
|
||||
inttypes.h. The existence of these headers is checked by configure or CMake. */
|
||||
|
||||
#define PCRE2_HAVE_STDINT_H @PCRE2_HAVE_STDINT_H@
|
||||
|
@ -88,7 +88,7 @@ set, we ensure here that it has no effect. */
|
|||
#endif
|
||||
|
||||
/* Have to include limits.h, stdlib.h and stdint.h (or inttypes.h) to ensure
|
||||
that size_t and uint8_t, UCHAR_MAX, etc are defined. If the system has neither
|
||||
that size_t and uint8_t, UCHAR_MAX, etc are defined. If the system has neither
|
||||
header, the relevant values must be provided by some other means. */
|
||||
|
||||
#include <limits.h>
|
||||
|
|
|
@ -1235,7 +1235,7 @@ for (;;)
|
|||
#endif
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
|
|
|
@ -3593,8 +3593,8 @@ while (ptr < ptrend)
|
|||
else
|
||||
{
|
||||
BOOL hyphenok = TRUE;
|
||||
uint32_t oldoptions = options;
|
||||
|
||||
uint32_t oldoptions = options;
|
||||
|
||||
top_nest->reset_group = 0;
|
||||
top_nest->max_group = 0;
|
||||
set = unset = 0;
|
||||
|
|
|
@ -263,7 +263,7 @@ static const unsigned char match_error_texts[] =
|
|||
"bad serialized data\0"
|
||||
"heap limit exceeded\0"
|
||||
"invalid syntax\0"
|
||||
/* 65 */
|
||||
/* 65 */
|
||||
"internal error - duplicate substitution match\0"
|
||||
;
|
||||
|
||||
|
|
|
@ -132,10 +132,10 @@ while (eptr < end_subject)
|
|||
/* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
|
||||
allows any number of them before a following Extended_Pictographic. */
|
||||
|
||||
if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
|
||||
if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
|
||||
lgb != ucp_gbExtended_Pictographic)
|
||||
lgb = rgb;
|
||||
|
||||
|
||||
eptr += len;
|
||||
if (xcount != NULL) *xcount += 1;
|
||||
}
|
||||
|
|
|
@ -131,7 +131,7 @@ for (;;)
|
|||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
|
|
|
@ -897,7 +897,7 @@ typedef struct dfa_match_block {
|
|||
const uint8_t *tables; /* Character tables */
|
||||
PCRE2_SIZE start_offset; /* The start offset value */
|
||||
PCRE2_SIZE heap_limit; /* As it says */
|
||||
PCRE2_SIZE heap_used; /* As it says */
|
||||
PCRE2_SIZE heap_used; /* As it says */
|
||||
uint32_t match_limit; /* As it says */
|
||||
uint32_t match_limit_depth; /* As it says */
|
||||
uint32_t match_call_count; /* Number of calls of internal function */
|
||||
|
|
|
@ -839,7 +839,7 @@ switch(*cc)
|
|||
#endif
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
|
@ -940,7 +940,7 @@ while (cc < ccend)
|
|||
common->control_head_ptr = 1;
|
||||
/* Fall through. */
|
||||
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_MARK:
|
||||
if (common->mark_ptr == 0)
|
||||
|
@ -1555,7 +1555,7 @@ while (cc < ccend)
|
|||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_THEN_ARG:
|
||||
SLJIT_ASSERT(common->mark_ptr != 0);
|
||||
|
@ -1736,7 +1736,7 @@ while (cc < ccend)
|
|||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_THEN_ARG:
|
||||
SLJIT_ASSERT(common->mark_ptr != 0);
|
||||
|
@ -2045,7 +2045,7 @@ while (cc < ccend)
|
|||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_THEN_ARG:
|
||||
SLJIT_ASSERT(common->mark_ptr != 0);
|
||||
|
@ -2433,7 +2433,7 @@ while (cc < ccend)
|
|||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_THEN_ARG:
|
||||
SLJIT_ASSERT(common->mark_ptr != 0);
|
||||
|
@ -7264,11 +7264,11 @@ while (cc < end_subject)
|
|||
|
||||
/* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
|
||||
allows any number of them before a following Extended_Pictographic. */
|
||||
|
||||
if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
|
||||
|
||||
if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
|
||||
lgb != ucp_gbExtended_Pictographic)
|
||||
lgb = rgb;
|
||||
|
||||
|
||||
prevcc = cc;
|
||||
cc += len;
|
||||
}
|
||||
|
@ -7320,11 +7320,11 @@ while (cc < end_subject)
|
|||
|
||||
/* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
|
||||
allows any number of them before a following Extended_Pictographic. */
|
||||
|
||||
if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
|
||||
|
||||
if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
|
||||
lgb != ucp_gbExtended_Pictographic)
|
||||
lgb = rgb;
|
||||
|
||||
|
||||
cc++;
|
||||
}
|
||||
|
||||
|
@ -10356,7 +10356,7 @@ backtrack_common *backtrack;
|
|||
PCRE2_UCHAR opcode = *cc;
|
||||
PCRE2_SPTR ccend = cc + 1;
|
||||
|
||||
if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
|
||||
if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
|
||||
opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
|
||||
ccend += 2 + cc[1];
|
||||
|
||||
|
@ -10688,7 +10688,7 @@ while (cc < ccend)
|
|||
case OP_THEN:
|
||||
case OP_THEN_ARG:
|
||||
case OP_COMMIT:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_COMMIT_ARG:
|
||||
cc = compile_control_verb_matchingpath(common, cc, parent);
|
||||
break;
|
||||
|
||||
|
@ -11763,7 +11763,7 @@ while (current)
|
|||
break;
|
||||
|
||||
case OP_COMMIT:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_COMMIT_ARG:
|
||||
if (!common->local_quit_available)
|
||||
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
|
||||
if (common->quit_label == NULL)
|
||||
|
|
|
@ -1331,7 +1331,7 @@ static int regression_tests(void)
|
|||
ovector8_2[i] = -2;
|
||||
}
|
||||
if (re8) {
|
||||
(void)pcre2_set_match_limit_8(mcontext8, 10000000);
|
||||
(void)pcre2_set_match_limit_8(mcontext8, 10000000);
|
||||
return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
|
||||
|
||||
|
@ -1376,7 +1376,7 @@ static int regression_tests(void)
|
|||
else
|
||||
length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
|
||||
|
||||
(void)pcre2_set_match_limit_16(mcontext16, 10000000);
|
||||
(void)pcre2_set_match_limit_16(mcontext16, 10000000);
|
||||
return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
|
||||
|
||||
|
@ -1421,7 +1421,7 @@ static int regression_tests(void)
|
|||
else
|
||||
length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
|
||||
|
||||
(void)pcre2_set_match_limit_32(mcontext32, 10000000);
|
||||
(void)pcre2_set_match_limit_32(mcontext32, 10000000);
|
||||
return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
|
||||
current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
|
||||
|
||||
|
|
|
@ -5136,7 +5136,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
(char *)assert_accept_frame + offsetof(heapframe, ovector),
|
||||
assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
|
||||
Foffset_top = assert_accept_frame->offset_top;
|
||||
Fmark = assert_accept_frame->mark;
|
||||
Fmark = assert_accept_frame->mark;
|
||||
break;
|
||||
}
|
||||
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
|
||||
|
|
|
@ -390,7 +390,7 @@ while (TRUE)
|
|||
#endif
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
|
|
|
@ -799,7 +799,7 @@ for(;;)
|
|||
break;
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
|
|
|
@ -707,7 +707,7 @@ for (;;)
|
|||
/* Skip these, but we need to add in the name length. */
|
||||
|
||||
case OP_MARK:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_PRUNE_ARG:
|
||||
case OP_SKIP_ARG:
|
||||
case OP_THEN_ARG:
|
||||
|
@ -957,7 +957,7 @@ do
|
|||
case OP_CIRCM:
|
||||
case OP_CLOSE:
|
||||
case OP_COMMIT:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_COMMIT_ARG:
|
||||
case OP_COND:
|
||||
case OP_CREF:
|
||||
case OP_FALSE:
|
||||
|
|
|
@ -100,7 +100,7 @@ enum {
|
|||
ucp_Zs /* Space separator */
|
||||
};
|
||||
|
||||
/* These are grapheme break properties. The Extended Pictographic property
|
||||
/* These are grapheme break properties. The Extended Pictographic property
|
||||
comes from the emoji-data.txt file. */
|
||||
|
||||
enum {
|
||||
|
@ -273,13 +273,13 @@ enum {
|
|||
ucp_Nushu,
|
||||
ucp_Soyombo,
|
||||
ucp_Zanabazar_Square,
|
||||
/* New for Unicode 11.0.0 */
|
||||
ucp_Dogra,
|
||||
ucp_Gunjala_Gondi,
|
||||
ucp_Hanifi_Rohingya,
|
||||
ucp_Makasar,
|
||||
/* New for Unicode 11.0.0 */
|
||||
ucp_Dogra,
|
||||
ucp_Gunjala_Gondi,
|
||||
ucp_Hanifi_Rohingya,
|
||||
ucp_Makasar,
|
||||
ucp_Medefaidrin,
|
||||
ucp_Old_Sogdian,
|
||||
ucp_Old_Sogdian,
|
||||
ucp_Sogdian
|
||||
};
|
||||
|
||||
|
|
|
@ -344,9 +344,9 @@ if (rc >= 0)
|
|||
if ((size_t)rc > nmatch) rc = (int)nmatch;
|
||||
for (i = 0; i < (size_t)rc; i++)
|
||||
{
|
||||
pmatch[i].rm_so = (ovector[i*2] == PCRE2_UNSET)? -1 :
|
||||
pmatch[i].rm_so = (ovector[i*2] == PCRE2_UNSET)? -1 :
|
||||
(int)(ovector[i*2] + so);
|
||||
pmatch[i].rm_eo = (ovector[i*2+1] == PCRE2_UNSET)? -1 :
|
||||
pmatch[i].rm_eo = (ovector[i*2+1] == PCRE2_UNSET)? -1 :
|
||||
(int)(ovector[i*2+1] + so);
|
||||
}
|
||||
for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
|
||||
|
|
Loading…
Reference in New Issue