File tidies for 10.23-RC1
This commit is contained in:
parent
31c62034fe
commit
ca1ecd41a1
6
AUTHORS
6
AUTHORS
|
@ -8,7 +8,7 @@ Email domain: cam.ac.uk
|
||||||
University of Cambridge Computing Service,
|
University of Cambridge Computing Service,
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
|
|
||||||
Copyright (c) 1997-2016 University of Cambridge
|
Copyright (c) 1997-2017 University of Cambridge
|
||||||
All rights reserved
|
All rights reserved
|
||||||
|
|
||||||
|
|
||||||
|
@ -19,7 +19,7 @@ Written by: Zoltan Herczeg
|
||||||
Email local part: hzmester
|
Email local part: hzmester
|
||||||
Email domain: freemail.hu
|
Email domain: freemail.hu
|
||||||
|
|
||||||
Copyright(c) 2010-2016 Zoltan Herczeg
|
Copyright(c) 2010-2017 Zoltan Herczeg
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
@ -30,7 +30,7 @@ Written by: Zoltan Herczeg
|
||||||
Email local part: hzmester
|
Email local part: hzmester
|
||||||
Email domain: freemail.hu
|
Email domain: freemail.hu
|
||||||
|
|
||||||
Copyright(c) 2009-2016 Zoltan Herczeg
|
Copyright(c) 2009-2017 Zoltan Herczeg
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
####
|
####
|
||||||
|
|
|
@ -2,8 +2,8 @@ Change Log for PCRE2
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
|
|
||||||
Version 10.23 xx-xxxxxx-2016
|
Version 10.23 16-January-2017
|
||||||
----------------------------
|
-----------------------------
|
||||||
|
|
||||||
1. Extended pcre2test with the utf8_input modifier so that it is able to
|
1. Extended pcre2test with the utf8_input modifier so that it is able to
|
||||||
generate all possible 16-bit and 32-bit code unit values in non-UTF modes.
|
generate all possible 16-bit and 32-bit code unit values in non-UTF modes.
|
||||||
|
|
6
LICENCE
6
LICENCE
|
@ -25,7 +25,7 @@ Email domain: cam.ac.uk
|
||||||
University of Cambridge Computing Service,
|
University of Cambridge Computing Service,
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
|
|
||||||
Copyright (c) 1997-2016 University of Cambridge
|
Copyright (c) 1997-2017 University of Cambridge
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
@ -36,7 +36,7 @@ Written by: Zoltan Herczeg
|
||||||
Email local part: hzmester
|
Email local part: hzmester
|
||||||
Email domain: freemail.hu
|
Email domain: freemail.hu
|
||||||
|
|
||||||
Copyright(c) 2010-2016 Zoltan Herczeg
|
Copyright(c) 2010-2017 Zoltan Herczeg
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
@ -47,7 +47,7 @@ Written by: Zoltan Herczeg
|
||||||
Email local part: hzmester
|
Email local part: hzmester
|
||||||
Email domain: freemail.hu
|
Email domain: freemail.hu
|
||||||
|
|
||||||
Copyright(c) 2009-2016 Zoltan Herczeg
|
Copyright(c) 2009-2017 Zoltan Herczeg
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,10 @@
|
||||||
AUTOMAKE_OPTIONS = subdir-objects
|
AUTOMAKE_OPTIONS = subdir-objects
|
||||||
ACLOCAL_AMFLAGS = -I m4
|
ACLOCAL_AMFLAGS = -I m4
|
||||||
|
|
||||||
|
## This seems to have become necessary for building in non-source directory.
|
||||||
|
|
||||||
|
AM_CPPFLAGS="-I$(srcdir)/src"
|
||||||
|
|
||||||
## Specify the documentation files that are distributed.
|
## Specify the documentation files that are distributed.
|
||||||
|
|
||||||
dist_doc_DATA = \
|
dist_doc_DATA = \
|
||||||
|
|
36
NEWS
36
NEWS
|
@ -1,6 +1,42 @@
|
||||||
News about PCRE2 releases
|
News about PCRE2 releases
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
||||||
|
Version 10.23 16-January-2017
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
1. ChangeLog has the details of a lot of bug fixes and tidies.
|
||||||
|
|
||||||
|
2. There has been a major re-factoring of the pcre2_compile.c file. Most syntax
|
||||||
|
checking is now done in the pre-pass that identifies capturing groups. This has
|
||||||
|
reduced the amount of duplication and made the code tidier. While doing this,
|
||||||
|
some minor bugs and Perl incompatibilities were fixed (see ChangeLog for
|
||||||
|
details.)
|
||||||
|
|
||||||
|
3. Back references are now permitted in lookbehind assertions when there are
|
||||||
|
no duplicated group numbers (that is, (?| has not been used), and, if the
|
||||||
|
reference is by name, there is only one group of that name. The referenced
|
||||||
|
group must, of course, be of fixed length.
|
||||||
|
|
||||||
|
4. \g{+<number>} (e.g. \g{+2} ) is now supported. It is a "forward back
|
||||||
|
reference" and can be useful in repetitions (compare \g{-<number>} ). Perl does
|
||||||
|
not recognize this syntax.
|
||||||
|
|
||||||
|
5. pcre2grep now automatically expands its buffer up to a maximum set by
|
||||||
|
--max-buffer-size.
|
||||||
|
|
||||||
|
6. The -t option (grand total) has been added to pcre2grep.
|
||||||
|
|
||||||
|
7. A new function called pcre2_code_copy_with_tables() exists to copy a
|
||||||
|
compiled pattern along with a private copy of the character tables that it
|
||||||
|
uses.
|
||||||
|
|
||||||
|
8. A user supplied a number of patches to upgrade pcre2grep under Windows and
|
||||||
|
tidy the code.
|
||||||
|
|
||||||
|
9. Several updates have been made to pcre2test and test scripts (see
|
||||||
|
ChangeLog).
|
||||||
|
|
||||||
|
|
||||||
Version 10.22 29-July-2016
|
Version 10.22 29-July-2016
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
||||||
|
|
2
RunTest
2
RunTest
|
@ -520,7 +520,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||||
echo $title2 "(excluding UTF-$bits) (64M stack)"
|
echo $title2 "(excluding UTF-$bits) (64M stack)"
|
||||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q -S 64 $bmode $opt $testdata/testinput2 testtry
|
$sim $valgrind ${opt:+$vjs} ./pcre2test -q -S 64 $bmode $opt $testdata/testinput2 testtry
|
||||||
if [ $? = 0 ] ; then
|
if [ $? = 0 ] ; then
|
||||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -63,-62,-2,-1,0,100,188,189 >>testtry
|
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -63,-62,-2,-1,0,100,188,189,190,191 >>testtry
|
||||||
checkresult $? 2 "$opt"
|
checkresult $? 2 "$opt"
|
||||||
else
|
else
|
||||||
echo " "
|
echo " "
|
||||||
|
|
10
configure.ac
10
configure.ac
|
@ -11,16 +11,16 @@ dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||||
m4_define(pcre2_major, [10])
|
m4_define(pcre2_major, [10])
|
||||||
m4_define(pcre2_minor, [23])
|
m4_define(pcre2_minor, [23])
|
||||||
m4_define(pcre2_prerelease, [-RC1])
|
m4_define(pcre2_prerelease, [-RC1])
|
||||||
m4_define(pcre2_date, [2016-08-01])
|
m4_define(pcre2_date, [2017-01-16])
|
||||||
|
|
||||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||||
# 50 lines of this file. Please update that if the variables above are moved.
|
# 50 lines of this file. Please update that if the variables above are moved.
|
||||||
|
|
||||||
# Libtool shared library interface versions (current:revision:age)
|
# Libtool shared library interface versions (current:revision:age)
|
||||||
m4_define(libpcre2_8_version, [4:0:4])
|
m4_define(libpcre2_8_version, [5:0:5])
|
||||||
m4_define(libpcre2_16_version, [4:0:4])
|
m4_define(libpcre2_16_version, [5:0:5])
|
||||||
m4_define(libpcre2_32_version, [4:0:4])
|
m4_define(libpcre2_32_version, [5:0:5])
|
||||||
m4_define(libpcre2_posix_version, [1:0:0])
|
m4_define(libpcre2_posix_version, [1:1:0])
|
||||||
|
|
||||||
AC_PREREQ(2.57)
|
AC_PREREQ(2.57)
|
||||||
AC_INIT(PCRE2, pcre2_major.pcre2_minor[]pcre2_prerelease, , pcre2)
|
AC_INIT(PCRE2, pcre2_major.pcre2_minor[]pcre2_prerelease, , pcre2)
|
||||||
|
|
|
@ -45,8 +45,8 @@ is <b>pcre2_match()</b>.) The arguments for this function are:
|
||||||
<i>wscount</i> Number of elements in the vector
|
<i>wscount</i> Number of elements in the vector
|
||||||
</pre>
|
</pre>
|
||||||
For <b>pcre2_dfa_match()</b>, a match context is needed only if you want to set
|
For <b>pcre2_dfa_match()</b>, a match context is needed only if you want to set
|
||||||
up a callout function. The <i>length</i> and <i>startoffset</i> values are code
|
up a callout function or specify the recursion limit. The <i>length</i> and
|
||||||
units, not characters. The options are:
|
<i>startoffset</i> values are code units, not characters. The options are:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ANCHORED Match only at the first position
|
PCRE2_ANCHORED Match only at the first position
|
||||||
PCRE2_NOTBOL Subject is not the beginning of a line
|
PCRE2_NOTBOL Subject is not the beginning of a line
|
||||||
|
|
|
@ -879,21 +879,23 @@ This limit is of use only if it is set smaller than <i>match_limit</i>.
|
||||||
Limiting the recursion depth limits the amount of system stack that can be
|
Limiting the recursion depth limits the amount of system stack that can be
|
||||||
used, or, when PCRE2 has been compiled to use memory on the heap instead of the
|
used, or, when PCRE2 has been compiled to use memory on the heap instead of the
|
||||||
stack, the amount of heap memory that can be used. This limit is not relevant,
|
stack, the amount of heap memory that can be used. This limit is not relevant,
|
||||||
and is ignored, when matching is done using JIT compiled code or by the
|
and is ignored, when matching is done using JIT compiled code. However, it is
|
||||||
<b>pcre2_dfa_match()</b> function.
|
supported by <b>pcre2_dfa_match()</b>, which uses recursive function calls less
|
||||||
|
frequently than <b>pcre2_match()</b>, but which can be caused to use a lot of
|
||||||
|
stack by a recursive pattern such as /(.)(?1)/ matched to a very long string.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The default value for <i>recursion_limit</i> can be set when PCRE2 is built; the
|
The default value for <i>recursion_limit</i> can be set when PCRE2 is built; the
|
||||||
default default is the same value as the default for <i>match_limit</i>. If the
|
default default is the same value as the default for <i>match_limit</i>. If the
|
||||||
limit is exceeded, <b>pcre2_match()</b> returns PCRE2_ERROR_RECURSIONLIMIT. A
|
limit is exceeded, <b>pcre2_match()</b> and <b>pcre2_dfa_match()</b> return
|
||||||
value for the recursion limit may also be supplied by an item at the start of a
|
PCRE2_ERROR_RECURSIONLIMIT. A value for the recursion limit may also be
|
||||||
pattern of the form
|
supplied by an item at the start of a pattern of the form
|
||||||
<pre>
|
<pre>
|
||||||
(*LIMIT_RECURSION=ddd)
|
(*LIMIT_RECURSION=ddd)
|
||||||
</pre>
|
</pre>
|
||||||
where ddd is a decimal number. However, such a setting is ignored unless ddd is
|
where ddd is a decimal number. However, such a setting is ignored unless ddd is
|
||||||
less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
|
less than the limit set by the caller of <b>pcre2_match()</b> or
|
||||||
limit is set, less than the default.
|
<b>pcre2_dfa_match()</b> or, if no such limit is set, less than the default.
|
||||||
<b>int pcre2_set_recursion_memory_management(</b>
|
<b>int pcre2_set_recursion_memory_management(</b>
|
||||||
<b> pcre2_match_context *<i>mcontext</i>,</b>
|
<b> pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
<b> void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
|
<b> void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
|
||||||
|
@ -1976,7 +1978,7 @@ Information about a successful or unsuccessful match is placed in a match
|
||||||
data block, which is an opaque structure that is accessed by function calls. In
|
data block, which is an opaque structure that is accessed by function calls. In
|
||||||
particular, the match data block contains a vector of offsets into the subject
|
particular, the match data block contains a vector of offsets into the subject
|
||||||
string that define the matched part of the subject and any substrings that were
|
string that define the matched part of the subject and any substrings that were
|
||||||
captured. This is know as the <i>ovector</i>.
|
captured. This is known as the <i>ovector</i>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Before calling <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b>, or
|
Before calling <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b>, or
|
||||||
|
@ -3263,7 +3265,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC41" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC41" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 22 November 2016
|
Last updated: 23 December 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2016 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -245,13 +245,23 @@ because <b>pcre2grep</b> has to search for all possible matches in a line, not
|
||||||
just one, in order to colour them all.
|
just one, in order to colour them all.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
The colour that is used can be specified by setting the environment variable
|
The colour that is used can be specified by setting one of the environment
|
||||||
PCRE2GREP_COLOUR or PCRE2GREP_COLOR. If neither of these are set,
|
variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR, PCREGREP_COLOUR, or
|
||||||
<b>pcre2grep</b> looks for GREP_COLOUR or GREP_COLOR. The value of the variable
|
PCREGREP_COLOR, which are checked in that order. If none of these are set,
|
||||||
should be a string of two numbers, separated by a semicolon. They are copied
|
<b>pcre2grep</b> looks for GREP_COLORS or GREP_COLOR (in that order). The value
|
||||||
directly into the control string for setting colour on a terminal, so it is
|
of the variable should be a string of two numbers, separated by a semicolon,
|
||||||
your responsibility to ensure that they make sense. If neither of the
|
except in the case of GREP_COLORS, which must start with "ms=" or "mt="
|
||||||
environment variables is set, the default is "1;31", which gives red.
|
followed by two semicolon-separated colours, terminated by the end of the
|
||||||
|
string or by a colon. If GREP_COLORS does not start with "ms=" or "mt=" it is
|
||||||
|
ignored, and GREP_COLOR is checked.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
If the string obtained from one of the above variables contains any characters
|
||||||
|
other than semicolon or digits, the setting is ignored and the default colour
|
||||||
|
is used. The string is copied directly into the control string for setting
|
||||||
|
colour on a terminal, so it is your responsibility to ensure that the values
|
||||||
|
make sense. If no relevant environment variable is set, the default is "1;31",
|
||||||
|
which gives red.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-D</b> <i>action</i>, <b>--devices=</b><i>action</i>
|
<b>-D</b> <i>action</i>, <b>--devices=</b><i>action</i>
|
||||||
|
@ -773,12 +783,12 @@ character. Otherwise <b>pcre2grep</b> will assume that it has no data.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC10" href="#TOC1">CALLING EXTERNAL SCRIPTS</a><br>
|
<br><a name="SEC10" href="#TOC1">CALLING EXTERNAL SCRIPTS</a><br>
|
||||||
<P>
|
<P>
|
||||||
On non-Windows systems, <b>pcre2grep</b> has, by default, support for calling
|
<b>pcre2grep</b> has, by default, support for calling external programs or
|
||||||
external programs or scripts during matching by making use of PCRE2's callout
|
scripts during matching by making use of PCRE2's callout facility. However,
|
||||||
facility. However, this support can be disabled when <b>pcre2grep</b> is built.
|
this support can be disabled when <b>pcre2grep</b> is built. You can find out
|
||||||
You can find out whether your binary has support for callouts by running it
|
whether your binary has support for callouts by running it with the <b>--help</b>
|
||||||
with the <b>--help</b> option. If the support is not enabled, all callouts in
|
option. If the support is not enabled, all callouts in patterns are ignored by
|
||||||
patterns are ignored by <b>pcre2grep</b>.
|
<b>pcre2grep</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is
|
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is
|
||||||
|
@ -860,7 +870,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 31 October 2016
|
Last updated: 31 December 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2016 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -190,6 +190,12 @@ be less than the value set (or defaulted) by the caller of <b>pcre2_match()</b>
|
||||||
for it to have any effect. In other words, the pattern writer can lower the
|
for it to have any effect. In other words, the pattern writer can lower the
|
||||||
limits set by the programmer, but not raise them. If there is more than one
|
limits set by the programmer, but not raise them. If there is more than one
|
||||||
setting of one of these limits, the lower value is used.
|
setting of one of these limits, the lower value is used.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The match limit is used (but in a different way) when JIT is being used, but it
|
||||||
|
is not relevant, and is ignored, when matching with <b>pcre2_dfa_match()</b>.
|
||||||
|
However, the recursion limit is relevant for DFA matching, which does use some
|
||||||
|
function recursion, in particular, for recursions within the pattern.
|
||||||
<a name="newlines"></a></P>
|
<a name="newlines"></a></P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Newline conventions
|
Newline conventions
|
||||||
|
@ -1347,10 +1353,10 @@ or immediately after a range. For example, [b-d-z] matches letters in the range
|
||||||
b to d, a hyphen character, or z.
|
b to d, a hyphen character, or z.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Perl treats a hyphen as a literal if it appears before a POSIX class (see
|
Perl treats a hyphen as a literal if it appears before or after a POSIX class
|
||||||
below) or a character type escape such as as \d, but gives a warning in its
|
(see below) or a character type escape such as \d, but gives a warning in
|
||||||
warning mode, as this is most likely a user error. As PCRE2 has no facility for
|
its warning mode, as this is most likely a user error. As PCRE2 has no facility
|
||||||
warning, an error is given in these cases.
|
for warning, an error is given in these cases.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
It is not possible to have the literal character "]" as the end character of a
|
It is not possible to have the literal character "]" as the end character of a
|
||||||
|
@ -3446,7 +3452,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 23 October 2016
|
Last updated: 27 December 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2016 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -57,12 +57,13 @@ assertion and "once-only" subpatterns, which are handled like subroutine calls.
|
||||||
Normally, these are never very deep, and the limit on the complexity of
|
Normally, these are never very deep, and the limit on the complexity of
|
||||||
<b>pcre2_dfa_match()</b> is controlled by the amount of workspace it is given.
|
<b>pcre2_dfa_match()</b> is controlled by the amount of workspace it is given.
|
||||||
However, it is possible to write patterns with runaway infinite recursions;
|
However, it is possible to write patterns with runaway infinite recursions;
|
||||||
such patterns will cause <b>pcre2_dfa_match()</b> to run out of stack. At
|
such patterns will cause <b>pcre2_dfa_match()</b> to run out of stack unless a
|
||||||
present, there is no protection against this.
|
limit is applied (see below).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The comments that follow do NOT apply to <b>pcre2_dfa_match()</b>; they are
|
The comments in the next three sections do not apply to
|
||||||
relevant only for <b>pcre2_match()</b> without the JIT optimization.
|
<b>pcre2_dfa_match()</b>; they are relevant only for <b>pcre2_match()</b> without
|
||||||
|
the JIT optimization.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Reducing <b>pcre2_match()</b>'s stack usage
|
Reducing <b>pcre2_match()</b>'s stack usage
|
||||||
|
@ -115,7 +116,7 @@ entitled
|
||||||
in the
|
in the
|
||||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
documentation. Since the block sizes are always the same, it may be possible to
|
documentation. Since the block sizes are always the same, it may be possible to
|
||||||
implement customized a memory handler that is more efficient than the standard
|
implement a customized memory handler that is more efficient than the standard
|
||||||
function. The memory blocks obtained for this purpose are retained and re-used
|
function. The memory blocks obtained for this purpose are retained and re-used
|
||||||
if possible while <b>pcre2_match()</b> is running. They are all freed just
|
if possible while <b>pcre2_match()</b> is running. They are all freed just
|
||||||
before it exits.
|
before it exits.
|
||||||
|
@ -151,6 +152,15 @@ pattern to match. This is done by calling <b>pcre2_match()</b> repeatedly with
|
||||||
different limits.
|
different limits.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
|
Limiting <b>pcre2_dfa_match()</b>'s stack usage
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
The recursion limit, as described above for <b>pcre2_match()</b>, also applies
|
||||||
|
to <b>pcre2_dfa_match()</b>, whose use of recursive function calls for
|
||||||
|
recursions in the pattern can lead to runaway stack usage. The non-recursive
|
||||||
|
match limit is not relevant for DFA matching, and is ignored.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
Changing stack size in Unix-like systems
|
Changing stack size in Unix-like systems
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -198,9 +208,9 @@ Cambridge, England.
|
||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 21 November 2014
|
Last updated: 23 December 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2014 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
|
|
@ -451,9 +451,10 @@ appear.
|
||||||
(*UCP) set PCRE2_UCP (use Unicode properties for \d etc)
|
(*UCP) set PCRE2_UCP (use Unicode properties for \d etc)
|
||||||
</pre>
|
</pre>
|
||||||
Note that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value of the
|
Note that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value of the
|
||||||
limits set by the caller of pcre2_match(), not increase them. The application
|
limits set by the caller of <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>, not
|
||||||
can lock out the use of (*UTF) and (*UCP) by setting the PCRE2_NEVER_UTF or
|
increase them. The application can lock out the use of (*UTF) and (*UCP) by
|
||||||
PCRE2_NEVER_UCP options, respectively, at compile time.
|
setting the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, respectively, at
|
||||||
|
compile time.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC17" href="#TOC1">NEWLINE CONVENTION</a><br>
|
<br><a name="SEC17" href="#TOC1">NEWLINE CONVENTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -595,7 +596,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 28 September 2016
|
Last updated: 23 December 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2016 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -153,8 +153,13 @@ the 32-bit library has been built, this is the default. If the 32-bit library
|
||||||
has not been built, this option causes an error.
|
has not been built, this option causes an error.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
<b>-ac</b>
|
||||||
|
Behave as if each pattern has the <b>auto_callout</b> modifier, that is, insert
|
||||||
|
automatic callouts into every pattern that is compiled.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
<b>-b</b>
|
<b>-b</b>
|
||||||
Behave as if each pattern has the <b>/fullbincode</b> modifier; the full
|
Behave as if each pattern has the <b>fullbincode</b> modifier; the full
|
||||||
internal binary form of the pattern is output after compilation.
|
internal binary form of the pattern is output after compilation.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -220,7 +225,7 @@ Output a brief summary these options and then exit.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-i</b>
|
<b>-i</b>
|
||||||
Behave as if each pattern has the <b>/info</b> modifier; information about the
|
Behave as if each pattern has the <b>info</b> modifier; information about the
|
||||||
compiled pattern is given after compilation.
|
compiled pattern is given after compilation.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -806,7 +811,7 @@ modes are to be compiled:
|
||||||
2 compile JIT code for soft partial matching
|
2 compile JIT code for soft partial matching
|
||||||
4 compile JIT code for hard partial matching
|
4 compile JIT code for hard partial matching
|
||||||
</pre>
|
</pre>
|
||||||
The possible values for the <b>/jit</b> modifier are therefore:
|
The possible values for the <b>jit</b> modifier are therefore:
|
||||||
<pre>
|
<pre>
|
||||||
0 disable JIT
|
0 disable JIT
|
||||||
1 normal matching only
|
1 normal matching only
|
||||||
|
@ -852,14 +857,14 @@ code was actually used in the match.
|
||||||
Setting a locale
|
Setting a locale
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The <b>/locale</b> modifier must specify the name of a locale, for example:
|
The <b>locale</b> modifier must specify the name of a locale, for example:
|
||||||
<pre>
|
<pre>
|
||||||
/pattern/locale=fr_FR
|
/pattern/locale=fr_FR
|
||||||
</pre>
|
</pre>
|
||||||
The given locale is set, <b>pcre2_maketables()</b> is called to build a set of
|
The given locale is set, <b>pcre2_maketables()</b> is called to build a set of
|
||||||
character tables for the locale, and this is then passed to
|
character tables for the locale, and this is then passed to
|
||||||
<b>pcre2_compile()</b> when compiling the regular expression. The same tables
|
<b>pcre2_compile()</b> when compiling the regular expression. The same tables
|
||||||
are used when matching the following subject lines. The <b>/locale</b> modifier
|
are used when matching the following subject lines. The <b>locale</b> modifier
|
||||||
applies only to the pattern on which it appears, but can be given in a
|
applies only to the pattern on which it appears, but can be given in a
|
||||||
<b>#pattern</b> command if a default is needed. Setting a locale and alternate
|
<b>#pattern</b> command if a default is needed. Setting a locale and alternate
|
||||||
character tables are mutually exclusive.
|
character tables are mutually exclusive.
|
||||||
|
@ -868,7 +873,7 @@ character tables are mutually exclusive.
|
||||||
Showing pattern memory
|
Showing pattern memory
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The <b>/memory</b> modifier causes the size in bytes of the memory used to hold
|
The <b>memory</b> modifier causes the size in bytes of the memory used to hold
|
||||||
the compiled pattern to be output. This does not include the size of the
|
the compiled pattern to be output. This does not include the size of the
|
||||||
<b>pcre2_code</b> block; it is just the actual compiled data. If the pattern is
|
<b>pcre2_code</b> block; it is just the actual compiled data. If the pattern is
|
||||||
subsequently passed to the JIT compiler, the size of the JIT compiled code is
|
subsequently passed to the JIT compiler, the size of the JIT compiled code is
|
||||||
|
@ -937,7 +942,7 @@ an error.
|
||||||
Testing the stack guard feature
|
Testing the stack guard feature
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The <b>/stackguard</b> modifier is used to test the use of
|
The <b>stackguard</b> modifier is used to test the use of
|
||||||
<b>pcre2_set_compile_recursion_guard()</b>, a function that is provided to
|
<b>pcre2_set_compile_recursion_guard()</b>, a function that is provided to
|
||||||
enable stack availability to be checked during compilation (see the
|
enable stack availability to be checked during compilation (see the
|
||||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
|
@ -952,7 +957,7 @@ be aborted.
|
||||||
Using alternative character tables
|
Using alternative character tables
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The value specified for the <b>/tables</b> modifier must be one of the digits 0,
|
The value specified for the <b>tables</b> modifier must be one of the digits 0,
|
||||||
1, or 2. It causes a specific set of built-in character tables to be passed to
|
1, or 2. It causes a specific set of built-in character tables to be passed to
|
||||||
<b>pcre2_compile()</b>. This is used in the PCRE2 tests to check behaviour with
|
<b>pcre2_compile()</b>. This is used in the PCRE2 tests to check behaviour with
|
||||||
different character tables. The digit specifies the tables as follows:
|
different character tables. The digit specifies the tables as follows:
|
||||||
|
@ -1042,7 +1047,7 @@ The partial matching modifiers are provided with abbreviations because they
|
||||||
appear frequently in tests.
|
appear frequently in tests.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If the <b>/posix</b> modifier was present on the pattern, causing the POSIX
|
If the <b>posix</b> modifier was present on the pattern, causing the POSIX
|
||||||
wrapper API to be used, the only option-setting modifiers that have any effect
|
wrapper API to be used, the only option-setting modifiers that have any effect
|
||||||
are <b>notbol</b>, <b>notempty</b>, and <b>noteol</b>, causing REG_NOTBOL,
|
are <b>notbol</b>, <b>notempty</b>, and <b>noteol</b>, causing REG_NOTBOL,
|
||||||
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to <b>regexec()</b>.
|
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to <b>regexec()</b>.
|
||||||
|
@ -1064,6 +1069,7 @@ pattern.
|
||||||
altglobal alternative global matching
|
altglobal alternative global matching
|
||||||
callout_capture show captures at callout time
|
callout_capture show captures at callout time
|
||||||
callout_data=<n> set a value to pass via callouts
|
callout_data=<n> set a value to pass via callouts
|
||||||
|
callout_error=<n>[:<m>] control callout error
|
||||||
callout_fail=<n>[:<m>] control callout failure
|
callout_fail=<n>[:<m>] control callout failure
|
||||||
callout_none do not supply a callout function
|
callout_none do not supply a callout function
|
||||||
copy=<number or name> copy captured substring
|
copy=<number or name> copy captured substring
|
||||||
|
@ -1159,15 +1165,22 @@ Testing callouts
|
||||||
<P>
|
<P>
|
||||||
A callout function is supplied when <b>pcre2test</b> calls the library matching
|
A callout function is supplied when <b>pcre2test</b> calls the library matching
|
||||||
functions, unless <b>callout_none</b> is specified. If <b>callout_capture</b> is
|
functions, unless <b>callout_none</b> is specified. If <b>callout_capture</b> is
|
||||||
set, the current captured groups are output when a callout occurs.
|
set, the current captured groups are output when a callout occurs. The default
|
||||||
|
return from the callout function is zero, which allows matching to continue.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>callout_fail</b> modifier can be given one or two numbers. If there is
|
The <b>callout_fail</b> modifier can be given one or two numbers. If there is
|
||||||
only one number, 1 is returned instead of 0 when a callout of that number is
|
only one number, 1 is returned instead of 0 (causing matching to backtrack)
|
||||||
reached. If two numbers are given, 1 is returned when callout <n> is reached
|
when a callout of that number is reached. If two numbers (<n>:<m>) are given, 1
|
||||||
for the <m>th time. Note that callouts with string arguments are always given
|
is returned when callout <n> is reached and there have been at least <m>
|
||||||
the number zero. See "Callouts" below for a description of the output when a
|
callouts. The <b>callout_error</b> modifier is similar, except that
|
||||||
callout it taken.
|
PCRE2_ERROR_CALLOUT is returned, causing the entire matching process to be
|
||||||
|
aborted. If both these modifiers are set for the same callout number,
|
||||||
|
<b>callout_error</b> takes precedence.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Note that callouts with string arguments are always given the number zero. See
|
||||||
|
"Callouts" below for a description of the output when a callout is taken.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>callout_data</b> modifier can be given an unsigned or a negative number.
|
The <b>callout_data</b> modifier can be given an unsigned or a negative number.
|
||||||
|
@ -1180,7 +1193,7 @@ Finding all matches in a string
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Searching for all possible matches within a subject can be requested by the
|
Searching for all possible matches within a subject can be requested by the
|
||||||
<b>global</b> or <b>/altglobal</b> modifier. After finding a match, the matching
|
<b>global</b> or <b>altglobal</b> modifier. After finding a match, the matching
|
||||||
function is called again to search the remainder of the subject. The difference
|
function is called again to search the remainder of the subject. The difference
|
||||||
between <b>global</b> and <b>altglobal</b> is that the former uses the
|
between <b>global</b> and <b>altglobal</b> is that the former uses the
|
||||||
<i>start_offset</i> argument to <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>
|
<i>start_offset</i> argument to <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>
|
||||||
|
@ -1480,7 +1493,7 @@ unset substring is shown as "<unset>", as for the second data line.
|
||||||
If the strings contain any non-printing characters, they are output as \xhh
|
If the strings contain any non-printing characters, they are output as \xhh
|
||||||
escapes if the value is less than 256 and UTF mode is not set. Otherwise they
|
escapes if the value is less than 256 and UTF mode is not set. Otherwise they
|
||||||
are output as \x{hh...} escapes. See below for the definition of non-printing
|
are output as \x{hh...} escapes. See below for the definition of non-printing
|
||||||
characters. If the <b>/aftertext</b> modifier is set, the output for substring
|
characters. If the <b>aftertext</b> modifier is set, the output for substring
|
||||||
0 is followed by the rest of the subject string, identified by "0+" like
|
0 is followed by the rest of the subject string, identified by "0+" like
|
||||||
this:
|
this:
|
||||||
<pre>
|
<pre>
|
||||||
|
@ -1673,7 +1686,7 @@ therefore shown as hex escapes.
|
||||||
<P>
|
<P>
|
||||||
When <b>pcre2test</b> is outputting text that is a matched part of a subject
|
When <b>pcre2test</b> is outputting text that is a matched part of a subject
|
||||||
string, it behaves in the same way, unless a different locale has been set for
|
string, it behaves in the same way, unless a different locale has been set for
|
||||||
the pattern (using the <b>/locale</b> modifier). In this case, the
|
the pattern (using the <b>locale</b> modifier). In this case, the
|
||||||
<b>isprint()</b> function is used to distinguish printing and non-printing
|
<b>isprint()</b> function is used to distinguish printing and non-printing
|
||||||
characters.
|
characters.
|
||||||
<a name="saverestore"></a></P>
|
<a name="saverestore"></a></P>
|
||||||
|
@ -1766,7 +1779,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 04 November 2016
|
Last updated: 28 December 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2016 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
1959
doc/pcre2.txt
1959
doc/pcre2.txt
File diff suppressed because it is too large
Load Diff
|
@ -219,15 +219,25 @@ OPTIONS
|
||||||
possible matches in a line, not just one, in order to colour
|
possible matches in a line, not just one, in order to colour
|
||||||
them all.
|
them all.
|
||||||
|
|
||||||
The colour that is used can be specified by setting the envi-
|
The colour that is used can be specified by setting one of
|
||||||
ronment variable PCRE2GREP_COLOUR or PCRE2GREP_COLOR. If nei-
|
the environment variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR,
|
||||||
ther of these are set, pcre2grep looks for GREP_COLOUR or
|
PCREGREP_COLOUR, or PCREGREP_COLOR, which are checked in that
|
||||||
GREP_COLOR. The value of the variable should be a string of
|
order. If none of these are set, pcre2grep looks for
|
||||||
two numbers, separated by a semicolon. They are copied
|
GREP_COLORS or GREP_COLOR (in that order). The value of the
|
||||||
directly into the control string for setting colour on a ter-
|
variable should be a string of two numbers, separated by a
|
||||||
minal, so it is your responsibility to ensure that they make
|
semicolon, except in the case of GREP_COLORS, which must
|
||||||
sense. If neither of the environment variables is set, the
|
start with "ms=" or "mt=" followed by two semicolon-separated
|
||||||
default is "1;31", which gives red.
|
colours, terminated by the end of the string or by a colon.
|
||||||
|
If GREP_COLORS does not start with "ms=" or "mt=" it is
|
||||||
|
ignored, and GREP_COLOR is checked.
|
||||||
|
|
||||||
|
If the string obtained from one of the above variables con-
|
||||||
|
tains any characters other than semicolon or digits, the set-
|
||||||
|
ting is ignored and the default colour is used. The string is
|
||||||
|
copied directly into the control string for setting colour on
|
||||||
|
a terminal, so it is your responsibility to ensure that the
|
||||||
|
values make sense. If no relevant environment variable is
|
||||||
|
set, the default is "1;31", which gives red.
|
||||||
|
|
||||||
-D action, --devices=action
|
-D action, --devices=action
|
||||||
If an input path is not a regular file or a directory,
|
If an input path is not a regular file or a directory,
|
||||||
|
@ -763,12 +773,12 @@ OPTIONS WITH DATA
|
||||||
|
|
||||||
CALLING EXTERNAL SCRIPTS
|
CALLING EXTERNAL SCRIPTS
|
||||||
|
|
||||||
On non-Windows systems, pcre2grep has, by default, support for calling
|
pcre2grep has, by default, support for calling external programs or
|
||||||
external programs or scripts during matching by making use of PCRE2's
|
scripts during matching by making use of PCRE2's callout facility. How-
|
||||||
callout facility. However, this support can be disabled when pcre2grep
|
ever, this support can be disabled when pcre2grep is built. You can
|
||||||
is built. You can find out whether your binary has support for call-
|
find out whether your binary has support for callouts by running it
|
||||||
outs by running it with the --help option. If the support is not
|
with the --help option. If the support is not enabled, all callouts in
|
||||||
enabled, all callouts in patterns are ignored by pcre2grep.
|
patterns are ignored by pcre2grep.
|
||||||
|
|
||||||
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu-
|
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu-
|
||||||
ment is either a number or a quoted string (see the pcre2callout docu-
|
ment is either a number or a quoted string (see the pcre2callout docu-
|
||||||
|
@ -852,5 +862,5 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 31 October 2016
|
Last updated: 31 December 2016
|
||||||
Copyright (c) 1997-2016 University of Cambridge.
|
Copyright (c) 1997-2016 University of Cambridge.
|
||||||
|
|
|
@ -116,7 +116,11 @@ COMMAND LINE OPTIONS
|
||||||
is the default. If the 32-bit library has not been built,
|
is the default. If the 32-bit library has not been built,
|
||||||
this option causes an error.
|
this option causes an error.
|
||||||
|
|
||||||
-b Behave as if each pattern has the /fullbincode modifier; the
|
-ac Behave as if each pattern has the auto_callout modifier, that
|
||||||
|
is, insert automatic callouts into every pattern that is com-
|
||||||
|
piled.
|
||||||
|
|
||||||
|
-b Behave as if each pattern has the fullbincode modifier; the
|
||||||
full internal binary form of the pattern is output after com-
|
full internal binary form of the pattern is output after com-
|
||||||
pilation.
|
pilation.
|
||||||
|
|
||||||
|
@ -174,7 +178,7 @@ COMMAND LINE OPTIONS
|
||||||
|
|
||||||
-help Output a brief summary of these options and then exit.
|
-help Output a brief summary of these options and then exit.
|
||||||
|
|
||||||
-i Behave as if each pattern has the /info modifier; information
|
-i Behave as if each pattern has the info modifier; information
|
||||||
about the compiled pattern is given after compilation.
|
about the compiled pattern is given after compilation.
|
||||||
|
|
||||||
-jit Behave as if each pattern line has the jit modifier; after
|
-jit Behave as if each pattern line has the jit modifier; after
|
||||||
|
@ -724,7 +728,7 @@ PATTERN MODIFIERS
|
||||||
2 compile JIT code for soft partial matching
|
2 compile JIT code for soft partial matching
|
||||||
4 compile JIT code for hard partial matching
|
4 compile JIT code for hard partial matching
|
||||||
|
|
||||||
The possible values for the /jit modifier are therefore:
|
The possible values for the jit modifier are therefore:
|
||||||
|
|
||||||
0 disable JIT
|
0 disable JIT
|
||||||
1 normal matching only
|
1 normal matching only
|
||||||
|
@ -764,24 +768,24 @@ PATTERN MODIFIERS
|
||||||
|
|
||||||
Setting a locale
|
Setting a locale
|
||||||
|
|
||||||
The /locale modifier must specify the name of a locale, for example:
|
The locale modifier must specify the name of a locale, for example:
|
||||||
|
|
||||||
/pattern/locale=fr_FR
|
/pattern/locale=fr_FR
|
||||||
|
|
||||||
The given locale is set, pcre2_maketables() is called to build a set of
|
The given locale is set, pcre2_maketables() is called to build a set of
|
||||||
character tables for the locale, and this is then passed to pcre2_com-
|
character tables for the locale, and this is then passed to pcre2_com-
|
||||||
pile() when compiling the regular expression. The same tables are used
|
pile() when compiling the regular expression. The same tables are used
|
||||||
when matching the following subject lines. The /locale modifier applies
|
when matching the following subject lines. The locale modifier applies
|
||||||
only to the pattern on which it appears, but can be given in a #pattern
|
only to the pattern on which it appears, but can be given in a #pattern
|
||||||
command if a default is needed. Setting a locale and alternate charac-
|
command if a default is needed. Setting a locale and alternate charac-
|
||||||
ter tables are mutually exclusive.
|
ter tables are mutually exclusive.
|
||||||
|
|
||||||
Showing pattern memory
|
Showing pattern memory
|
||||||
|
|
||||||
The /memory modifier causes the size in bytes of the memory used to
|
The memory modifier causes the size in bytes of the memory used to hold
|
||||||
hold the compiled pattern to be output. This does not include the size
|
the compiled pattern to be output. This does not include the size of
|
||||||
of the pcre2_code block; it is just the actual compiled data. If the
|
the pcre2_code block; it is just the actual compiled data. If the pat-
|
||||||
pattern is subsequently passed to the JIT compiler, the size of the JIT
|
tern is subsequently passed to the JIT compiler, the size of the JIT
|
||||||
compiled code is also output. Here is an example:
|
compiled code is also output. Here is an example:
|
||||||
|
|
||||||
re> /a(b)c/jit,memory
|
re> /a(b)c/jit,memory
|
||||||
|
@ -837,7 +841,7 @@ PATTERN MODIFIERS
|
||||||
|
|
||||||
Testing the stack guard feature
|
Testing the stack guard feature
|
||||||
|
|
||||||
The /stackguard modifier is used to test the use of pcre2_set_com-
|
The stackguard modifier is used to test the use of pcre2_set_com-
|
||||||
pile_recursion_guard(), a function that is provided to enable stack
|
pile_recursion_guard(), a function that is provided to enable stack
|
||||||
availability to be checked during compilation (see the pcre2api docu-
|
availability to be checked during compilation (see the pcre2api docu-
|
||||||
mentation for details). If the number specified by the modifier is
|
mentation for details). If the number specified by the modifier is
|
||||||
|
@ -849,7 +853,7 @@ PATTERN MODIFIERS
|
||||||
|
|
||||||
Using alternative character tables
|
Using alternative character tables
|
||||||
|
|
||||||
The value specified for the /tables modifier must be one of the digits
|
The value specified for the tables modifier must be one of the digits
|
||||||
0, 1, or 2. It causes a specific set of built-in character tables to be
|
0, 1, or 2. It causes a specific set of built-in character tables to be
|
||||||
passed to pcre2_compile(). This is used in the PCRE2 tests to check be-
|
passed to pcre2_compile(). This is used in the PCRE2 tests to check be-
|
||||||
haviour with different character tables. The digit specifies the tables
|
haviour with different character tables. The digit specifies the tables
|
||||||
|
@ -931,7 +935,7 @@ SUBJECT MODIFIERS
|
||||||
The partial matching modifiers are provided with abbreviations because
|
The partial matching modifiers are provided with abbreviations because
|
||||||
they appear frequently in tests.
|
they appear frequently in tests.
|
||||||
|
|
||||||
If the /posix modifier was present on the pattern, causing the POSIX
|
If the posix modifier was present on the pattern, causing the POSIX
|
||||||
wrapper API to be used, the only option-setting modifiers that have any
|
wrapper API to be used, the only option-setting modifiers that have any
|
||||||
effect are notbol, notempty, and noteol, causing REG_NOTBOL,
|
effect are notbol, notempty, and noteol, causing REG_NOTBOL,
|
||||||
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to regexec().
|
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to regexec().
|
||||||
|
@ -951,6 +955,7 @@ SUBJECT MODIFIERS
|
||||||
altglobal alternative global matching
|
altglobal alternative global matching
|
||||||
callout_capture show captures at callout time
|
callout_capture show captures at callout time
|
||||||
callout_data=<n> set a value to pass via callouts
|
callout_data=<n> set a value to pass via callouts
|
||||||
|
callout_error=<n>[:<m>] control callout error
|
||||||
callout_fail=<n>[:<m>] control callout failure
|
callout_fail=<n>[:<m>] control callout failure
|
||||||
callout_none do not supply a callout function
|
callout_none do not supply a callout function
|
||||||
copy=<number or name> copy captured substring
|
copy=<number or name> copy captured substring
|
||||||
|
@ -1042,14 +1047,22 @@ SUBJECT MODIFIERS
|
||||||
|
|
||||||
A callout function is supplied when pcre2test calls the library match-
|
A callout function is supplied when pcre2test calls the library match-
|
||||||
ing functions, unless callout_none is specified. If callout_capture is
|
ing functions, unless callout_none is specified. If callout_capture is
|
||||||
set, the current captured groups are output when a callout occurs.
|
set, the current captured groups are output when a callout occurs. The
|
||||||
|
default return from the callout function is zero, which allows matching
|
||||||
|
to continue.
|
||||||
|
|
||||||
The callout_fail modifier can be given one or two numbers. If there is
|
The callout_fail modifier can be given one or two numbers. If there is
|
||||||
only one number, 1 is returned instead of 0 when a callout of that num-
|
only one number, 1 is returned instead of 0 (causing matching to back-
|
||||||
ber is reached. If two numbers are given, 1 is returned when callout
|
track) when a callout of that number is reached. If two numbers
|
||||||
<n> is reached for the <m>th time. Note that callouts with string argu-
|
(<n>:<m>) are given, 1 is returned when callout <n> is reached and
|
||||||
ments are always given the number zero. See "Callouts" below for a
|
there have been at least <m> callouts. The callout_error modifier is
|
||||||
description of the output when a callout it taken.
|
similar, except that PCRE2_ERROR_CALLOUT is returned, causing the
|
||||||
|
entire matching process to be aborted. If both these modifiers are set
|
||||||
|
for the same callout number, callout_error takes precedence.
|
||||||
|
|
||||||
|
Note that callouts with string arguments are always given the number
|
||||||
|
zero. See "Callouts" below for a description of the output when a call-
|
||||||
|
out is taken.
|
||||||
|
|
||||||
The callout_data modifier can be given an unsigned or a negative num-
|
The callout_data modifier can be given an unsigned or a negative num-
|
||||||
ber. This is set as the "user data" that is passed to the matching
|
ber. This is set as the "user data" that is passed to the matching
|
||||||
|
@ -1060,7 +1073,7 @@ SUBJECT MODIFIERS
|
||||||
Finding all matches in a string
|
Finding all matches in a string
|
||||||
|
|
||||||
Searching for all possible matches within a subject can be requested by
|
Searching for all possible matches within a subject can be requested by
|
||||||
the global or /altglobal modifier. After finding a match, the matching
|
the global or altglobal modifier. After finding a match, the matching
|
||||||
function is called again to search the remainder of the subject. The
|
function is called again to search the remainder of the subject. The
|
||||||
difference between global and altglobal is that the former uses the
|
difference between global and altglobal is that the former uses the
|
||||||
start_offset argument to pcre2_match() or pcre2_dfa_match() to start
|
start_offset argument to pcre2_match() or pcre2_dfa_match() to start
|
||||||
|
@ -1329,7 +1342,7 @@ DEFAULT OUTPUT FROM pcre2test
|
||||||
If the strings contain any non-printing characters, they are output as
|
If the strings contain any non-printing characters, they are output as
|
||||||
\xhh escapes if the value is less than 256 and UTF mode is not set.
|
\xhh escapes if the value is less than 256 and UTF mode is not set.
|
||||||
Otherwise they are output as \x{hh...} escapes. See below for the defi-
|
Otherwise they are output as \x{hh...} escapes. See below for the defi-
|
||||||
nition of non-printing characters. If the /aftertext modifier is set,
|
nition of non-printing characters. If the aftertext modifier is set,
|
||||||
the output for substring 0 is followed by the rest of the subject
|
the output for substring 0 is followed by the rest of the subject
|
||||||
string, identified by "0+" like this:
|
string, identified by "0+" like this:
|
||||||
|
|
||||||
|
@ -1512,7 +1525,7 @@ NON-PRINTING CHARACTERS
|
||||||
|
|
||||||
When pcre2test is outputting text that is a matched part of a subject
|
When pcre2test is outputting text that is a matched part of a subject
|
||||||
string, it behaves in the same way, unless a different locale has been
|
string, it behaves in the same way, unless a different locale has been
|
||||||
set for the pattern (using the /locale modifier). In this case, the
|
set for the pattern (using the locale modifier). In this case, the
|
||||||
isprint() function is used to distinguish printing and non-printing
|
isprint() function is used to distinguish printing and non-printing
|
||||||
characters.
|
characters.
|
||||||
|
|
||||||
|
@ -1601,5 +1614,5 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 04 November 2016
|
Last updated: 28 December 2016
|
||||||
Copyright (c) 1997-2016 University of Cambridge.
|
Copyright (c) 1997-2016 University of Cambridge.
|
||||||
|
|
|
@ -78,6 +78,9 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
/* Define to 1 if you have the <memory.h> header file. */
|
/* Define to 1 if you have the <memory.h> header file. */
|
||||||
/* #undef HAVE_MEMORY_H */
|
/* #undef HAVE_MEMORY_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `mkostemp' function. */
|
||||||
|
/* #undef HAVE_MKOSTEMP */
|
||||||
|
|
||||||
/* Define if you have POSIX threads libraries and header files. */
|
/* Define if you have POSIX threads libraries and header files. */
|
||||||
/* #undef HAVE_PTHREAD */
|
/* #undef HAVE_PTHREAD */
|
||||||
|
|
||||||
|
@ -90,6 +93,9 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
/* Define to 1 if you have the <readline/readline.h> header file. */
|
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||||
/* #undef HAVE_READLINE_READLINE_H */
|
/* #undef HAVE_READLINE_READLINE_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `secure_getenv' function. */
|
||||||
|
/* #undef HAVE_SECURE_GETENV */
|
||||||
|
|
||||||
/* Define to 1 if you have the <stdint.h> header file. */
|
/* Define to 1 if you have the <stdint.h> header file. */
|
||||||
/* #undef HAVE_STDINT_H */
|
/* #undef HAVE_STDINT_H */
|
||||||
|
|
||||||
|
@ -308,9 +314,40 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
/* Define to any value for valgrind support to find invalid memory reads. */
|
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||||
/* #undef SUPPORT_VALGRIND */
|
/* #undef SUPPORT_VALGRIND */
|
||||||
|
|
||||||
|
/* Enable extensions on AIX 3, Interix. */
|
||||||
|
#ifndef _ALL_SOURCE
|
||||||
|
# define _ALL_SOURCE 1
|
||||||
|
#endif
|
||||||
|
/* Enable GNU extensions on systems that have them. */
|
||||||
|
#ifndef _GNU_SOURCE
|
||||||
|
# define _GNU_SOURCE 1
|
||||||
|
#endif
|
||||||
|
/* Enable threading extensions on Solaris. */
|
||||||
|
#ifndef _POSIX_PTHREAD_SEMANTICS
|
||||||
|
# define _POSIX_PTHREAD_SEMANTICS 1
|
||||||
|
#endif
|
||||||
|
/* Enable extensions on HP NonStop. */
|
||||||
|
#ifndef _TANDEM_SOURCE
|
||||||
|
# define _TANDEM_SOURCE 1
|
||||||
|
#endif
|
||||||
|
/* Enable general extensions on Solaris. */
|
||||||
|
#ifndef __EXTENSIONS__
|
||||||
|
# define __EXTENSIONS__ 1
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Version number of package */
|
/* Version number of package */
|
||||||
#define VERSION "10.23-RC1"
|
#define VERSION "10.23-RC1"
|
||||||
|
|
||||||
|
/* Define to 1 if on MINIX. */
|
||||||
|
/* #undef _MINIX */
|
||||||
|
|
||||||
|
/* Define to 2 if the system does not provide POSIX.1 features except with
|
||||||
|
this defined. */
|
||||||
|
/* #undef _POSIX_1_SOURCE */
|
||||||
|
|
||||||
|
/* Define to 1 if you need to in order for `stat' and other things to work. */
|
||||||
|
/* #undef _POSIX_SOURCE */
|
||||||
|
|
||||||
/* Define to empty if `const' does not conform to ANSI C. */
|
/* Define to empty if `const' does not conform to ANSI C. */
|
||||||
/* #undef const */
|
/* #undef const */
|
||||||
|
|
||||||
|
|
|
@ -44,7 +44,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define PCRE2_MAJOR 10
|
#define PCRE2_MAJOR 10
|
||||||
#define PCRE2_MINOR 23
|
#define PCRE2_MINOR 23
|
||||||
#define PCRE2_PRERELEASE -RC1
|
#define PCRE2_PRERELEASE -RC1
|
||||||
#define PCRE2_DATE 2016-08-01
|
#define PCRE2_DATE 2017-01-16
|
||||||
|
|
||||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||||
imported have to be identified as such. When building PCRE2, the appropriate
|
imported have to be identified as such. When building PCRE2, the appropriate
|
||||||
|
|
|
@ -44,7 +44,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define PCRE2_MAJOR 10
|
#define PCRE2_MAJOR 10
|
||||||
#define PCRE2_MINOR 23
|
#define PCRE2_MINOR 23
|
||||||
#define PCRE2_PRERELEASE -RC1
|
#define PCRE2_PRERELEASE -RC1
|
||||||
#define PCRE2_DATE 2016-08-01
|
#define PCRE2_DATE 2017-01-16
|
||||||
|
|
||||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||||
imported have to be identified as such. When building PCRE2, the appropriate
|
imported have to be identified as such. When building PCRE2, the appropriate
|
||||||
|
@ -465,7 +465,9 @@ PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
pcre2_code_free(pcre2_code *); \
|
pcre2_code_free(pcre2_code *); \
|
||||||
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||||
*pcre2_code_copy(const pcre2_code *);
|
*pcre2_code_copy(const pcre2_code *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||||
|
*pcre2_code_copy_with_tables(const pcre2_code *);
|
||||||
|
|
||||||
|
|
||||||
/* Functions that give information about a compiled pattern. */
|
/* Functions that give information about a compiled pattern. */
|
||||||
|
@ -629,6 +631,7 @@ pcre2_compile are called by application code. */
|
||||||
|
|
||||||
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
||||||
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
||||||
|
#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
|
||||||
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
||||||
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
||||||
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
||||||
|
|
|
@ -116,9 +116,11 @@ them will be able to (i.e. assume a 64-bit world). */
|
||||||
|
|
||||||
/* Function definitions to allow mutual recursion */
|
/* Function definitions to allow mutual recursion */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
static unsigned int
|
static unsigned int
|
||||||
add_list_to_class_internal(uint8_t *, PCRE2_UCHAR **, uint32_t, compile_block *,
|
add_list_to_class_internal(uint8_t *, PCRE2_UCHAR **, uint32_t,
|
||||||
const uint32_t *, unsigned int);
|
compile_block *, const uint32_t *, unsigned int);
|
||||||
|
#endif
|
||||||
|
|
||||||
static int
|
static int
|
||||||
compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t,
|
compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t,
|
||||||
|
@ -4381,16 +4383,14 @@ return n8; /* Number of 8-bit characters */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Add a list of characters to a class (internal) *
|
* Add a list of characters to a class (internal) *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function is used for adding a list of case-equivalent characters to a
|
/* This function is used for adding a list of case-equivalent characters to a
|
||||||
class, and also for adding a list of horizontal or vertical whitespace. If the
|
class when in UTF mode. This function is called only from within
|
||||||
list is in order (which it should be), ranges of characters are detected and
|
add_to_class_internal(), with which it is mutually recursive.
|
||||||
handled appropriately. This function is called (sometimes recursively) only
|
|
||||||
from within the "add to class" set of functions. The external entry point is
|
|
||||||
add_list_to_class().
|
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
classbits the bit map for characters < 256
|
classbits the bit map for characters < 256
|
||||||
|
@ -4423,6 +4423,7 @@ while (p[0] < NOTACHAR)
|
||||||
}
|
}
|
||||||
return n8;
|
return n8;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -4459,8 +4460,11 @@ return add_to_class_internal(classbits, uchardptr, options, cb, start, end);
|
||||||
* External entry point for add list to class *
|
* External entry point for add list to class *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function sets the overall range so that the internal functions can try
|
/* This function is used for adding a list of horizontal or vertical whitespace
|
||||||
to avoid duplication when handling case-independence.
|
characters to a class. The list must be in order so that ranges of characters
|
||||||
|
can be detected and handled appropriately. This function sets the overall range
|
||||||
|
so that the internal functions can try to avoid duplication when handling
|
||||||
|
case-independence.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
classbits the bit map for characters < 256
|
classbits the bit map for characters < 256
|
||||||
|
|
|
@ -1937,6 +1937,8 @@ static int
|
||||||
fill_buffer(void *handle, int frtype, char *buffer, int length,
|
fill_buffer(void *handle, int frtype, char *buffer, int length,
|
||||||
BOOL input_line_buffered)
|
BOOL input_line_buffered)
|
||||||
{
|
{
|
||||||
|
(void)frtype; /* Avoid warning when not used */
|
||||||
|
|
||||||
#ifdef SUPPORT_LIBZ
|
#ifdef SUPPORT_LIBZ
|
||||||
if (frtype == FR_LIBZ)
|
if (frtype == FR_LIBZ)
|
||||||
return gzread((gzFile)handle, buffer, length);
|
return gzread((gzFile)handle, buffer, length);
|
||||||
|
|
Loading…
Reference in New Issue