File tidies for 10.23-RC1
This commit is contained in:
parent
31c62034fe
commit
ca1ecd41a1
6
AUTHORS
6
AUTHORS
|
@ -8,7 +8,7 @@ Email domain: cam.ac.uk
|
|||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2016 University of Cambridge
|
||||
Copyright (c) 1997-2017 University of Cambridge
|
||||
All rights reserved
|
||||
|
||||
|
||||
|
@ -19,7 +19,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2016 Zoltan Herczeg
|
||||
Copyright(c) 2010-2017 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -30,7 +30,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2016 Zoltan Herczeg
|
||||
Copyright(c) 2009-2017 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
####
|
||||
|
|
|
@ -2,8 +2,8 @@ Change Log for PCRE2
|
|||
--------------------
|
||||
|
||||
|
||||
Version 10.23 xx-xxxxxx-2016
|
||||
----------------------------
|
||||
Version 10.23 16-January-2017
|
||||
-----------------------------
|
||||
|
||||
1. Extended pcre2test with the utf8_input modifier so that it is able to
|
||||
generate all possible 16-bit and 32-bit code unit values in non-UTF modes.
|
||||
|
|
6
LICENCE
6
LICENCE
|
@ -25,7 +25,7 @@ Email domain: cam.ac.uk
|
|||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2016 University of Cambridge
|
||||
Copyright (c) 1997-2017 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -36,7 +36,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2016 Zoltan Herczeg
|
||||
Copyright(c) 2010-2017 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -47,7 +47,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2016 Zoltan Herczeg
|
||||
Copyright(c) 2009-2017 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
|
|
@ -3,6 +3,10 @@
|
|||
AUTOMAKE_OPTIONS = subdir-objects
|
||||
ACLOCAL_AMFLAGS = -I m4
|
||||
|
||||
## This seems to have become necessary for building in non-source directory.
|
||||
|
||||
AM_CPPFLAGS="-I$(srcdir)/src"
|
||||
|
||||
## Specify the documentation files that are distributed.
|
||||
|
||||
dist_doc_DATA = \
|
||||
|
|
36
NEWS
36
NEWS
|
@ -1,6 +1,42 @@
|
|||
News about PCRE2 releases
|
||||
-------------------------
|
||||
|
||||
Version 10.23 16-January-2017
|
||||
-----------------------------
|
||||
|
||||
1. ChangeLog has the details of a lot of bug fixes and tidies.
|
||||
|
||||
2. There has been a major re-factoring of the pcre2_compile.c file. Most syntax
|
||||
checking is now done in the pre-pass that identifies capturing groups. This has
|
||||
reduced the amount of duplication and made the code tidier. While doing this,
|
||||
some minor bugs and Perl incompatibilities were fixed (see ChangeLog for
|
||||
details).
|
||||
|
||||
3. Back references are now permitted in lookbehind assertions when there are
|
||||
no duplicated group numbers (that is, (?| has not been used), and, if the
|
||||
reference is by name, there is only one group of that name. The referenced
|
||||
group must, of course, be of fixed length.
|
||||
|
||||
4. \g{+<number>} (e.g. \g{+2} ) is now supported. It is a "forward back
|
||||
reference" and can be useful in repetitions (compare \g{-<number>} ). Perl does
|
||||
not recognize this syntax.
|
||||
|
||||
5. pcre2grep now automatically expands its buffer up to a maximum set by
|
||||
--max-buffer-size.
|
||||
|
||||
6. The -t option (grand total) has been added to pcre2grep.
|
||||
|
||||
7. A new function called pcre2_code_copy_with_tables() exists to copy a
|
||||
compiled pattern along with a private copy of the character tables that it
|
||||
uses.
|
||||
|
||||
8. A user supplied a number of patches to upgrade pcre2grep under Windows and
|
||||
tidy the code.
|
||||
|
||||
9. Several updates have been made to pcre2test and test scripts (see
|
||||
ChangeLog).
|
||||
|
||||
|
||||
Version 10.22 29-July-2016
|
||||
--------------------------
|
||||
|
||||
|
|
2
RunTest
2
RunTest
|
@ -520,7 +520,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
|||
echo $title2 "(excluding UTF-$bits) (64M stack)"
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q -S 64 $bmode $opt $testdata/testinput2 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -63,-62,-2,-1,0,100,188,189 >>testtry
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -63,-62,-2,-1,0,100,188,189,190,191 >>testtry
|
||||
checkresult $? 2 "$opt"
|
||||
else
|
||||
echo " "
|
||||
|
|
10
configure.ac
10
configure.ac
|
@ -11,16 +11,16 @@ dnl be defined as -RC2, for example. For real releases, it should be empty.
|
|||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [23])
|
||||
m4_define(pcre2_prerelease, [-RC1])
|
||||
m4_define(pcre2_date, [2016-08-01])
|
||||
m4_define(pcre2_date, [2017-01-16])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
||||
# Libtool shared library interface versions (current:revision:age)
|
||||
m4_define(libpcre2_8_version, [4:0:4])
|
||||
m4_define(libpcre2_16_version, [4:0:4])
|
||||
m4_define(libpcre2_32_version, [4:0:4])
|
||||
m4_define(libpcre2_posix_version, [1:0:0])
|
||||
m4_define(libpcre2_8_version, [5:0:5])
|
||||
m4_define(libpcre2_16_version, [5:0:5])
|
||||
m4_define(libpcre2_32_version, [5:0:5])
|
||||
m4_define(libpcre2_posix_version, [1:1:0])
|
||||
|
||||
AC_PREREQ(2.57)
|
||||
AC_INIT(PCRE2, pcre2_major.pcre2_minor[]pcre2_prerelease, , pcre2)
|
||||
|
|
|
@ -45,8 +45,8 @@ is <b>pcre2_match()</b>.) The arguments for this function are:
|
|||
<i>wscount</i> Number of elements in the vector
|
||||
</pre>
|
||||
For <b>pcre2_dfa_match()</b>, a match context is needed only if you want to set
|
||||
up a callout function. The <i>length</i> and <i>startoffset</i> values are code
|
||||
units, not characters. The options are:
|
||||
up a callout function or specify the recursion limit. The <i>length</i> and
|
||||
<i>startoffset</i> values are code units, not characters. The options are:
|
||||
<pre>
|
||||
PCRE2_ANCHORED Match only at the first position
|
||||
PCRE2_NOTBOL Subject is not the beginning of a line
|
||||
|
|
|
@ -879,21 +879,23 @@ This limit is of use only if it is set smaller than <i>match_limit</i>.
|
|||
Limiting the recursion depth limits the amount of system stack that can be
|
||||
used, or, when PCRE2 has been compiled to use memory on the heap instead of the
|
||||
stack, the amount of heap memory that can be used. This limit is not relevant,
|
||||
and is ignored, when matching is done using JIT compiled code or by the
|
||||
<b>pcre2_dfa_match()</b> function.
|
||||
and is ignored, when matching is done using JIT compiled code. However, it is
|
||||
supported by <b>pcre2_dfa_match()</b>, which uses recursive function calls less
|
||||
frequently than <b>pcre2_match()</b>, but which can be caused to use a lot of
|
||||
stack by a recursive pattern such as /(.)(?1)/ matched to a very long string.
|
||||
</P>
|
||||
<P>
|
||||
The default value for <i>recursion_limit</i> can be set when PCRE2 is built; the
|
||||
default default is the same value as the default for <i>match_limit</i>. If the
|
||||
limit is exceeded, <b>pcre2_match()</b> returns PCRE2_ERROR_RECURSIONLIMIT. A
|
||||
value for the recursion limit may also be supplied by an item at the start of a
|
||||
pattern of the form
|
||||
limit is exceeded, <b>pcre2_match()</b> and <b>pcre2_dfa_match()</b> return
|
||||
PCRE2_ERROR_RECURSIONLIMIT. A value for the recursion limit may also be
|
||||
supplied by an item at the start of a pattern of the form
|
||||
<pre>
|
||||
(*LIMIT_RECURSION=ddd)
|
||||
</pre>
|
||||
where ddd is a decimal number. However, such a setting is ignored unless ddd is
|
||||
less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
|
||||
limit is set, less than the default.
|
||||
less than the limit set by the caller of <b>pcre2_match()</b> or
|
||||
<b>pcre2_dfa_match()</b> or, if no such limit is set, less than the default.
|
||||
<b>int pcre2_set_recursion_memory_management(</b>
|
||||
<b> pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
|
||||
|
@ -1976,7 +1978,7 @@ Information about a successful or unsuccessful match is placed in a match
|
|||
data block, which is an opaque structure that is accessed by function calls. In
|
||||
particular, the match data block contains a vector of offsets into the subject
|
||||
string that define the matched part of the subject and any substrings that were
|
||||
captured. This is know as the <i>ovector</i>.
|
||||
captured. This is known as the <i>ovector</i>.
|
||||
</P>
|
||||
<P>
|
||||
Before calling <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b>, or
|
||||
|
@ -3263,7 +3265,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC41" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 22 November 2016
|
||||
Last updated: 23 December 2016
|
||||
<br>
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -245,13 +245,23 @@ because <b>pcre2grep</b> has to search for all possible matches in a line, not
|
|||
just one, in order to colour them all.
|
||||
<br>
|
||||
<br>
|
||||
The colour that is used can be specified by setting the environment variable
|
||||
PCRE2GREP_COLOUR or PCRE2GREP_COLOR. If neither of these are set,
|
||||
<b>pcre2grep</b> looks for GREP_COLOUR or GREP_COLOR. The value of the variable
|
||||
should be a string of two numbers, separated by a semicolon. They are copied
|
||||
directly into the control string for setting colour on a terminal, so it is
|
||||
your responsibility to ensure that they make sense. If neither of the
|
||||
environment variables is set, the default is "1;31", which gives red.
|
||||
The colour that is used can be specified by setting one of the environment
|
||||
variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR, PCREGREP_COLOUR, or
|
||||
PCREGREP_COLOR, which are checked in that order. If none of these are set,
|
||||
<b>pcre2grep</b> looks for GREP_COLORS or GREP_COLOR (in that order). The value
|
||||
of the variable should be a string of two numbers, separated by a semicolon,
|
||||
except in the case of GREP_COLORS, which must start with "ms=" or "mt="
|
||||
followed by two semicolon-separated colours, terminated by the end of the
|
||||
string or by a colon. If GREP_COLORS does not start with "ms=" or "mt=" it is
|
||||
ignored, and GREP_COLOR is checked.
|
||||
<br>
|
||||
<br>
|
||||
If the string obtained from one of the above variables contains any characters
|
||||
other than semicolon or digits, the setting is ignored and the default colour
|
||||
is used. The string is copied directly into the control string for setting
|
||||
colour on a terminal, so it is your responsibility to ensure that the values
|
||||
make sense. If no relevant environment variable is set, the default is "1;31",
|
||||
which gives red.
|
||||
</P>
|
||||
<P>
|
||||
<b>-D</b> <i>action</i>, <b>--devices=</b><i>action</i>
|
||||
|
@ -773,12 +783,12 @@ character. Otherwise <b>pcre2grep</b> will assume that it has no data.
|
|||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">CALLING EXTERNAL SCRIPTS</a><br>
|
||||
<P>
|
||||
On non-Windows systems, <b>pcre2grep</b> has, by default, support for calling
|
||||
external programs or scripts during matching by making use of PCRE2's callout
|
||||
facility. However, this support can be disabled when <b>pcre2grep</b> is built.
|
||||
You can find out whether your binary has support for callouts by running it
|
||||
with the <b>--help</b> option. If the support is not enabled, all callouts in
|
||||
patterns are ignored by <b>pcre2grep</b>.
|
||||
<b>pcre2grep</b> has, by default, support for calling external programs or
|
||||
scripts during matching by making use of PCRE2's callout facility. However,
|
||||
this support can be disabled when <b>pcre2grep</b> is built. You can find out
|
||||
whether your binary has support for callouts by running it with the <b>--help</b>
|
||||
option. If the support is not enabled, all callouts in patterns are ignored by
|
||||
<b>pcre2grep</b>.
|
||||
</P>
|
||||
<P>
|
||||
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is
|
||||
|
@ -860,7 +870,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 31 October 2016
|
||||
Last updated: 31 December 2016
|
||||
<br>
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -190,6 +190,12 @@ be less than the value set (or defaulted) by the caller of <b>pcre2_match()</b>
|
|||
for it to have any effect. In other words, the pattern writer can lower the
|
||||
limits set by the programmer, but not raise them. If there is more than one
|
||||
setting of one of these limits, the lower value is used.
|
||||
</P>
|
||||
<P>
|
||||
The match limit is used (but in a different way) when JIT is being used, but it
|
||||
is not relevant, and is ignored, when matching with <b>pcre2_dfa_match()</b>.
|
||||
However, the recursion limit is relevant for DFA matching, which does use some
|
||||
function recursion, in particular, for recursions within the pattern.
|
||||
<a name="newlines"></a></P>
|
||||
<br><b>
|
||||
Newline conventions
|
||||
|
@ -1347,10 +1353,10 @@ or immediately after a range. For example, [b-d-z] matches letters in the range
|
|||
b to d, a hyphen character, or z.
|
||||
</P>
|
||||
<P>
|
||||
Perl treats a hyphen as a literal if it appears before a POSIX class (see
|
||||
below) or a character type escape such as as \d, but gives a warning in its
|
||||
warning mode, as this is most likely a user error. As PCRE2 has no facility for
|
||||
warning, an error is given in these cases.
|
||||
Perl treats a hyphen as a literal if it appears before or after a POSIX class
|
||||
(see below) or a character type escape such as \d, but gives a warning in
|
||||
its warning mode, as this is most likely a user error. As PCRE2 has no facility
|
||||
for warning, an error is given in these cases.
|
||||
</P>
|
||||
<P>
|
||||
It is not possible to have the literal character "]" as the end character of a
|
||||
|
@ -3446,7 +3452,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 23 October 2016
|
||||
Last updated: 27 December 2016
|
||||
<br>
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -57,12 +57,13 @@ assertion and "once-only" subpatterns, which are handled like subroutine calls.
|
|||
Normally, these are never very deep, and the limit on the complexity of
|
||||
<b>pcre2_dfa_match()</b> is controlled by the amount of workspace it is given.
|
||||
However, it is possible to write patterns with runaway infinite recursions;
|
||||
such patterns will cause <b>pcre2_dfa_match()</b> to run out of stack. At
|
||||
present, there is no protection against this.
|
||||
such patterns will cause <b>pcre2_dfa_match()</b> to run out of stack unless a
|
||||
limit is applied (see below).
|
||||
</P>
|
||||
<P>
|
||||
The comments that follow do NOT apply to <b>pcre2_dfa_match()</b>; they are
|
||||
relevant only for <b>pcre2_match()</b> without the JIT optimization.
|
||||
The comments in the next three sections do not apply to
|
||||
<b>pcre2_dfa_match()</b>; they are relevant only for <b>pcre2_match()</b> without
|
||||
the JIT optimization.
|
||||
</P>
|
||||
<br><b>
|
||||
Reducing <b>pcre2_match()</b>'s stack usage
|
||||
|
@ -115,7 +116,7 @@ entitled
|
|||
in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
documentation. Since the block sizes are always the same, it may be possible to
|
||||
implement customized a memory handler that is more efficient than the standard
|
||||
implement a customized memory handler that is more efficient than the standard
|
||||
function. The memory blocks obtained for this purpose are retained and re-used
|
||||
if possible while <b>pcre2_match()</b> is running. They are all freed just
|
||||
before it exits.
|
||||
|
@ -151,6 +152,15 @@ pattern to match. This is done by calling <b>pcre2_match()</b> repeatedly with
|
|||
different limits.
|
||||
</P>
|
||||
<br><b>
|
||||
Limiting <b>pcre2_dfa_match()</b>'s stack usage
|
||||
</b><br>
|
||||
<P>
|
||||
The recursion limit, as described above for <b>pcre2_match()</b>, also applies
|
||||
to <b>pcre2_dfa_match()</b>, whose use of recursive function calls for
|
||||
recursions in the pattern can lead to runaway stack usage. The non-recursive
|
||||
match limit is not relevant for DFA matching, and is ignored.
|
||||
</P>
|
||||
<br><b>
|
||||
Changing stack size in Unix-like systems
|
||||
</b><br>
|
||||
<P>
|
||||
|
@ -198,9 +208,9 @@ Cambridge, England.
|
|||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 21 November 2014
|
||||
Last updated: 23 December 2016
|
||||
<br>
|
||||
Copyright © 1997-2014 University of Cambridge.
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
|
|
|
@ -451,9 +451,10 @@ appear.
|
|||
(*UCP) set PCRE2_UCP (use Unicode properties for \d etc)
|
||||
</pre>
|
||||
Note that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value of the
|
||||
limits set by the caller of pcre2_match(), not increase them. The application
|
||||
can lock out the use of (*UTF) and (*UCP) by setting the PCRE2_NEVER_UTF or
|
||||
PCRE2_NEVER_UCP options, respectively, at compile time.
|
||||
limits set by the caller of <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>, not
|
||||
increase them. The application can lock out the use of (*UTF) and (*UCP) by
|
||||
setting the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, respectively, at
|
||||
compile time.
|
||||
</P>
|
||||
<br><a name="SEC17" href="#TOC1">NEWLINE CONVENTION</a><br>
|
||||
<P>
|
||||
|
@ -595,7 +596,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 28 September 2016
|
||||
Last updated: 23 December 2016
|
||||
<br>
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -153,8 +153,13 @@ the 32-bit library has been built, this is the default. If the 32-bit library
|
|||
has not been built, this option causes an error.
|
||||
</P>
|
||||
<P>
|
||||
<b>-ac</b>
|
||||
Behave as if each pattern has the <b>auto_callout</b> modifier, that is, insert
|
||||
automatic callouts into every pattern that is compiled.
|
||||
</P>
|
||||
<P>
|
||||
<b>-b</b>
|
||||
Behave as if each pattern has the <b>/fullbincode</b> modifier; the full
|
||||
Behave as if each pattern has the <b>fullbincode</b> modifier; the full
|
||||
internal binary form of the pattern is output after compilation.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -220,7 +225,7 @@ Output a brief summary these options and then exit.
|
|||
</P>
|
||||
<P>
|
||||
<b>-i</b>
|
||||
Behave as if each pattern has the <b>/info</b> modifier; information about the
|
||||
Behave as if each pattern has the <b>info</b> modifier; information about the
|
||||
compiled pattern is given after compilation.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -806,7 +811,7 @@ modes are to be compiled:
|
|||
2 compile JIT code for soft partial matching
|
||||
4 compile JIT code for hard partial matching
|
||||
</pre>
|
||||
The possible values for the <b>/jit</b> modifier are therefore:
|
||||
The possible values for the <b>jit</b> modifier are therefore:
|
||||
<pre>
|
||||
0 disable JIT
|
||||
1 normal matching only
|
||||
|
@ -852,14 +857,14 @@ code was actually used in the match.
|
|||
Setting a locale
|
||||
</b><br>
|
||||
<P>
|
||||
The <b>/locale</b> modifier must specify the name of a locale, for example:
|
||||
The <b>locale</b> modifier must specify the name of a locale, for example:
|
||||
<pre>
|
||||
/pattern/locale=fr_FR
|
||||
</pre>
|
||||
The given locale is set, <b>pcre2_maketables()</b> is called to build a set of
|
||||
character tables for the locale, and this is then passed to
|
||||
<b>pcre2_compile()</b> when compiling the regular expression. The same tables
|
||||
are used when matching the following subject lines. The <b>/locale</b> modifier
|
||||
are used when matching the following subject lines. The <b>locale</b> modifier
|
||||
applies only to the pattern on which it appears, but can be given in a
|
||||
<b>#pattern</b> command if a default is needed. Setting a locale and alternate
|
||||
character tables are mutually exclusive.
|
||||
|
@ -868,7 +873,7 @@ character tables are mutually exclusive.
|
|||
Showing pattern memory
|
||||
</b><br>
|
||||
<P>
|
||||
The <b>/memory</b> modifier causes the size in bytes of the memory used to hold
|
||||
The <b>memory</b> modifier causes the size in bytes of the memory used to hold
|
||||
the compiled pattern to be output. This does not include the size of the
|
||||
<b>pcre2_code</b> block; it is just the actual compiled data. If the pattern is
|
||||
subsequently passed to the JIT compiler, the size of the JIT compiled code is
|
||||
|
@ -937,7 +942,7 @@ an error.
|
|||
Testing the stack guard feature
|
||||
</b><br>
|
||||
<P>
|
||||
The <b>/stackguard</b> modifier is used to test the use of
|
||||
The <b>stackguard</b> modifier is used to test the use of
|
||||
<b>pcre2_set_compile_recursion_guard()</b>, a function that is provided to
|
||||
enable stack availability to be checked during compilation (see the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
|
@ -952,7 +957,7 @@ be aborted.
|
|||
Using alternative character tables
|
||||
</b><br>
|
||||
<P>
|
||||
The value specified for the <b>/tables</b> modifier must be one of the digits 0,
|
||||
The value specified for the <b>tables</b> modifier must be one of the digits 0,
|
||||
1, or 2. It causes a specific set of built-in character tables to be passed to
|
||||
<b>pcre2_compile()</b>. This is used in the PCRE2 tests to check behaviour with
|
||||
different character tables. The digit specifies the tables as follows:
|
||||
|
@ -1042,7 +1047,7 @@ The partial matching modifiers are provided with abbreviations because they
|
|||
appear frequently in tests.
|
||||
</P>
|
||||
<P>
|
||||
If the <b>/posix</b> modifier was present on the pattern, causing the POSIX
|
||||
If the <b>posix</b> modifier was present on the pattern, causing the POSIX
|
||||
wrapper API to be used, the only option-setting modifiers that have any effect
|
||||
are <b>notbol</b>, <b>notempty</b>, and <b>noteol</b>, causing REG_NOTBOL,
|
||||
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to <b>regexec()</b>.
|
||||
|
@ -1064,6 +1069,7 @@ pattern.
|
|||
altglobal alternative global matching
|
||||
callout_capture show captures at callout time
|
||||
callout_data=<n> set a value to pass via callouts
|
||||
callout_error=<n>[:<m>] control callout error
|
||||
callout_fail=<n>[:<m>] control callout failure
|
||||
callout_none do not supply a callout function
|
||||
copy=<number or name> copy captured substring
|
||||
|
@ -1159,15 +1165,22 @@ Testing callouts
|
|||
<P>
|
||||
A callout function is supplied when <b>pcre2test</b> calls the library matching
|
||||
functions, unless <b>callout_none</b> is specified. If <b>callout_capture</b> is
|
||||
set, the current captured groups are output when a callout occurs.
|
||||
set, the current captured groups are output when a callout occurs. The default
|
||||
return from the callout function is zero, which allows matching to continue.
|
||||
</P>
|
||||
<P>
|
||||
The <b>callout_fail</b> modifier can be given one or two numbers. If there is
|
||||
only one number, 1 is returned instead of 0 when a callout of that number is
|
||||
reached. If two numbers are given, 1 is returned when callout <n> is reached
|
||||
for the <m>th time. Note that callouts with string arguments are always given
|
||||
the number zero. See "Callouts" below for a description of the output when a
|
||||
callout it taken.
|
||||
only one number, 1 is returned instead of 0 (causing matching to backtrack)
|
||||
when a callout of that number is reached. If two numbers (<n>:<m>) are given, 1
|
||||
is returned when callout <n> is reached and there have been at least <m>
|
||||
callouts. The <b>callout_error</b> modifier is similar, except that
|
||||
PCRE2_ERROR_CALLOUT is returned, causing the entire matching process to be
|
||||
aborted. If both these modifiers are set for the same callout number,
|
||||
<b>callout_error</b> takes precedence.
|
||||
</P>
|
||||
<P>
|
||||
Note that callouts with string arguments are always given the number zero. See
|
||||
"Callouts" below for a description of the output when a callout is taken.
|
||||
</P>
|
||||
<P>
|
||||
The <b>callout_data</b> modifier can be given an unsigned or a negative number.
|
||||
|
@ -1180,7 +1193,7 @@ Finding all matches in a string
|
|||
</b><br>
|
||||
<P>
|
||||
Searching for all possible matches within a subject can be requested by the
|
||||
<b>global</b> or <b>/altglobal</b> modifier. After finding a match, the matching
|
||||
<b>global</b> or <b>altglobal</b> modifier. After finding a match, the matching
|
||||
function is called again to search the remainder of the subject. The difference
|
||||
between <b>global</b> and <b>altglobal</b> is that the former uses the
|
||||
<i>start_offset</i> argument to <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>
|
||||
|
@ -1480,7 +1493,7 @@ unset substring is shown as "<unset>", as for the second data line.
|
|||
If the strings contain any non-printing characters, they are output as \xhh
|
||||
escapes if the value is less than 256 and UTF mode is not set. Otherwise they
|
||||
are output as \x{hh...} escapes. See below for the definition of non-printing
|
||||
characters. If the <b>/aftertext</b> modifier is set, the output for substring
|
||||
characters. If the <b>aftertext</b> modifier is set, the output for substring
|
||||
0 is followed by the rest of the subject string, identified by "0+" like
|
||||
this:
|
||||
<pre>
|
||||
|
@ -1673,7 +1686,7 @@ therefore shown as hex escapes.
|
|||
<P>
|
||||
When <b>pcre2test</b> is outputting text that is a matched part of a subject
|
||||
string, it behaves in the same way, unless a different locale has been set for
|
||||
the pattern (using the <b>/locale</b> modifier). In this case, the
|
||||
the pattern (using the <b>locale</b> modifier). In this case, the
|
||||
<b>isprint()</b> function is used to distinguish printing and non-printing
|
||||
characters.
|
||||
<a name="saverestore"></a></P>
|
||||
|
@ -1766,7 +1779,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 04 November 2016
|
||||
Last updated: 28 December 2016
|
||||
<br>
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -922,19 +922,22 @@ PCRE2 CONTEXTS
|
|||
be used, or, when PCRE2 has been compiled to use memory on the heap
|
||||
instead of the stack, the amount of heap memory that can be used. This
|
||||
limit is not relevant, and is ignored, when matching is done using JIT
|
||||
compiled code or by the pcre2_dfa_match() function.
|
||||
compiled code. However, it is supported by pcre2_dfa_match(), which
|
||||
uses recursive function calls less frequently than pcre2_match(), but
|
||||
which can be caused to use a lot of stack by a recursive pattern such
|
||||
as /(.)(?1)/ matched to a very long string.
|
||||
|
||||
The default value for recursion_limit can be set when PCRE2 is built;
|
||||
the default default is the same value as the default for match_limit.
|
||||
If the limit is exceeded, pcre2_match() returns PCRE2_ERROR_RECURSION-
|
||||
LIMIT. A value for the recursion limit may also be supplied by an item
|
||||
at the start of a pattern of the form
|
||||
If the limit is exceeded, pcre2_match() and pcre2_dfa_match() return
|
||||
PCRE2_ERROR_RECURSIONLIMIT. A value for the recursion limit may also be
|
||||
supplied by an item at the start of a pattern of the form
|
||||
|
||||
(*LIMIT_RECURSION=ddd)
|
||||
|
||||
where ddd is a decimal number. However, such a setting is ignored
|
||||
unless ddd is less than the limit set by the caller of pcre2_match()
|
||||
or, if no such limit is set, less than the default.
|
||||
unless ddd is less than the limit set by the caller of pcre2_match() or
|
||||
pcre2_dfa_match() or, if no such limit is set, less than the default.
|
||||
|
||||
int pcre2_set_recursion_memory_management(
|
||||
pcre2_match_context *mcontext,
|
||||
|
@ -1986,7 +1989,7 @@ THE MATCH DATA BLOCK
|
|||
match data block, which is an opaque structure that is accessed by
|
||||
function calls. In particular, the match data block contains a vector
|
||||
of offsets into the subject string that define the matched part of the
|
||||
subject and any substrings that were captured. This is know as the
|
||||
subject and any substrings that were captured. This is known as the
|
||||
ovector.
|
||||
|
||||
Before calling pcre2_match(), pcre2_dfa_match(), or pcre2_jit_match()
|
||||
|
@ -3175,7 +3178,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 22 November 2016
|
||||
Last updated: 23 December 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -5537,6 +5540,12 @@ SPECIAL START-OF-PATTERN ITEMS
|
|||
If there is more than one setting of one of these limits, the lower
|
||||
value is used.
|
||||
|
||||
The match limit is used (but in a different way) when JIT is being
|
||||
used, but it is not relevant, and is ignored, when matching with
|
||||
pcre2_dfa_match(). However, the recursion limit is relevant for DFA
|
||||
matching, which does use some function recursion, in particular, for
|
||||
recursions within the pattern.
|
||||
|
||||
Newline conventions
|
||||
|
||||
PCRE2 supports five different conventions for indicating line breaks in
|
||||
|
@ -6497,10 +6506,10 @@ SQUARE BRACKETS AND CHARACTER CLASSES
|
|||
example, [b-d-z] matches letters in the range b to d, a hyphen charac-
|
||||
ter, or z.
|
||||
|
||||
Perl treats a hyphen as a literal if it appears before a POSIX class
|
||||
(see below) or a character type escape such as as \d, but gives a warn-
|
||||
ing in its warning mode, as this is most likely a user error. As PCRE2
|
||||
has no facility for warning, an error is given in these cases.
|
||||
Perl treats a hyphen as a literal if it appears before or after a POSIX
|
||||
class (see below) or a character type escape such as \d, but gives a
|
||||
warning in its warning mode, as this is most likely a user error. As
|
||||
PCRE2 has no facility for warning, an error is given in these cases.
|
||||
|
||||
It is not possible to have the literal character "]" as the end charac-
|
||||
ter of a range. A pattern such as [W-]46] is interpreted as a class of
|
||||
|
@ -8487,7 +8496,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 23 October 2016
|
||||
Last updated: 27 December 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -9255,11 +9264,12 @@ PCRE2 DISCUSSION OF STACK USAGE
|
|||
limit on the complexity of pcre2_dfa_match() is controlled by the
|
||||
amount of workspace it is given. However, it is possible to write pat-
|
||||
terns with runaway infinite recursions; such patterns will cause
|
||||
pcre2_dfa_match() to run out of stack. At present, there is no protec-
|
||||
tion against this.
|
||||
pcre2_dfa_match() to run out of stack unless a limit is applied (see
|
||||
below).
|
||||
|
||||
The comments that follow do NOT apply to pcre2_dfa_match(); they are
|
||||
relevant only for pcre2_match() without the JIT optimization.
|
||||
The comments in the next three sections do not apply to
|
||||
pcre2_dfa_match(); they are relevant only for pcre2_match() without the
|
||||
JIT optimization.
|
||||
|
||||
Reducing pcre2_match()'s stack usage
|
||||
|
||||
|
@ -9303,7 +9313,7 @@ PCRE2 DISCUSSION OF STACK USAGE
|
|||
arrange to supply your own memory management function. For details, see
|
||||
the section entitled "The match context" in the pcre2api documentation.
|
||||
Since the block sizes are always the same, it may be possible to imple-
|
||||
ment customized a memory handler that is more efficient than the stan-
|
||||
ment a customized memory handler that is more efficient than the stan-
|
||||
dard function. The memory blocks obtained for this purpose are retained
|
||||
and re-used if possible while pcre2_match() is running. They are all
|
||||
freed just before it exits.
|
||||
|
@ -9329,6 +9339,13 @@ PCRE2 DISCUSSION OF STACK USAGE
|
|||
that allow a pattern to match. This is done by calling pcre2_match()
|
||||
repeatedly with different limits.
|
||||
|
||||
Limiting pcre2_dfa_match()'s stack usage
|
||||
|
||||
The recursion limit, as described above for pcre2_match(), also applies
|
||||
to pcre2_dfa_match(), whose use of recursive function calls for recur-
|
||||
sions in the pattern can lead to runaway stack usage. The non-recursive
|
||||
match limit is not relevant for DFA matching, and is ignored.
|
||||
|
||||
Changing stack size in Unix-like systems
|
||||
|
||||
In Unix-like environments, there is not often a problem with the stack
|
||||
|
@ -9368,8 +9385,8 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 21 November 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
Last updated: 23 December 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
@ -9667,10 +9684,10 @@ OPTION SETTING
|
|||
(*UCP) set PCRE2_UCP (use Unicode properties for \d etc)
|
||||
|
||||
Note that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value of
|
||||
the limits set by the caller of pcre2_match(), not increase them. The
|
||||
application can lock out the use of (*UTF) and (*UCP) by setting the
|
||||
PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, respectively, at compile
|
||||
time.
|
||||
the limits set by the caller of pcre2_match() or pcre2_dfa_match(), not
|
||||
increase them. The application can lock out the use of (*UTF) and
|
||||
(*UCP) by setting the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options,
|
||||
respectively, at compile time.
|
||||
|
||||
|
||||
NEWLINE CONVENTION
|
||||
|
@ -9811,7 +9828,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 28 September 2016
|
||||
Last updated: 23 December 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -219,15 +219,25 @@ OPTIONS
|
|||
possible matches in a line, not just one, in order to colour
|
||||
them all.
|
||||
|
||||
The colour that is used can be specified by setting the envi-
|
||||
ronment variable PCRE2GREP_COLOUR or PCRE2GREP_COLOR. If nei-
|
||||
ther of these are set, pcre2grep looks for GREP_COLOUR or
|
||||
GREP_COLOR. The value of the variable should be a string of
|
||||
two numbers, separated by a semicolon. They are copied
|
||||
directly into the control string for setting colour on a ter-
|
||||
minal, so it is your responsibility to ensure that they make
|
||||
sense. If neither of the environment variables is set, the
|
||||
default is "1;31", which gives red.
|
||||
The colour that is used can be specified by setting one of
|
||||
the environment variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR,
|
||||
PCREGREP_COLOUR, or PCREGREP_COLOR, which are checked in that
|
||||
order. If none of these are set, pcre2grep looks for
|
||||
GREP_COLORS or GREP_COLOR (in that order). The value of the
|
||||
variable should be a string of two numbers, separated by a
|
||||
semicolon, except in the case of GREP_COLORS, which must
|
||||
start with "ms=" or "mt=" followed by two semicolon-separated
|
||||
colours, terminated by the end of the string or by a colon.
|
||||
If GREP_COLORS does not start with "ms=" or "mt=" it is
|
||||
ignored, and GREP_COLOR is checked.
|
||||
|
||||
If the string obtained from one of the above variables con-
|
||||
tains any characters other than semicolon or digits, the set-
|
||||
ting is ignored and the default colour is used. The string is
|
||||
copied directly into the control string for setting colour on
|
||||
a terminal, so it is your responsibility to ensure that the
|
||||
values make sense. If no relevant environment variable is
|
||||
set, the default is "1;31", which gives red.
|
||||
|
||||
-D action, --devices=action
|
||||
If an input path is not a regular file or a directory,
|
||||
|
@ -763,12 +773,12 @@ OPTIONS WITH DATA
|
|||
|
||||
CALLING EXTERNAL SCRIPTS
|
||||
|
||||
On non-Windows systems, pcre2grep has, by default, support for calling
|
||||
external programs or scripts during matching by making use of PCRE2's
|
||||
callout facility. However, this support can be disabled when pcre2grep
|
||||
is built. You can find out whether your binary has support for call-
|
||||
outs by running it with the --help option. If the support is not
|
||||
enabled, all callouts in patterns are ignored by pcre2grep.
|
||||
pcre2grep has, by default, support for calling external programs or
|
||||
scripts during matching by making use of PCRE2's callout facility. How-
|
||||
ever, this support can be disabled when pcre2grep is built. You can
|
||||
find out whether your binary has support for callouts by running it
|
||||
with the --help option. If the support is not enabled, all callouts in
|
||||
patterns are ignored by pcre2grep.
|
||||
|
||||
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu-
|
||||
ment is either a number or a quoted string (see the pcre2callout docu-
|
||||
|
@ -852,5 +862,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 31 October 2016
|
||||
Last updated: 31 December 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
|
|
|
@ -116,7 +116,11 @@ COMMAND LINE OPTIONS
|
|||
is the default. If the 32-bit library has not been built,
|
||||
this option causes an error.
|
||||
|
||||
-b Behave as if each pattern has the /fullbincode modifier; the
|
||||
-ac Behave as if each pattern has the auto_callout modifier, that
|
||||
is, insert automatic callouts into every pattern that is com-
|
||||
piled.
|
||||
|
||||
-b Behave as if each pattern has the fullbincode modifier; the
|
||||
full internal binary form of the pattern is output after com-
|
||||
pilation.
|
||||
|
||||
|
@ -174,7 +178,7 @@ COMMAND LINE OPTIONS
|
|||
|
||||
-help Output a brief summary of these options and then exit.
|
||||
|
||||
-i Behave as if each pattern has the /info modifier; information
|
||||
-i Behave as if each pattern has the info modifier; information
|
||||
about the compiled pattern is given after compilation.
|
||||
|
||||
-jit Behave as if each pattern line has the jit modifier; after
|
||||
|
@ -724,7 +728,7 @@ PATTERN MODIFIERS
|
|||
2 compile JIT code for soft partial matching
|
||||
4 compile JIT code for hard partial matching
|
||||
|
||||
The possible values for the /jit modifier are therefore:
|
||||
The possible values for the jit modifier are therefore:
|
||||
|
||||
0 disable JIT
|
||||
1 normal matching only
|
||||
|
@ -764,24 +768,24 @@ PATTERN MODIFIERS
|
|||
|
||||
Setting a locale
|
||||
|
||||
The /locale modifier must specify the name of a locale, for example:
|
||||
The locale modifier must specify the name of a locale, for example:
|
||||
|
||||
/pattern/locale=fr_FR
|
||||
|
||||
The given locale is set, pcre2_maketables() is called to build a set of
|
||||
character tables for the locale, and this is then passed to pcre2_com-
|
||||
pile() when compiling the regular expression. The same tables are used
|
||||
when matching the following subject lines. The /locale modifier applies
|
||||
when matching the following subject lines. The locale modifier applies
|
||||
only to the pattern on which it appears, but can be given in a #pattern
|
||||
command if a default is needed. Setting a locale and alternate charac-
|
||||
ter tables are mutually exclusive.
|
||||
|
||||
Showing pattern memory
|
||||
|
||||
The /memory modifier causes the size in bytes of the memory used to
|
||||
hold the compiled pattern to be output. This does not include the size
|
||||
of the pcre2_code block; it is just the actual compiled data. If the
|
||||
pattern is subsequently passed to the JIT compiler, the size of the JIT
|
||||
The memory modifier causes the size in bytes of the memory used to hold
|
||||
the compiled pattern to be output. This does not include the size of
|
||||
the pcre2_code block; it is just the actual compiled data. If the pat-
|
||||
tern is subsequently passed to the JIT compiler, the size of the JIT
|
||||
compiled code is also output. Here is an example:
|
||||
|
||||
re> /a(b)c/jit,memory
|
||||
|
@ -837,7 +841,7 @@ PATTERN MODIFIERS
|
|||
|
||||
Testing the stack guard feature
|
||||
|
||||
The /stackguard modifier is used to test the use of pcre2_set_com-
|
||||
The stackguard modifier is used to test the use of pcre2_set_com-
|
||||
pile_recursion_guard(), a function that is provided to enable stack
|
||||
availability to be checked during compilation (see the pcre2api docu-
|
||||
mentation for details). If the number specified by the modifier is
|
||||
|
@ -849,7 +853,7 @@ PATTERN MODIFIERS
|
|||
|
||||
Using alternative character tables
|
||||
|
||||
The value specified for the /tables modifier must be one of the digits
|
||||
The value specified for the tables modifier must be one of the digits
|
||||
0, 1, or 2. It causes a specific set of built-in character tables to be
|
||||
passed to pcre2_compile(). This is used in the PCRE2 tests to check be-
|
||||
haviour with different character tables. The digit specifies the tables
|
||||
|
@ -931,7 +935,7 @@ SUBJECT MODIFIERS
|
|||
The partial matching modifiers are provided with abbreviations because
|
||||
they appear frequently in tests.
|
||||
|
||||
If the /posix modifier was present on the pattern, causing the POSIX
|
||||
If the posix modifier was present on the pattern, causing the POSIX
|
||||
wrapper API to be used, the only option-setting modifiers that have any
|
||||
effect are notbol, notempty, and noteol, causing REG_NOTBOL,
|
||||
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to regexec().
|
||||
|
@ -951,6 +955,7 @@ SUBJECT MODIFIERS
|
|||
altglobal alternative global matching
|
||||
callout_capture show captures at callout time
|
||||
callout_data=<n> set a value to pass via callouts
|
||||
callout_error=<n>[:<m>] control callout error
|
||||
callout_fail=<n>[:<m>] control callout failure
|
||||
callout_none do not supply a callout function
|
||||
copy=<number or name> copy captured substring
|
||||
|
@ -1042,14 +1047,22 @@ SUBJECT MODIFIERS
|
|||
|
||||
A callout function is supplied when pcre2test calls the library match-
|
||||
ing functions, unless callout_none is specified. If callout_capture is
|
||||
set, the current captured groups are output when a callout occurs.
|
||||
set, the current captured groups are output when a callout occurs. The
|
||||
default return from the callout function is zero, which allows matching
|
||||
to continue.
|
||||
|
||||
The callout_fail modifier can be given one or two numbers. If there is
|
||||
only one number, 1 is returned instead of 0 when a callout of that num-
|
||||
ber is reached. If two numbers are given, 1 is returned when callout
|
||||
<n> is reached for the <m>th time. Note that callouts with string argu-
|
||||
ments are always given the number zero. See "Callouts" below for a
|
||||
description of the output when a callout is taken.
|
||||
only one number, 1 is returned instead of 0 (causing matching to back-
|
||||
track) when a callout of that number is reached. If two numbers
|
||||
(<n>:<m>) are given, 1 is returned when callout <n> is reached and
|
||||
there have been at least <m> callouts. The callout_error modifier is
|
||||
similar, except that PCRE2_ERROR_CALLOUT is returned, causing the
|
||||
entire matching process to be aborted. If both these modifiers are set
|
||||
for the same callout number, callout_error takes precedence.
|
||||
|
||||
Note that callouts with string arguments are always given the number
|
||||
zero. See "Callouts" below for a description of the output when a call-
|
||||
out is taken.
|
||||
|
||||
The callout_data modifier can be given an unsigned or a negative num-
|
||||
ber. This is set as the "user data" that is passed to the matching
|
||||
|
@ -1060,7 +1073,7 @@ SUBJECT MODIFIERS
|
|||
Finding all matches in a string
|
||||
|
||||
Searching for all possible matches within a subject can be requested by
|
||||
the global or /altglobal modifier. After finding a match, the matching
|
||||
the global or altglobal modifier. After finding a match, the matching
|
||||
function is called again to search the remainder of the subject. The
|
||||
difference between global and altglobal is that the former uses the
|
||||
start_offset argument to pcre2_match() or pcre2_dfa_match() to start
|
||||
|
@ -1329,7 +1342,7 @@ DEFAULT OUTPUT FROM pcre2test
|
|||
If the strings contain any non-printing characters, they are output as
|
||||
\xhh escapes if the value is less than 256 and UTF mode is not set.
|
||||
Otherwise they are output as \x{hh...} escapes. See below for the defi-
|
||||
nition of non-printing characters. If the /aftertext modifier is set,
|
||||
nition of non-printing characters. If the aftertext modifier is set,
|
||||
the output for substring 0 is followed by the rest of the subject
|
||||
string, identified by "0+" like this:
|
||||
|
||||
|
@ -1512,7 +1525,7 @@ NON-PRINTING CHARACTERS
|
|||
|
||||
When pcre2test is outputting text that is a matched part of a subject
|
||||
string, it behaves in the same way, unless a different locale has been
|
||||
set for the pattern (using the /locale modifier). In this case, the
|
||||
set for the pattern (using the locale modifier). In this case, the
|
||||
isprint() function is used to distinguish printing and non-printing
|
||||
characters.
|
||||
|
||||
|
@ -1601,5 +1614,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 04 November 2016
|
||||
Last updated: 28 December 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
|
|
|
@ -78,6 +78,9 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
/* #undef HAVE_MEMORY_H */
|
||||
|
||||
/* Define to 1 if you have the `mkostemp' function. */
|
||||
/* #undef HAVE_MKOSTEMP */
|
||||
|
||||
/* Define if you have POSIX threads libraries and header files. */
|
||||
/* #undef HAVE_PTHREAD */
|
||||
|
||||
|
@ -90,6 +93,9 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||
/* #undef HAVE_READLINE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the `secure_getenv' function. */
|
||||
/* #undef HAVE_SECURE_GETENV */
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
/* #undef HAVE_STDINT_H */
|
||||
|
||||
|
@ -308,9 +314,40 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||
/* #undef SUPPORT_VALGRIND */
|
||||
|
||||
/* Enable extensions on AIX 3, Interix. */
|
||||
#ifndef _ALL_SOURCE
|
||||
# define _ALL_SOURCE 1
|
||||
#endif
|
||||
/* Enable GNU extensions on systems that have them. */
|
||||
#ifndef _GNU_SOURCE
|
||||
# define _GNU_SOURCE 1
|
||||
#endif
|
||||
/* Enable threading extensions on Solaris. */
|
||||
#ifndef _POSIX_PTHREAD_SEMANTICS
|
||||
# define _POSIX_PTHREAD_SEMANTICS 1
|
||||
#endif
|
||||
/* Enable extensions on HP NonStop. */
|
||||
#ifndef _TANDEM_SOURCE
|
||||
# define _TANDEM_SOURCE 1
|
||||
#endif
|
||||
/* Enable general extensions on Solaris. */
|
||||
#ifndef __EXTENSIONS__
|
||||
# define __EXTENSIONS__ 1
|
||||
#endif
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "10.23-RC1"
|
||||
|
||||
/* Define to 1 if on MINIX. */
|
||||
/* #undef _MINIX */
|
||||
|
||||
/* Define to 2 if the system does not provide POSIX.1 features except with
|
||||
this defined. */
|
||||
/* #undef _POSIX_1_SOURCE */
|
||||
|
||||
/* Define to 1 if you need to in order for `stat' and other things to work. */
|
||||
/* #undef _POSIX_SOURCE */
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
||||
|
|
|
@ -44,7 +44,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 23
|
||||
#define PCRE2_PRERELEASE -RC1
|
||||
#define PCRE2_DATE 2016-08-01
|
||||
#define PCRE2_DATE 2017-01-16
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
|
|
|
@ -44,7 +44,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 23
|
||||
#define PCRE2_PRERELEASE -RC1
|
||||
#define PCRE2_DATE 2016-08-01
|
||||
#define PCRE2_DATE 2017-01-16
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
|
@ -465,7 +465,9 @@ PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
|||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_code_free(pcre2_code *); \
|
||||
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||
*pcre2_code_copy(const pcre2_code *);
|
||||
*pcre2_code_copy(const pcre2_code *); \
|
||||
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||
*pcre2_code_copy_with_tables(const pcre2_code *);
|
||||
|
||||
|
||||
/* Functions that give information about a compiled pattern. */
|
||||
|
@ -629,6 +631,7 @@ pcre2_compile are called by application code. */
|
|||
|
||||
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
||||
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
||||
#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
|
||||
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
||||
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
||||
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
||||
|
|
|
@ -116,9 +116,11 @@ them will be able to (i.e. assume a 64-bit world). */
|
|||
|
||||
/* Forward declarations of functions to allow mutual recursion */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
static unsigned int
|
||||
add_list_to_class_internal(uint8_t *, PCRE2_UCHAR **, uint32_t, compile_block *,
|
||||
const uint32_t *, unsigned int);
|
||||
add_list_to_class_internal(uint8_t *, PCRE2_UCHAR **, uint32_t,
|
||||
compile_block *, const uint32_t *, unsigned int);
|
||||
#endif
|
||||
|
||||
static int
|
||||
compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t,
|
||||
|
@ -4381,16 +4383,14 @@ return n8; /* Number of 8-bit characters */
|
|||
|
||||
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
/*************************************************
|
||||
* Add a list of characters to a class (internal) *
|
||||
*************************************************/
|
||||
|
||||
/* This function is used for adding a list of case-equivalent characters to a
|
||||
class, and also for adding a list of horizontal or vertical whitespace. If the
|
||||
list is in order (which it should be), ranges of characters are detected and
|
||||
handled appropriately. This function is called (sometimes recursively) only
|
||||
from within the "add to class" set of functions. The external entry point is
|
||||
add_list_to_class().
|
||||
class when in UTF mode. This function is called only from within
|
||||
add_to_class_internal(), with which it is mutually recursive.
|
||||
|
||||
Arguments:
|
||||
classbits the bit map for characters < 256
|
||||
|
@ -4423,6 +4423,7 @@ while (p[0] < NOTACHAR)
|
|||
}
|
||||
return n8;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
@ -4459,8 +4460,11 @@ return add_to_class_internal(classbits, uchardptr, options, cb, start, end);
|
|||
* External entry point for add list to class *
|
||||
*************************************************/
|
||||
|
||||
/* This function sets the overall range so that the internal functions can try
|
||||
to avoid duplication when handling case-independence.
|
||||
/* This function is used for adding a list of horizontal or vertical whitespace
|
||||
characters to a class. The list must be in order so that ranges of characters
|
||||
can be detected and handled appropriately. This function sets the overall range
|
||||
so that the internal functions can try to avoid duplication when handling
|
||||
case-independence.
|
||||
|
||||
Arguments:
|
||||
classbits the bit map for characters < 256
|
||||
|
|
|
@ -1937,6 +1937,8 @@ static int
|
|||
fill_buffer(void *handle, int frtype, char *buffer, int length,
|
||||
BOOL input_line_buffered)
|
||||
{
|
||||
(void)frtype; /* Avoid warning when not used */
|
||||
|
||||
#ifdef SUPPORT_LIBZ
|
||||
if (frtype == FR_LIBZ)
|
||||
return gzread((gzFile)handle, buffer, length);
|
||||
|
|
Loading…
Reference in New Issue