Final file tidies for 10.32
This commit is contained in:
parent
ab30606b01
commit
bf3c7c68ec
10
LICENCE
10
LICENCE
|
@ -4,11 +4,11 @@ PCRE2 LICENCE
|
|||
PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Release 10 of PCRE2 is distributed under the terms of the "BSD" licence, as
|
||||
specified below, with one exemption for certain binary redistributions. The
|
||||
documentation for PCRE2, supplied in the "doc" directory, is distributed under
|
||||
the same terms as the software itself. The data in the testdata directory is
|
||||
not copyrighted and is in the public domain.
|
||||
Releases 10.00 and above of PCRE2 are distributed under the terms of the "BSD"
|
||||
licence, as specified below, with one exemption for certain binary
|
||||
redistributions. The documentation for PCRE2, supplied in the "doc" directory,
|
||||
is distributed under the same terms as the software itself. The data in the
|
||||
testdata directory is not copyrighted and is in the public domain.
|
||||
|
||||
The basic library functions are written in C and are freestanding. Also
|
||||
included in the distribution is a just-in-time compiler that can be used to
|
||||
|
|
9
NEWS
9
NEWS
|
@ -1,11 +1,12 @@
|
|||
News about PCRE2 releases
|
||||
-------------------------
|
||||
|
||||
Version 10.32 13-August-2018
|
||||
----------------------------
|
||||
|
||||
Version 10.32 10-September-2018
|
||||
-------------------------------
|
||||
|
||||
This is another mainly bugfix and tidying release with a few minor
|
||||
enhancements.
|
||||
enhancements. These are the main ones:
|
||||
|
||||
1. pcre2grep now supports the inclusion of binary zeros in patterns that are
|
||||
read from files via the -f option.
|
||||
|
@ -22,7 +23,7 @@ parameter now applies to pcre2_dfa_match().
|
|||
|
||||
5. (*ACCEPT:ARG), (*FAIL:ARG), and (*COMMIT:ARG) are now supported.
|
||||
|
||||
6. Added support for \N{U+dddd}, but not in EBCDIC environments.
|
||||
6. Added support for \N{U+dddd}, but only in Unicode mode.
|
||||
|
||||
7. Added support for (?^) to unset all imnsx options.
|
||||
|
||||
|
|
|
@ -10,8 +10,8 @@ dnl be defined as -RC2, for example. For real releases, it should be empty.
|
|||
|
||||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [32])
|
||||
m4_define(pcre2_prerelease, [-RC1])
|
||||
m4_define(pcre2_date, [2018-08-13])
|
||||
m4_define(pcre2_prerelease, [])
|
||||
m4_define(pcre2_date, [2018-09-10])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
@ -839,7 +839,7 @@ AC_SUBST(EXTRA_LIBPCRE2_POSIX_LDFLAGS)
|
|||
|
||||
# When we run 'make distcheck', use these arguments. Turning off compiler
|
||||
# optimization makes it run faster.
|
||||
DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre2-16 --enable-pcre2-32 --enable-jit --enable-utf"
|
||||
DISTCHECK_CONFIGURE_FLAGS="CFLAGS='' CXXFLAGS='' --enable-pcre2-16 --enable-pcre2-32 --enable-jit"
|
||||
AC_SUBST(DISTCHECK_CONFIGURE_FLAGS)
|
||||
|
||||
# Check that, if --enable-pcre2grep-libz or --enable-pcre2grep-libbz2 is
|
||||
|
|
|
@ -1804,7 +1804,8 @@ Unicode support (which is the default). If Unicode support is not available,
|
|||
the use of this option provokes an error. Details of how PCRE2_UTF changes the
|
||||
behaviour of PCRE2 are given in the
|
||||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||
page.
|
||||
page. In particular, note that it changes the way PCRE2_CASELESS handles
|
||||
characters with code points greater than 127.
|
||||
<a name="extracompileoptions"></a></P>
|
||||
<br><b>
|
||||
Extra compile options
|
||||
|
@ -2776,7 +2777,7 @@ Elements in the ovector that do not correspond to capturing parentheses in the
|
|||
pattern are never changed. That is, if a pattern contains <i>n</i> capturing
|
||||
parentheses, no more than <i>ovector[0]</i> to <i>ovector[2n+1]</i> are set by
|
||||
<b>pcre2_match()</b>. The other elements retain whatever values they previously
|
||||
had.
|
||||
had. After a failed match attempt, the contents of the ovector are unchanged.
|
||||
<a name="matchotherdata"></a></P>
|
||||
<br><a name="SEC30" href="#TOC1">OTHER INFORMATION ABOUT A MATCH</a><br>
|
||||
<P>
|
||||
|
@ -3192,6 +3193,12 @@ functions from the match context, if provided, or else those that were used to
|
|||
allocate memory for the compiled code.
|
||||
</P>
|
||||
<P>
|
||||
If an external <i>match_data</i> block is provided, its contents afterwards
|
||||
are those set by the final call to <b>pcre2_match()</b>, which will have
|
||||
ended in a matching error. The contents of the ovector within the match data
|
||||
block may or may not have been changed.
|
||||
</P>
|
||||
<P>
|
||||
The <i>outlengthptr</i> argument must point to a variable that contains the
|
||||
length, in code units, of the output buffer. If the function is successful, the
|
||||
value is updated to contain the length of the new string, excluding the
|
||||
|
@ -3658,7 +3665,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 03 August 2018
|
||||
Last updated: 07 September 2018
|
||||
<br>
|
||||
Copyright © 1997-2018 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -399,14 +399,15 @@ these escapes are as follows:
|
|||
\ddd character with octal code ddd, or backreference
|
||||
\o{ddd..} character with octal code ddd..
|
||||
\xhh character with hex code hh
|
||||
\x{hhh..} character with hex code hhh.. (default mode)
|
||||
\N{U+hhh..} character with Unicode code point hhh..
|
||||
\x{hhh..} character with hex code hhh..
|
||||
\N{U+hhh..} character with Unicode hex code point hhh..
|
||||
\uhhhh character with hex code hhhh (when PCRE2_ALT_BSUX is set)
|
||||
</pre>
|
||||
The \N{U+hhh..} escape sequence is recognized only when the PCRE2_UTF option
|
||||
is set, that is, when PCRE2 is operating in a Unicode mode. Perl also uses
|
||||
\N{name} to specify characters by Unicode name; PCRE2 does not support this.
|
||||
Note that when \N is not followed by an opening brace (curly bracket) it has
|
||||
an entirely different meaning, matching any character that is not a newline.
|
||||
Perl also uses \N{name} to specify characters by Unicode name; PCRE2 does not
|
||||
support this.
|
||||
</P>
|
||||
<P>
|
||||
The precise effect of \cx on ASCII characters is as follows: if x is a lower
|
||||
|
@ -530,7 +531,8 @@ limited to certain values, as follows:
|
|||
Invalid Unicode code points are all those in the range 0xd800 to 0xdfff (the
|
||||
so-called "surrogate" code points). The check for these can be disabled by the
|
||||
caller of <b>pcre2_compile()</b> by setting the option
|
||||
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES.
|
||||
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES. However, this is possible only in UTF-8
|
||||
and UTF-32 modes, because these values are not representable in UTF-16.
|
||||
</P>
|
||||
<br><b>
|
||||
Escape sequences in character classes
|
||||
|
@ -3595,13 +3597,16 @@ verbs in subroutines is different in some cases.
|
|||
an immediate backtrack.
|
||||
</P>
|
||||
<P>
|
||||
(*COMMIT), (*SKIP), and (*PRUNE) in a subpattern called as a subroutine cause
|
||||
the subroutine match to fail.
|
||||
(*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail when
|
||||
triggered by being backtracked to in a subpattern called as a subroutine. There
|
||||
is then a backtrack at the outer level.
|
||||
</P>
|
||||
<P>
|
||||
(*THEN) skips to the next alternative in the innermost enclosing group within
|
||||
the subpattern that has alternatives. If there is no such group within the
|
||||
subpattern, (*THEN) causes the subroutine match to fail.
|
||||
(*THEN), when triggered, skips to the next alternative in the innermost
|
||||
enclosing group within the subpattern that has alternatives (its normal
|
||||
behaviour). However, if there is no such group within the subroutine
|
||||
subpattern, the subroutine match fails and there is a backtrack at the outer
|
||||
level.
|
||||
</P>
|
||||
<br><a name="SEC28" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
|
@ -3619,7 +3624,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 03 August 2018
|
||||
Last updated: 04 September 2018
|
||||
<br>
|
||||
Copyright © 1997-2018 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -70,7 +70,7 @@ This table applies to ASCII and Unicode environments.
|
|||
\ddd character with octal code ddd, or backreference
|
||||
\o{ddd..} character with octal code ddd..
|
||||
\U "U" if PCRE2_ALT_BSUX is set (otherwise is an error)
|
||||
\N{U+hh..} character with Unicode code point hh..
|
||||
\N{U+hh..} character with Unicode code point hh.. (Unicode mode only)
|
||||
\uhhhh character with hex code hhhh (if PCRE2_ALT_BSUX is set)
|
||||
\xhh character with hex code hh
|
||||
\x{hh..} character with hex code hh..
|
||||
|
@ -634,7 +634,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 01 August 2018
|
||||
Last updated: 02 September 2018
|
||||
<br>
|
||||
Copyright © 1997-2018 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -26,7 +26,8 @@ you must call
|
|||
with the PCRE2_UTF option flag, or the pattern must start with the sequence
|
||||
(*UTF). When either of these is the case, both the pattern and any subject
|
||||
strings that are matched against it are treated as UTF strings instead of
|
||||
strings of individual one-code-unit characters.
|
||||
strings of individual one-code-unit characters. There are also some other
|
||||
changes to the way characters are handled, as documented below.
|
||||
</P>
|
||||
<P>
|
||||
If you do not need Unicode support you can build PCRE2 without it, in which
|
||||
|
@ -59,6 +60,11 @@ values have to use braced sequences. Unbraced octal code points up to \777 are
|
|||
also recognized; larger ones can be coded using \o{...}.
|
||||
</P>
|
||||
<P>
|
||||
The escape sequence \N{U+<hex digits>} is recognized as another way of
|
||||
specifying a Unicode character by code point in a UTF mode. It is not allowed
|
||||
in non-UTF modes.
|
||||
</P>
|
||||
<P>
|
||||
In UTF modes, repeat quantifiers apply to complete UTF characters, not to
|
||||
individual code units.
|
||||
</P>
|
||||
|
@ -294,9 +300,9 @@ Cambridge, England.
|
|||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 17 May 2017
|
||||
Last updated: 02 September 2018
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
Copyright © 1997-2018 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
|
|
|
@ -1784,7 +1784,8 @@ COMPILING A PATTERN
|
|||
built to include Unicode support (which is the default). If Unicode
|
||||
support is not available, the use of this option provokes an error.
|
||||
Details of how PCRE2_UTF changes the behaviour of PCRE2 are given in
|
||||
the pcre2unicode page.
|
||||
the pcre2unicode page. In particular, note that it changes the way
|
||||
PCRE2_CASELESS handles characters with code points greater than 127.
|
||||
|
||||
Extra compile options
|
||||
|
||||
|
@ -2710,7 +2711,8 @@ HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS
|
|||
in the pattern are never changed. That is, if a pattern contains n cap-
|
||||
turing parentheses, no more than ovector[0] to ovector[2n+1] are set by
|
||||
pcre2_match(). The other elements retain whatever values they previ-
|
||||
ously had.
|
||||
ously had. After a failed match attempt, the contents of the ovector
|
||||
are unchanged.
|
||||
|
||||
|
||||
OTHER INFORMATION ABOUT A MATCH
|
||||
|
@ -3101,6 +3103,11 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
|
|||
ment functions from the match context, if provided, or else those that
|
||||
were used to allocate memory for the compiled code.
|
||||
|
||||
If an external match_data block is provided, its contents afterwards
|
||||
are those set by the final call to pcre2_match(), which will have ended
|
||||
in a matching error. The contents of the ovector within the match data
|
||||
block may or may not have been changed.
|
||||
|
||||
The outlengthptr argument must point to a variable that contains the
|
||||
length, in code units, of the output buffer. If the function is suc-
|
||||
cessful, the value is updated to contain the length of the new string,
|
||||
|
@ -3542,7 +3549,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 03 August 2018
|
||||
Last updated: 07 September 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -6192,14 +6199,16 @@ BACKSLASH
|
|||
\ddd character with octal code ddd, or backreference
|
||||
\o{ddd..} character with octal code ddd..
|
||||
\xhh character with hex code hh
|
||||
\x{hhh..} character with hex code hhh.. (default mode)
|
||||
\N{U+hhh..} character with Unicode code point hhh..
|
||||
\x{hhh..} character with hex code hhh..
|
||||
\N{U+hhh..} character with Unicode hex code point hhh..
|
||||
\uhhhh character with hex code hhhh (when PCRE2_ALT_BSUX is set)
|
||||
|
||||
Note that when \N is not followed by an opening brace (curly bracket)
|
||||
it has an entirely different meaning, matching any character that is
|
||||
not a newline. Perl also uses \N{name} to specify characters by Uni-
|
||||
code name; PCRE2 does not support this.
|
||||
The \N{U+hhh..} escape sequence is recognized only when the PCRE2_UTF
|
||||
option is set, that is, when PCRE2 is operating in a Unicode mode. Perl
|
||||
also uses \N{name} to specify characters by Unicode name; PCRE2 does
|
||||
not support this. Note that when \N is not followed by an opening
|
||||
brace (curly bracket) it has an entirely different meaning, matching
|
||||
any character that is not a newline.
|
||||
|
||||
The precise effect of \cx on ASCII characters is as follows: if x is a
|
||||
lower case letter, it is converted to upper case. Then bit 6 of the
|
||||
|
@ -6314,7 +6323,9 @@ BACKSLASH
|
|||
Invalid Unicode code points are all those in the range 0xd800 to 0xdfff
|
||||
(the so-called "surrogate" code points). The check for these can be
|
||||
disabled by the caller of pcre2_compile() by setting the option
|
||||
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES.
|
||||
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES. However, this is possible only in
|
||||
UTF-8 and UTF-32 modes, because these values are not representable in
|
||||
UTF-16.
|
||||
|
||||
Escape sequences in character classes
|
||||
|
||||
|
@ -9074,12 +9085,15 @@ BACKTRACKING CONTROL
|
|||
(*FAIL) in a subpattern called as a subroutine has its normal effect:
|
||||
it forces an immediate backtrack.
|
||||
|
||||
(*COMMIT), (*SKIP), and (*PRUNE) in a subpattern called as a subroutine
|
||||
cause the subroutine match to fail.
|
||||
(*COMMIT), (*SKIP), and (*PRUNE) cause the subroutine match to fail
|
||||
when triggered by being backtracked to in a subpattern called as a sub-
|
||||
routine. There is then a backtrack at the outer level.
|
||||
|
||||
(*THEN) skips to the next alternative in the innermost enclosing group
|
||||
within the subpattern that has alternatives. If there is no such group
|
||||
within the subpattern, (*THEN) causes the subroutine match to fail.
|
||||
(*THEN), when triggered, skips to the next alternative in the innermost
|
||||
enclosing group within the subpattern that has alternatives (its normal
|
||||
behaviour). However, if there is no such group within the subroutine
|
||||
subpattern, the subroutine match fails and there is a backtrack at the
|
||||
outer level.
|
||||
|
||||
|
||||
SEE ALSO
|
||||
|
@ -9097,7 +9111,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 03 August 2018
|
||||
Last updated: 04 September 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -9958,7 +9972,7 @@ ESCAPED CHARACTERS
|
|||
\ddd character with octal code ddd, or backreference
|
||||
\o{ddd..} character with octal code ddd..
|
||||
\U "U" if PCRE2_ALT_BSUX is set (otherwise is an error)
|
||||
\N{U+hh..} character with Unicode code point hh..
|
||||
\N{U+hh..} character with Unicode code point hh.. (Unicode mode only)
|
||||
\uhhhh character with hex code hhhh (if PCRE2_ALT_BSUX is set)
|
||||
\xhh character with hex code hh
|
||||
\x{hh..} character with hex code hh..
|
||||
|
@ -10387,7 +10401,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 01 August 2018
|
||||
Last updated: 02 September 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -10410,7 +10424,9 @@ UNICODE AND UTF SUPPORT
|
|||
PCRE2_UTF option flag, or the pattern must start with the sequence
|
||||
(*UTF). When either of these is the case, both the pattern and any sub-
|
||||
ject strings that are matched against it are treated as UTF strings
|
||||
instead of strings of individual one-code-unit characters.
|
||||
instead of strings of individual one-code-unit characters. There are
|
||||
also some other changes to the way characters are handled, as docu-
|
||||
mented below.
|
||||
|
||||
If you do not need Unicode support you can build PCRE2 without it, in
|
||||
which case the library will be smaller.
|
||||
|
@ -10437,6 +10453,10 @@ WIDE CHARACTERS AND UTF MODES
|
|||
Larger values have to use braced sequences. Unbraced octal code points
|
||||
up to \777 are also recognized; larger ones can be coded using \o{...}.
|
||||
|
||||
The escape sequence \N{U+<hex digits>} is recognized as another way of
|
||||
specifying a Unicode character by code point in a UTF mode. It is not
|
||||
allowed in non-UTF modes.
|
||||
|
||||
In UTF modes, repeat quantifiers apply to complete UTF characters, not
|
||||
to individual code units.
|
||||
|
||||
|
@ -10644,8 +10664,8 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 17 May 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
Last updated: 02 September 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
|
|
@ -214,7 +214,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_NAME "PCRE2"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE2 10.32-RC1"
|
||||
#define PACKAGE_STRING "PCRE2 10.32"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre2"
|
||||
|
@ -223,7 +223,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "10.32-RC1"
|
||||
#define PACKAGE_VERSION "10.32"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
|
@ -343,7 +343,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#endif
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "10.32-RC1"
|
||||
#define VERSION "10.32"
|
||||
|
||||
/* Define to 1 if on MINIX. */
|
||||
/* #undef _MINIX */
|
||||
|
|
|
@ -43,8 +43,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 32
|
||||
#define PCRE2_PRERELEASE -RC1
|
||||
#define PCRE2_DATE 2018-08-13
|
||||
#define PCRE2_PRERELEASE
|
||||
#define PCRE2_DATE 2018-09-10
|
||||
|
||||
/* For the benefit of systems without stdint.h, an alternative is to use
|
||||
inttypes.h. The existence of these headers is checked by configure or CMake. */
|
||||
|
@ -316,7 +316,7 @@ pcre2_pattern_convert(). */
|
|||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190
|
||||
#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191
|
||||
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
|
||||
#define PCRE2_ERROR_NOT_SUPPORTED_IN_EBCDIC 193
|
||||
#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193
|
||||
#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue