File tidies for 10.35-RC1 release candidate.
This commit is contained in:
parent
cf670e3bb9
commit
8b3f8af535
6
AUTHORS
6
AUTHORS
|
@ -8,7 +8,7 @@ Email domain: cam.ac.uk
|
|||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2019 University of Cambridge
|
||||
Copyright (c) 1997-2020 University of Cambridge
|
||||
All rights reserved
|
||||
|
||||
|
||||
|
@ -19,7 +19,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2019 Zoltan Herczeg
|
||||
Copyright(c) 2010-2020 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -30,7 +30,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2019 Zoltan Herczeg
|
||||
Copyright(c) 2009-2020 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
####
|
||||
|
|
52
ChangeLog
52
ChangeLog
|
@ -1,8 +1,8 @@
|
|||
Change Log for PCRE2
|
||||
--------------------
|
||||
|
||||
Version 10.35
|
||||
-------------
|
||||
Version 10.35 15-April-2020
|
||||
---------------------------
|
||||
|
||||
1. Use PCRE2_MATCH_EMPTY flag to detect empty matches in JIT.
|
||||
|
||||
|
@ -45,19 +45,19 @@ the minimum.
|
|||
|
||||
12. The JIT stack should be freed when the low-level stack allocation fails.
|
||||
|
||||
13. In pcre2grep, if the final line in a scanned file is output but does not
|
||||
13. In pcre2grep, if the final line in a scanned file is output but does not
|
||||
end with a newline sequence, add a newline according to the --newline setting.
|
||||
|
||||
14. (?(DEFINE)...) groups were not being handled correctly when checking for
|
||||
the fixed length of a lookbehind assertion. Such a group within a lookbehind
|
||||
should be skipped, as it does not contribute to the length of the group.
|
||||
Instead, the (DEFINE) group was being processed, and if at the end of the
|
||||
lookbehind, that end was not correctly recognized. Errors such as "lookbehind
|
||||
assertion is not fixed length" and also "internal error: bad code value in
|
||||
14. (?(DEFINE)...) groups were not being handled correctly when checking for
|
||||
the fixed length of a lookbehind assertion. Such a group within a lookbehind
|
||||
should be skipped, as it does not contribute to the length of the group.
|
||||
Instead, the (DEFINE) group was being processed, and if at the end of the
|
||||
lookbehind, that end was not correctly recognized. Errors such as "lookbehind
|
||||
assertion is not fixed length" and also "internal error: bad code value in
|
||||
parsed_skip()" could result.
|
||||
|
||||
15. Put a limit of 1000 on recursive calls in pcre2_study() when searching
|
||||
nested groups for starting code units, in order to avoid stack overflow issues.
|
||||
15. Put a limit of 1000 on recursive calls in pcre2_study() when searching
|
||||
nested groups for starting code units, in order to avoid stack overflow issues.
|
||||
If the limit is reached, it just gives up trying for this optimization.
|
||||
|
||||
16. The control verb chain list must always be restored when exiting from a
|
||||
|
@ -66,9 +66,9 @@ recurse function in JIT.
|
|||
17. Fix a crash which occurs when the character type of an invalid UTF
|
||||
character is decoded in JIT.
|
||||
|
||||
18. Changes in many areas of the code so that when Unicode is supported and
|
||||
PCRE2_UCP is set without PCRE2_UTF, Unicode character properties are used for
|
||||
upper/lower case computations on characters whose code points are greater than
|
||||
18. Changes in many areas of the code so that when Unicode is supported and
|
||||
PCRE2_UCP is set without PCRE2_UTF, Unicode character properties are used for
|
||||
upper/lower case computations on characters whose code points are greater than
|
||||
127.
|
||||
|
||||
19. The function for checking UTF-16 validity was returning an incorrect offset
|
||||
|
@ -77,24 +77,24 @@ low surrogate. This caused incorrect behaviour, for example when
|
|||
PCRE2_MATCH_INVALID_UTF was set and a match started immediately following the
|
||||
invalid high surrogate, such as /aa/ matching "\x{d800}aa".
|
||||
|
||||
20. If a DEFINE group immediately preceded a lookbehind assertion, the pattern
|
||||
could be mis-compiled and therefore not match correctly. This is the example
|
||||
that found this: /(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word/ which failed to
|
||||
20. If a DEFINE group immediately preceded a lookbehind assertion, the pattern
|
||||
could be mis-compiled and therefore not match correctly. This is the example
|
||||
that found this: /(?(DEFINE)(?<foo>bar))(?<![-a-z0-9])word/ which failed to
|
||||
match "word" because the "move back" value was set to zero.
|
||||
|
||||
21. Following a request from a user, some extensions and tidies to the
|
||||
21. Following a request from a user, some extensions and tidies to the
|
||||
character tables handling have been done:
|
||||
|
||||
(a) The dftables auxiliary program is renamed pcre2_dftables, but it is still
|
||||
(a) The dftables auxiliary program is renamed pcre2_dftables, but it is still
|
||||
not installed for public use.
|
||||
|
||||
(b) There is now a -b option for pcre2_dftables, which causes the tables to
|
||||
|
||||
(b) There is now a -b option for pcre2_dftables, which causes the tables to
|
||||
be written in binary. There is also a -help option.
|
||||
|
||||
(c) PCRE2_CONFIG_TABLES_LENGTH is added to pcre2_config() so that an
|
||||
application that wants to save tables in binary knows how long they are.
|
||||
|
||||
22. Changed setting of CMAKE_MODULE_PATH in CMakeLists.txt from SET to
|
||||
|
||||
(c) PCRE2_CONFIG_TABLES_LENGTH is added to pcre2_config() so that an
|
||||
application that wants to save tables in binary knows how long they are.
|
||||
|
||||
22. Changed setting of CMAKE_MODULE_PATH in CMakeLists.txt from SET to
|
||||
LIST(APPEND...) to allow a setting from the command line to be included.
|
||||
|
||||
23. Updated to Unicode 13.0.0.
|
||||
|
|
6
LICENCE
6
LICENCE
|
@ -26,7 +26,7 @@ Email domain: cam.ac.uk
|
|||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2019 University of Cambridge
|
||||
Copyright (c) 1997-2020 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -37,7 +37,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2019 Zoltan Herczeg
|
||||
Copyright(c) 2010-2020 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -48,7 +48,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2019 Zoltan Herczeg
|
||||
Copyright(c) 2009-2020 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
|
21
NEWS
21
NEWS
|
@ -2,6 +2,27 @@ News about PCRE2 releases
|
|||
-------------------------
|
||||
|
||||
|
||||
Version 10.35 15-April-2020
|
||||
---------------------------
|
||||
|
||||
Bugfixes, tidies, and a few new enhancements.
|
||||
|
||||
1. Capturing groups that contain recursive backreferences to themselves are no
|
||||
longer automatically atomic, because the restriction is no longer necessary
|
||||
as a result of the 10.30 restructuring.
|
||||
|
||||
2. Several new options for pcre2_substitute().
|
||||
|
||||
3. When Unicode is supported and PCRE2_UCP is set without PCRE2_UTF, Unicode
|
||||
character properties are used for upper/lower case computations on characters
|
||||
whose code points are greater than 127.
|
||||
|
||||
4. The character tables (for low-valued characters) can now more easily be
|
||||
saved and restored in binary.
|
||||
|
||||
5. Updated to Unicode 13.0.0.
|
||||
|
||||
|
||||
Version 10.34 21-November-2019
|
||||
------------------------------
|
||||
|
||||
|
|
4
README
4
README
|
@ -562,7 +562,7 @@ not be a problem.
|
|||
If you need to modify the character tables when cross-compiling, you should
|
||||
move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by
|
||||
hand and run it on the local host to make a new version of
|
||||
pcre2_chartables.c.dist. See the pcre2build section "Creating character tables
|
||||
pcre2_chartables.c.dist. See the pcre2build section "Creating character tables
|
||||
at build time" for more details.
|
||||
|
||||
|
||||
|
@ -753,7 +753,7 @@ that represent character classes for code points less than 256. The final
|
|||
|
||||
1 white space character
|
||||
2 letter
|
||||
4 lower case letter
|
||||
4 lower case letter
|
||||
8 decimal digit
|
||||
16 alphanumeric or '_'
|
||||
|
||||
|
|
|
@ -11,15 +11,15 @@ dnl be defined as -RC2, for example. For real releases, it should be empty.
|
|||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [35])
|
||||
m4_define(pcre2_prerelease, [-RC1])
|
||||
m4_define(pcre2_date, [2019-11-27])
|
||||
m4_define(pcre2_date, [2020-04-15])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
||||
# Libtool shared library interface versions (current:revision:age)
|
||||
m4_define(libpcre2_8_version, [9:0:9])
|
||||
m4_define(libpcre2_16_version, [9:0:9])
|
||||
m4_define(libpcre2_32_version, [9:0:9])
|
||||
m4_define(libpcre2_8_version, [10:0:10])
|
||||
m4_define(libpcre2_16_version, [10:0:10])
|
||||
m4_define(libpcre2_32_version, [10:0:10])
|
||||
m4_define(libpcre2_posix_version, [2:3:0])
|
||||
|
||||
AC_PREREQ(2.57)
|
||||
|
|
|
@ -562,7 +562,7 @@ not be a problem.
|
|||
If you need to modify the character tables when cross-compiling, you should
|
||||
move pcre2_chartables.c.dist out of the way, then compile pcre2_dftables.c by
|
||||
hand and run it on the local host to make a new version of
|
||||
pcre2_chartables.c.dist. See the pcre2build section "Creating character tables
|
||||
pcre2_chartables.c.dist. See the pcre2build section "Creating character tables
|
||||
at build time" for more details.
|
||||
|
||||
|
||||
|
@ -753,7 +753,7 @@ that represent character classes for code points less than 256. The final
|
|||
|
||||
1 white space character
|
||||
2 letter
|
||||
4 lower case letter
|
||||
4 lower case letter
|
||||
8 decimal digit
|
||||
16 alphanumeric or '_'
|
||||
|
||||
|
|
|
@ -28,11 +28,11 @@ DESCRIPTION
|
|||
<P>
|
||||
This function sets a pointer to custom character tables within a compile
|
||||
context. The second argument must point to a set of PCRE2 character tables or
|
||||
be NULL to request the default tables. The result is always zero. Character
|
||||
tables can be created by calling <b>pcre2_maketables()</b> or by running the
|
||||
be NULL to request the default tables. The result is always zero. Character
|
||||
tables can be created by calling <b>pcre2_maketables()</b> or by running the
|
||||
<b>pcre2_dftables</b> maintenance command in binary mode (see the
|
||||
<a href="pcre2build.html"><b>pcre2build</b></a>
|
||||
documentation).
|
||||
documentation).
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -82,7 +82,7 @@ zero-terminated strings. The options are:
|
|||
PCRE2_SUBSTITUTE_LITERAL The replacement string is literal
|
||||
PCRE2_SUBSTITUTE_MATCHED Use pre-existing match data for 1st match
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH If overflow, compute needed length
|
||||
PCRE2_SUBSTITUTE_REPLACEMENT_ONLY Return only replacement string(s)
|
||||
PCRE2_SUBSTITUTE_REPLACEMENT_ONLY Return only replacement string(s)
|
||||
PCRE2_SUBSTITUTE_UNKNOWN_UNSET Treat unknown group as unset
|
||||
PCRE2_SUBSTITUTE_UNSET_EMPTY Simple unset insert = empty string
|
||||
</pre>
|
||||
|
|
|
@ -1228,7 +1228,7 @@ uint32_t integer that is always set to zero.
|
|||
<pre>
|
||||
PCRE2_CONFIG_TABLES_LENGTH
|
||||
</pre>
|
||||
The output is a uint32_t integer that gives the length of PCRE2's character
|
||||
The output is a uint32_t integer that gives the length of PCRE2's character
|
||||
processing tables in bytes. For details of these tables see the
|
||||
<a href="#localesupport">section on locale support</a>
|
||||
below.
|
||||
|
@ -1489,7 +1489,7 @@ documentation.
|
|||
</pre>
|
||||
If this bit is set, letters in the pattern match both upper and lower case
|
||||
letters in the subject. It is equivalent to Perl's /i option, and it can be
|
||||
changed within a pattern by a (?i) option setting. If either PCRE2_UTF or
|
||||
changed within a pattern by a (?i) option setting. If either PCRE2_UTF or
|
||||
PCRE2_UCP is set, Unicode properties are used for all characters with more than
|
||||
one other case, and for all characters whose code points are greater than
|
||||
U+007F. For lower valued characters with only one other case, a lookup table is
|
||||
|
@ -1837,7 +1837,7 @@ the section on
|
|||
in the
|
||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||
page. If you set PCRE2_UCP, matching one of the items it affects takes much
|
||||
longer.
|
||||
longer.
|
||||
</P>
|
||||
<P>
|
||||
The second effect of PCRE2_UCP is to force the use of Unicode properties for
|
||||
|
@ -2012,10 +2012,10 @@ PCRE2 handles caseless matching, and determines whether characters are letters,
|
|||
digits, or whatever, by reference to a set of tables, indexed by character code
|
||||
point. However, this applies only to characters whose code points are less than
|
||||
256. By default, higher-valued code points never match escapes such as \w or
|
||||
\d.
|
||||
\d.
|
||||
</P>
|
||||
<P>
|
||||
When PCRE2 is built with Unicode support (the default), the Unicode properties
|
||||
When PCRE2 is built with Unicode support (the default), the Unicode properties
|
||||
of all characters can be tested with \p and \P, or, alternatively, the
|
||||
PCRE2_UCP option can be set when a pattern is compiled; this causes \w and
|
||||
friends to use Unicode property support instead of the built-in tables.
|
||||
|
@ -3532,8 +3532,8 @@ terminating a \Q quoted sequence) reverts to no case forcing. The sequences
|
|||
\u and \l force the next character (if it is a letter) to upper or lower
|
||||
case, respectively, and then the state automatically reverts to no case
|
||||
forcing. Case forcing applies to all inserted characters, including those from
|
||||
capture groups and letters within \Q...\E quoted sequences. If either
|
||||
PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode
|
||||
capture groups and letters within \Q...\E quoted sequences. If either
|
||||
PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode
|
||||
properties are used for case forcing characters whose code points are greater
|
||||
than 127.
|
||||
</P>
|
||||
|
|
|
@ -342,23 +342,23 @@ host and therefore not compiled with the cross compiler.
|
|||
If you need to create alternative tables when cross compiling, you will have to
|
||||
do so "by hand". There may also be other reasons for creating tables manually.
|
||||
To cause <b>pcre2_dftables</b> to be built on the local host, run a normal
|
||||
compiling command, and then run the program with the output file as its
|
||||
compiling command, and then run the program with the output file as its
|
||||
argument, for example:
|
||||
<pre>
|
||||
cc src/pcre2_dftables.c -o pcre2_dftables
|
||||
./pcre2_dftables src/pcre2_chartables.c
|
||||
./pcre2_dftables src/pcre2_chartables.c
|
||||
</pre>
|
||||
This builds the tables in the default locale of the local host. If you want to
|
||||
This builds the tables in the default locale of the local host. If you want to
|
||||
specify a locale, you must use the -L option:
|
||||
<pre>
|
||||
LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c
|
||||
</pre>
|
||||
You can also specify -b (with or without -L). This causes the tables to be
|
||||
written in binary instead of as source code. A set of binary tables can be
|
||||
loaded into memory by an application and passed to <b>pcre2_compile()</b> in the
|
||||
same way as tables created by calling <b>pcre2_maketables()</b>. The tables are
|
||||
just a string of bytes, independent of hardware characteristics such as
|
||||
endianness. This means they can be bundled with an application that runs in
|
||||
You can also specify -b (with or without -L). This causes the tables to be
|
||||
written in binary instead of as source code. A set of binary tables can be
|
||||
loaded into memory by an application and passed to <b>pcre2_compile()</b> in the
|
||||
same way as tables created by calling <b>pcre2_maketables()</b>. The tables are
|
||||
just a string of bytes, independent of hardware characteristics such as
|
||||
endianness. This means they can be bundled with an application that runs in
|
||||
different environments, to ensure consistent behaviour.
|
||||
</P>
|
||||
<br><a name="SEC13" href="#TOC1">USING EBCDIC CODE</a><br>
|
||||
|
|
|
@ -647,7 +647,7 @@ use of JIT at run time. It is provided for testing and working round problems.
|
|||
It should never be needed in normal use.
|
||||
</P>
|
||||
<P>
|
||||
<b>-O</b> <i>text</i>, <b>--output</b>=<i>text</i>
|
||||
<b>-O</b> <i>text</i>, <b>--output</b>=<i>text</i>
|
||||
When there is a match, instead of outputting the whole line that matched,
|
||||
output just the given text, followed by an operating-system standard newline.
|
||||
The <b>--newline</b> option has no effect on this option, which is mutually
|
||||
|
|
|
@ -114,7 +114,7 @@ Another special sequence that may appear at the start of a pattern is (*UCP).
|
|||
This has the same effect as setting the PCRE2_UCP option: it causes sequences
|
||||
such as \d and \w to use Unicode properties to determine character types,
|
||||
instead of recognizing only characters with codes less than 256 via a lookup
|
||||
table. It also causes upper/lower casing operations to use Unicode properties
|
||||
table. It also causes upper/lower casing operations to use Unicode properties
|
||||
for characters with code points greater than 127, even when UTF is not set.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -2664,8 +2664,8 @@ as before because nothing has changed, so using a non-atomic assertion just
|
|||
wastes resources.
|
||||
</P>
|
||||
<P>
|
||||
There is one exception to backtracking into a non-atomic assertion. If an
|
||||
(*ACCEPT) control verb is triggered, the assertion succeeds atomically. That
|
||||
There is one exception to backtracking into a non-atomic assertion. If an
|
||||
(*ACCEPT) control verb is triggered, the assertion succeeds atomically. That
|
||||
is, a subsequent match failure cannot backtrack into the assertion.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -379,7 +379,7 @@ described in the section entitled "Saving and restoring compiled patterns"
|
|||
#loadtables <filename>
|
||||
</pre>
|
||||
This command is used to load a set of binary character tables that can be
|
||||
accessed by the tables=3 qualifier. Such tables can be created by the
|
||||
accessed by the tables=3 qualifier. Such tables can be created by the
|
||||
<b>pcre2_dftables</b> program with the -b option.
|
||||
<pre>
|
||||
#newline_default [<newline-list>]
|
||||
|
@ -1041,7 +1041,7 @@ with different character tables. The digit specifies the tables as follows:
|
|||
1 the default ASCII tables, as distributed in
|
||||
pcre2_chartables.c.dist
|
||||
2 a set of tables defining ISO 8859 characters
|
||||
3 a set of tables loaded by the #loadtables command
|
||||
3 a set of tables loaded by the #loadtables command
|
||||
</pre>
|
||||
In tables 2, some characters whose codes are greater than 128 are identified as
|
||||
letters, digits, spaces, etc. Tables 3 can be used only after a
|
||||
|
@ -1072,9 +1072,9 @@ process.
|
|||
substitute_callout use substitution callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_literal use PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_matched use PCRE2_SUBSTITUTE_MATCHED
|
||||
substitute_matched use PCRE2_SUBSTITUTE_MATCHED
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
|
||||
substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
|
||||
substitute_skip=<n> skip substitution <n>
|
||||
substitute_stop=<n> skip substitution <n> and following
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
|
@ -1244,10 +1244,10 @@ pattern.
|
|||
startoffset=<n> same as offset=<n>
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_literal use PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_matched use PCRE2_SUBSTITUTE_MATCHED
|
||||
substitute_literal use PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_matched use PCRE2_SUBSTITUTE_MATCHED
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
|
||||
substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_stop=<n> skip substitution number n and greater
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
|
@ -1409,7 +1409,7 @@ Testing the substitution function
|
|||
</b><br>
|
||||
<P>
|
||||
If the <b>replace</b> modifier is set, the <b>pcre2_substitute()</b> function is
|
||||
called instead of one of the matching functions (or after one call of
|
||||
called instead of one of the matching functions (or after one call of
|
||||
<b>pcre2_match()</b> in the case of PCRE2_SUBSTITUTE_MATCHED). Note that
|
||||
replacement strings cannot contain commas, because a comma signifies the end of
|
||||
a modifier. This is not thought to be an issue in a test program.
|
||||
|
@ -1428,10 +1428,10 @@ for <b>pcre2_substitute()</b>:
|
|||
<pre>
|
||||
global PCRE2_SUBSTITUTE_GLOBAL
|
||||
substitute_extended PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_literal PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_matched PCRE2_SUBSTITUTE_MATCHED
|
||||
substitute_literal PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_matched PCRE2_SUBSTITUTE_MATCHED
|
||||
substitute_overflow_length PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_replacement_only PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
|
||||
substitute_replacement_only PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
|
||||
substitute_unknown_unset PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
substitute_unset_empty PCRE2_SUBSTITUTE_UNSET_EMPTY
|
||||
</pre>
|
||||
|
|
|
@ -142,7 +142,7 @@ of Unicode properties except for characters whose code points are less than 128
|
|||
and that have at most two case-equivalent values. For these, a direct table
|
||||
lookup is used for speed. A few Unicode characters such as Greek sigma have
|
||||
more than two code points that are case-equivalent, and these are treated
|
||||
specially. Setting PCRE2_UCP without PCRE2_UTF allows Unicode-style case
|
||||
specially. Setting PCRE2_UCP without PCRE2_UTF allows Unicode-style case
|
||||
processing for non-UTF character encodings such as UCS-2.
|
||||
<a name="scriptruns"></a></P>
|
||||
<br><b>
|
||||
|
|
|
@ -180,8 +180,8 @@ REVISION
|
|||
Last updated: 17 September 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2API(3) Library Functions Manual PCRE2API(3)
|
||||
|
||||
|
||||
|
@ -3796,8 +3796,8 @@ REVISION
|
|||
Last updated: 19 March 2020
|
||||
Copyright (c) 1997-2020 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3)
|
||||
|
||||
|
||||
|
@ -4390,8 +4390,8 @@ REVISION
|
|||
Last updated: 20 March 2020
|
||||
Copyright (c) 1997-2020 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3)
|
||||
|
||||
|
||||
|
@ -4820,8 +4820,8 @@ REVISION
|
|||
Last updated: 03 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3)
|
||||
|
||||
|
||||
|
@ -5029,8 +5029,8 @@ REVISION
|
|||
Last updated: 13 July 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2JIT(3) Library Functions Manual PCRE2JIT(3)
|
||||
|
||||
|
||||
|
@ -5454,8 +5454,8 @@ REVISION
|
|||
Last updated: 23 May 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3)
|
||||
|
||||
|
||||
|
@ -5524,8 +5524,8 @@ REVISION
|
|||
Last updated: 02 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3)
|
||||
|
||||
|
||||
|
@ -5748,8 +5748,8 @@ REVISION
|
|||
Last updated: 23 May 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3)
|
||||
|
||||
|
||||
|
@ -6128,8 +6128,8 @@ REVISION
|
|||
Last updated: 04 September 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PATTERN(3) Library Functions Manual PCRE2PATTERN(3)
|
||||
|
||||
|
||||
|
@ -9562,8 +9562,8 @@ REVISION
|
|||
Last updated: 24 February 2020
|
||||
Copyright (c) 1997-2020 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PERFORM(3) Library Functions Manual PCRE2PERFORM(3)
|
||||
|
||||
|
||||
|
@ -9797,8 +9797,8 @@ REVISION
|
|||
Last updated: 03 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2POSIX(3) Library Functions Manual PCRE2POSIX(3)
|
||||
|
||||
|
||||
|
@ -10127,8 +10127,8 @@ REVISION
|
|||
Last updated: 30 January 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2SAMPLE(3) Library Functions Manual PCRE2SAMPLE(3)
|
||||
|
||||
|
||||
|
@ -10406,8 +10406,8 @@ REVISION
|
|||
Last updated: 27 June 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2SYNTAX(3) Library Functions Manual PCRE2SYNTAX(3)
|
||||
|
||||
|
||||
|
@ -10922,8 +10922,8 @@ REVISION
|
|||
Last updated: 28 December 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3)
|
||||
|
||||
|
||||
|
@ -11357,5 +11357,5 @@ REVISION
|
|||
Last updated: 23 February 2020
|
||||
Copyright (c) 1997-2020 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -16,13 +16,13 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
|||
.sp
|
||||
This function sets a pointer to custom character tables within a compile
|
||||
context. The second argument must point to a set of PCRE2 character tables or
|
||||
be NULL to request the default tables. The result is always zero. Character
|
||||
tables can be created by calling \fBpcre2_maketables()\fP or by running the
|
||||
be NULL to request the default tables. The result is always zero. Character
|
||||
tables can be created by calling \fBpcre2_maketables()\fP or by running the
|
||||
\fBpcre2_dftables\fP maintenance command in binary mode (see the
|
||||
.\" HREF
|
||||
\fBpcre2build\fP
|
||||
.\"
|
||||
documentation).
|
||||
documentation).
|
||||
.P
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
.\" HREF
|
||||
|
|
|
@ -73,7 +73,7 @@ zero-terminated strings. The options are:
|
|||
PCRE2_SUBSTITUTE_LITERAL The replacement string is literal
|
||||
PCRE2_SUBSTITUTE_MATCHED Use pre-existing match data for 1st match
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH If overflow, compute needed length
|
||||
PCRE2_SUBSTITUTE_REPLACEMENT_ONLY Return only replacement string(s)
|
||||
PCRE2_SUBSTITUTE_REPLACEMENT_ONLY Return only replacement string(s)
|
||||
PCRE2_SUBSTITUTE_UNKNOWN_UNSET Treat unknown group as unset
|
||||
PCRE2_SUBSTITUTE_UNSET_EMPTY Simple unset insert = empty string
|
||||
.sp
|
||||
|
|
|
@ -1156,7 +1156,7 @@ uint32_t integer that is always set to zero.
|
|||
.sp
|
||||
PCRE2_CONFIG_TABLES_LENGTH
|
||||
.sp
|
||||
The output is a uint32_t integer that gives the length of PCRE2's character
|
||||
The output is a uint32_t integer that gives the length of PCRE2's character
|
||||
processing tables in bytes. For details of these tables see the
|
||||
.\" HTML <a href="#localesupport">
|
||||
.\" </a>
|
||||
|
@ -1431,7 +1431,7 @@ documentation.
|
|||
.sp
|
||||
If this bit is set, letters in the pattern match both upper and lower case
|
||||
letters in the subject. It is equivalent to Perl's /i option, and it can be
|
||||
changed within a pattern by a (?i) option setting. If either PCRE2_UTF or
|
||||
changed within a pattern by a (?i) option setting. If either PCRE2_UTF or
|
||||
PCRE2_UCP is set, Unicode properties are used for all characters with more than
|
||||
one other case, and for all characters whose code points are greater than
|
||||
U+007F. For lower valued characters with only one other case, a lookup table is
|
||||
|
@ -1794,7 +1794,7 @@ in the
|
|||
\fBpcre2pattern\fP
|
||||
.\"
|
||||
page. If you set PCRE2_UCP, matching one of the items it affects takes much
|
||||
longer.
|
||||
longer.
|
||||
.P
|
||||
The second effect of PCRE2_UCP is to force the use of Unicode properties for
|
||||
upper/lower casing operations on characters with code points greater than 127,
|
||||
|
@ -1974,9 +1974,9 @@ PCRE2 handles caseless matching, and determines whether characters are letters,
|
|||
digits, or whatever, by reference to a set of tables, indexed by character code
|
||||
point. However, this applies only to characters whose code points are less than
|
||||
256. By default, higher-valued code points never match escapes such as \ew or
|
||||
\ed.
|
||||
\ed.
|
||||
.P
|
||||
When PCRE2 is built with Unicode support (the default), the Unicode properties
|
||||
When PCRE2 is built with Unicode support (the default), the Unicode properties
|
||||
of all characters can be tested with \ep and \eP, or, alternatively, the
|
||||
PCRE2_UCP option can be set when a pattern is compiled; this causes \ew and
|
||||
friends to use Unicode property support instead of the built-in tables.
|
||||
|
@ -3537,8 +3537,8 @@ terminating a \eQ quoted sequence) reverts to no case forcing. The sequences
|
|||
\eu and \el force the next character (if it is a letter) to upper or lower
|
||||
case, respectively, and then the state automatically reverts to no case
|
||||
forcing. Case forcing applies to all inserted characters, including those from
|
||||
capture groups and letters within \eQ...\eE quoted sequences. If either
|
||||
PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode
|
||||
capture groups and letters within \eQ...\eE quoted sequences. If either
|
||||
PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode
|
||||
properties are used for case forcing characters whose code points are greater
|
||||
than 127.
|
||||
.P
|
||||
|
|
|
@ -338,23 +338,23 @@ host and therefore not compiled with the cross compiler.
|
|||
If you need to create alternative tables when cross compiling, you will have to
|
||||
do so "by hand". There may also be other reasons for creating tables manually.
|
||||
To cause \fBpcre2_dftables\fP to be built on the local host, run a normal
|
||||
compiling command, and then run the program with the output file as its
|
||||
compiling command, and then run the program with the output file as its
|
||||
argument, for example:
|
||||
.sp
|
||||
cc src/pcre2_dftables.c -o pcre2_dftables
|
||||
./pcre2_dftables src/pcre2_chartables.c
|
||||
./pcre2_dftables src/pcre2_chartables.c
|
||||
.sp
|
||||
This builds the tables in the default locale of the local host. If you want to
|
||||
This builds the tables in the default locale of the local host. If you want to
|
||||
specify a locale, you must use the -L option:
|
||||
.sp
|
||||
LC_ALL=fr_FR ./pcre2_dftables -L src/pcre2_chartables.c
|
||||
.sp
|
||||
You can also specify -b (with or without -L). This causes the tables to be
|
||||
written in binary instead of as source code. A set of binary tables can be
|
||||
loaded into memory by an application and passed to \fBpcre2_compile()\fP in the
|
||||
same way as tables created by calling \fBpcre2_maketables()\fP. The tables are
|
||||
just a string of bytes, independent of hardware characteristics such as
|
||||
endianness. This means they can be bundled with an application that runs in
|
||||
.sp
|
||||
You can also specify -b (with or without -L). This causes the tables to be
|
||||
written in binary instead of as source code. A set of binary tables can be
|
||||
loaded into memory by an application and passed to \fBpcre2_compile()\fP in the
|
||||
same way as tables created by calling \fBpcre2_maketables()\fP. The tables are
|
||||
just a string of bytes, independent of hardware characteristics such as
|
||||
endianness. This means they can be bundled with an application that runs in
|
||||
different environments, to ensure consistent behaviour.
|
||||
.
|
||||
.
|
||||
|
|
|
@ -564,7 +564,7 @@ was explicitly disabled at build time. This option can be used to disable the
|
|||
use of JIT at run time. It is provided for testing and working round problems.
|
||||
It should never be needed in normal use.
|
||||
.TP
|
||||
\fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP
|
||||
\fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP
|
||||
When there is a match, instead of outputting the whole line that matched,
|
||||
output just the given text, followed by an operating-system standard newline.
|
||||
The \fB--newline\fP option has no effect on this option, which is mutually
|
||||
|
|
|
@ -75,7 +75,7 @@ Another special sequence that may appear at the start of a pattern is (*UCP).
|
|||
This has the same effect as setting the PCRE2_UCP option: it causes sequences
|
||||
such as \ed and \ew to use Unicode properties to determine character types,
|
||||
instead of recognizing only characters with codes less than 256 via a lookup
|
||||
table. It also causes upper/lower casing operations to use Unicode properties
|
||||
table. It also causes upper/lower casing operations to use Unicode properties
|
||||
for characters with code points greater than 127, even when UTF is not set.
|
||||
.P
|
||||
Some applications that allow their users to supply patterns may wish to
|
||||
|
@ -2676,8 +2676,8 @@ pattern. If this is not the case, the rest of the pattern match fails exactly
|
|||
as before because nothing has changed, so using a non-atomic assertion just
|
||||
wastes resources.
|
||||
.P
|
||||
There is one exception to backtracking into a non-atomic assertion. If an
|
||||
(*ACCEPT) control verb is triggered, the assertion succeeds atomically. That
|
||||
There is one exception to backtracking into a non-atomic assertion. If an
|
||||
(*ACCEPT) control verb is triggered, the assertion succeeds atomically. That
|
||||
is, a subsequent match failure cannot backtrack into the assertion.
|
||||
.P
|
||||
Non-atomic assertions are not supported by the alternative matching function
|
||||
|
|
|
@ -538,7 +538,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
|
|||
(?*...) )
|
||||
(*napla:...) ) synonyms
|
||||
(*non_atomic_positive_lookahead:...) )
|
||||
.sp
|
||||
.sp
|
||||
(?<*...) )
|
||||
(*naplb:...) ) synonyms
|
||||
(*non_atomic_positive_lookbehind:...) )
|
||||
|
|
|
@ -330,7 +330,7 @@ below.
|
|||
#loadtables <filename>
|
||||
.sp
|
||||
This command is used to load a set of binary character tables that can be
|
||||
accessed by the tables=3 qualifier. Such tables can be created by the
|
||||
accessed by the tables=3 qualifier. Such tables can be created by the
|
||||
\fBpcre2_dftables\fP program with the -b option.
|
||||
.sp
|
||||
#newline_default [<newline-list>]
|
||||
|
@ -1002,7 +1002,7 @@ with different character tables. The digit specifies the tables as follows:
|
|||
1 the default ASCII tables, as distributed in
|
||||
pcre2_chartables.c.dist
|
||||
2 a set of tables defining ISO 8859 characters
|
||||
3 a set of tables loaded by the #loadtables command
|
||||
3 a set of tables loaded by the #loadtables command
|
||||
.sp
|
||||
In tables 2, some characters whose codes are greater than 128 are identified as
|
||||
letters, digits, spaces, etc. Tables 3 can be used only after a
|
||||
|
@ -1033,9 +1033,9 @@ process.
|
|||
substitute_callout use substitution callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_literal use PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_matched use PCRE2_SUBSTITUTE_MATCHED
|
||||
substitute_matched use PCRE2_SUBSTITUTE_MATCHED
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
|
||||
substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
|
||||
substitute_skip=<n> skip substitution <n>
|
||||
substitute_stop=<n> skip substitution <n> and following
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
|
@ -1212,10 +1212,10 @@ pattern.
|
|||
startoffset=<n> same as offset=<n>
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_literal use PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_matched use PCRE2_SUBSTITUTE_MATCHED
|
||||
substitute_literal use PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_matched use PCRE2_SUBSTITUTE_MATCHED
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
|
||||
substitute_replacement_only use PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_stop=<n> skip substitution number n and greater
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
|
@ -1379,7 +1379,7 @@ by name.
|
|||
.rs
|
||||
.sp
|
||||
If the \fBreplace\fP modifier is set, the \fBpcre2_substitute()\fP function is
|
||||
called instead of one of the matching functions (or after one call of
|
||||
called instead of one of the matching functions (or after one call of
|
||||
\fBpcre2_match()\fP in the case of PCRE2_SUBSTITUTE_MATCHED). Note that
|
||||
replacement strings cannot contain commas, because a comma signifies the end of
|
||||
a modifier. This is not thought to be an issue in a test program.
|
||||
|
@ -1396,10 +1396,10 @@ for \fBpcre2_substitute()\fP:
|
|||
.sp
|
||||
global PCRE2_SUBSTITUTE_GLOBAL
|
||||
substitute_extended PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_literal PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_matched PCRE2_SUBSTITUTE_MATCHED
|
||||
substitute_literal PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_matched PCRE2_SUBSTITUTE_MATCHED
|
||||
substitute_overflow_length PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_replacement_only PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
|
||||
substitute_replacement_only PCRE2_SUBSTITUTE_REPLACEMENT_ONLY
|
||||
substitute_unknown_unset PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
substitute_unset_empty PCRE2_SUBSTITUTE_UNSET_EMPTY
|
||||
.sp
|
||||
|
|
|
@ -134,7 +134,7 @@ of Unicode properties except for characters whose code points are less than 128
|
|||
and that have at most two case-equivalent values. For these, a direct table
|
||||
lookup is used for speed. A few Unicode characters such as Greek sigma have
|
||||
more than two code points that are case-equivalent, and these are treated
|
||||
specially. Setting PCRE2_UCP without PCRE2_UTF allows Unicode-style case
|
||||
specially. Setting PCRE2_UCP without PCRE2_UTF allows Unicode-style case
|
||||
processing for non-UTF character encodings such as UCS-2.
|
||||
.
|
||||
.
|
||||
|
|
|
@ -218,7 +218,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_NAME "PCRE2"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE2 10.34"
|
||||
#define PACKAGE_STRING "PCRE2 10.35-RC1"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre2"
|
||||
|
@ -227,7 +227,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "10.34"
|
||||
#define PACKAGE_VERSION "10.35-RC1"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
|
@ -352,7 +352,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#endif
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "10.34"
|
||||
#define VERSION "10.35-RC1"
|
||||
|
||||
/* Define to 1 if on MINIX. */
|
||||
/* #undef _MINIX */
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
/* This is the public header file for the PCRE library, second API, to be
|
||||
#included by applications that call PCRE2 functions.
|
||||
|
||||
Copyright (c) 2016-2019 University of Cambridge
|
||||
Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 34
|
||||
#define PCRE2_PRERELEASE
|
||||
#define PCRE2_DATE 2019-11-21
|
||||
#define PCRE2_MINOR 35
|
||||
#define PCRE2_PRERELEASE -RC1
|
||||
#define PCRE2_DATE 2020-04-15
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
|
@ -181,6 +181,9 @@ pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */
|
|||
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */
|
||||
#define PCRE2_NO_JIT 0x00002000u /* Not for pcre2_dfa_match() */
|
||||
#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u
|
||||
#define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_MATCHED 0x00010000u /* pcre2_substitute() only */
|
||||
#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u /* pcre2_substitute() only */
|
||||
|
||||
/* Options for pcre2_pattern_convert(). */
|
||||
|
||||
|
@ -445,6 +448,7 @@ released, the numbers must not be changed. */
|
|||
#define PCRE2_CONFIG_HEAPLIMIT 12
|
||||
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
|
||||
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
|
||||
#define PCRE2_CONFIG_TABLES_LENGTH 15
|
||||
|
||||
|
||||
/* Types for code units in patterns and subject strings. */
|
||||
|
|
|
@ -505,7 +505,7 @@ which case the base cannot be possessified.
|
|||
Arguments:
|
||||
code points to the byte code
|
||||
utf TRUE in UTF mode
|
||||
ucp TRUE in UCP mode
|
||||
ucp TRUE in UCP mode
|
||||
cb compile data block
|
||||
base_list the data list of the base opcode
|
||||
base_end the end of the base opcode
|
||||
|
@ -675,7 +675,7 @@ for(;;)
|
|||
/* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
|
||||
|
||||
next_code += 1 + LINK_SIZE;
|
||||
if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
|
||||
if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
|
||||
rec_limit))
|
||||
return FALSE;
|
||||
|
||||
|
@ -1134,7 +1134,7 @@ for (;;)
|
|||
get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
|
||||
list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
|
||||
|
||||
if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
|
||||
if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
|
||||
&rec_limit))
|
||||
{
|
||||
switch(c)
|
||||
|
|
|
@ -3527,14 +3527,14 @@ if ((re->flags & PCRE2_FIRSTSET) != 0)
|
|||
if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
|
||||
{
|
||||
first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (first_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0)
|
||||
first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
|
||||
#else
|
||||
if (first_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0))
|
||||
first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
|
||||
#endif
|
||||
#endif
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
}
|
||||
|
@ -3553,10 +3553,10 @@ if ((re->flags & PCRE2_LASTSET) != 0)
|
|||
req_cu2 = TABLE_GET(req_cu, mb->tables + fcc_offset, req_cu);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (req_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0)
|
||||
if (req_cu > 127 && !utf && (re->overall_options & PCRE2_UCP) != 0)
|
||||
req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
|
||||
#else
|
||||
if (req_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0))
|
||||
if (req_cu > 127 && (utf || (re->overall_options & PCRE2_UCP) != 0))
|
||||
req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
|
||||
#endif
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
|
|
@ -65,23 +65,23 @@ given, they are written in binary. */
|
|||
|
||||
static char *classlist[] =
|
||||
{
|
||||
"space", "xdigit", "digit", "upper", "lower",
|
||||
"word", "graph", "print", "punct", "cntrl"
|
||||
};
|
||||
"space", "xdigit", "digit", "upper", "lower",
|
||||
"word", "graph", "print", "punct", "cntrl"
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
/*************************************************
|
||||
* Usage *
|
||||
*************************************************/
|
||||
|
||||
static void
|
||||
usage(void)
|
||||
{
|
||||
(void)fprintf(stderr,
|
||||
|
||||
static void
|
||||
usage(void)
|
||||
{
|
||||
(void)fprintf(stderr,
|
||||
"Usage: pcre2_dftables [options] <output file>\n"
|
||||
" -b Write output in binary (default is source code)\n"
|
||||
" -L Use locale from LC_ALL (default is \"C\" locale)\n"
|
||||
" -L Use locale from LC_ALL (default is \"C\" locale)\n"
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -97,7 +97,7 @@ FILE *f;
|
|||
int i;
|
||||
int nclass = 0;
|
||||
BOOL binary = FALSE;
|
||||
char *env = "C";
|
||||
char *env = "C";
|
||||
const unsigned char *tables;
|
||||
const unsigned char *base_of_tables;
|
||||
|
||||
|
@ -107,40 +107,40 @@ for (i = 1; i < argc; i++)
|
|||
{
|
||||
unsigned char *arg = (unsigned char *)argv[i];
|
||||
if (*arg != '-') break;
|
||||
|
||||
|
||||
if (strcmp(arg, "-help") == 0 || strcmp(arg, "--help") == 0)
|
||||
{
|
||||
usage();
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
else if (strcmp(arg, "-L") == 0)
|
||||
{
|
||||
{
|
||||
if (setlocale(LC_ALL, "") == NULL)
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: setlocale() failed\n");
|
||||
return 1;
|
||||
return 1;
|
||||
}
|
||||
env = getenv("LC_ALL");
|
||||
}
|
||||
|
||||
env = getenv("LC_ALL");
|
||||
}
|
||||
|
||||
else if (strcmp(arg, "-b") == 0)
|
||||
binary = TRUE;
|
||||
|
||||
else
|
||||
|
||||
else
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: unrecognized option %s\n", arg);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (i != argc - 1)
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: one filename argument is required\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Make the tables */
|
||||
|
||||
/* Make the tables */
|
||||
|
||||
tables = maketables();
|
||||
base_of_tables = tables;
|
||||
|
@ -151,19 +151,19 @@ if (f == NULL)
|
|||
fprintf(stderr, "pcre2_dftables: failed to open %s for writing\n", argv[1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/* If -b was specified, we write the tables in binary. */
|
||||
|
||||
if (binary)
|
||||
{
|
||||
int yield = 0;
|
||||
int yield = 0;
|
||||
size_t len = fwrite(tables, 1, TABLES_LENGTH, f);
|
||||
if (len != TABLES_LENGTH)
|
||||
{
|
||||
(void)fprintf(stderr, "pcre2_dftables: fwrite() returned wrong length %d "
|
||||
"instead of %d\n", (int)len, TABLES_LENGTH);
|
||||
yield = 1;
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
free((void *)base_of_tables);
|
||||
return yield;
|
||||
|
@ -181,9 +181,9 @@ the very long string otherwise. */
|
|||
"program. It contains character tables that are used when no external\n"
|
||||
"tables are passed to PCRE2 by the application that calls it. The tables\n"
|
||||
"are used only for characters whose code values are less than 256. */\n\n");
|
||||
|
||||
|
||||
(void)fprintf(f,
|
||||
"/* This set of tables was written in the %s locale. */\n\n", env);
|
||||
"/* This set of tables was written in the %s locale. */\n\n", env);
|
||||
|
||||
(void)fprintf(f,
|
||||
"/* The pcre2_ftables program (which is distributed with PCRE2) can be used\n"
|
||||
|
@ -255,7 +255,7 @@ for (i = 0; i < cbit_length; i++)
|
|||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
if ((i & 31) == 0) (void)fprintf(f, "\n");
|
||||
if ((i & 24) == 8) (void)fprintf(f, " /* %s */", classlist[nclass++]);
|
||||
if ((i & 24) == 8) (void)fprintf(f, " /* %s */", classlist[nclass++]);
|
||||
(void)fprintf(f, "\n ");
|
||||
}
|
||||
(void)fprintf(f, "0x%02x", *tables++);
|
||||
|
|
|
@ -1174,7 +1174,7 @@ while (cc < ccend)
|
|||
case OP_PRUNE_ARG:
|
||||
if (cc < assert_na_end)
|
||||
return FALSE;
|
||||
/* Fall through */
|
||||
/* Fall through */
|
||||
case OP_MARK:
|
||||
if (common->mark_ptr == 0)
|
||||
{
|
||||
|
|
|
@ -382,7 +382,7 @@ if (caseless)
|
|||
{
|
||||
#if defined SUPPORT_UNICODE
|
||||
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
|
||||
|
||||
|
||||
if (utf || (mb->poptions & PCRE2_UCP) != 0)
|
||||
{
|
||||
PCRE2_SPTR endptr = p + length;
|
||||
|
@ -395,24 +395,24 @@ if (caseless)
|
|||
sequence of two of the latter. It is important, therefore, to check the
|
||||
length along the reference, not along the subject (earlier code did this
|
||||
wrong). UCP uses Unicode properties but without UTF encoding. */
|
||||
|
||||
|
||||
while (p < endptr)
|
||||
{
|
||||
uint32_t c, d;
|
||||
const ucd_record *ur;
|
||||
if (eptr >= mb->end_subject) return 1; /* Partial match */
|
||||
|
||||
|
||||
if (utf)
|
||||
{
|
||||
{
|
||||
GETCHARINC(c, eptr);
|
||||
GETCHARINC(d, p);
|
||||
}
|
||||
else
|
||||
{
|
||||
c = *eptr++;
|
||||
d = *p++;
|
||||
d = *p++;
|
||||
}
|
||||
|
||||
|
||||
ur = GET_UCD(d);
|
||||
if (c != d && c != (uint32_t)((int)d + ur->other_case))
|
||||
{
|
||||
|
|
|
@ -772,13 +772,13 @@ Arguments:
|
|||
p points to the first code unit of the character
|
||||
caseless TRUE if caseless
|
||||
utf TRUE for UTF mode
|
||||
ucp TRUE for UCP mode
|
||||
ucp TRUE for UCP mode
|
||||
|
||||
Returns: pointer after the character
|
||||
*/
|
||||
|
||||
static PCRE2_SPTR
|
||||
set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf,
|
||||
set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf,
|
||||
BOOL ucp)
|
||||
{
|
||||
uint32_t c = *p++; /* First code unit */
|
||||
|
@ -819,17 +819,17 @@ if (caseless)
|
|||
c = UCD_OTHERCASE(c);
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf)
|
||||
{
|
||||
{
|
||||
PCRE2_UCHAR buff[6];
|
||||
(void)PRIV(ord2utf)(c, buff);
|
||||
SET_BIT(buff[0]);
|
||||
}
|
||||
else if (c < 256) SET_BIT(c);
|
||||
else if (c < 256) SET_BIT(c);
|
||||
#else /* 16-bit or 32-bit mode */
|
||||
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
|
@ -939,7 +939,7 @@ Arguments:
|
|||
re points to the compiled regex block
|
||||
code points to an expression
|
||||
utf TRUE if in UTF mode
|
||||
ucp TRUE if in UCP mode
|
||||
ucp TRUE if in UCP mode
|
||||
depthptr pointer to recurse depth
|
||||
|
||||
Returns: SSB_FAIL => Failed to find any starting code units
|
||||
|
@ -1706,7 +1706,7 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
|
|||
int b = -1;
|
||||
uint8_t *p = re->start_bitmap;
|
||||
uint32_t flags = PCRE2_FIRSTMAPSET;
|
||||
|
||||
|
||||
for (i = 0; i < 256; p++, i += 8)
|
||||
{
|
||||
uint8_t x = *p;
|
||||
|
@ -1736,7 +1736,7 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
|
|||
}
|
||||
|
||||
/* c contains the code unit value, in the range 0-255. In 8-bit UTF
|
||||
mode, only values < 128 can be used. In all the other cases, c is a
|
||||
mode, only values < 128 can be used. In all the other cases, c is a
|
||||
character value. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
|
@ -1746,10 +1746,10 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
|
|||
else if (b < 0) /* Second one found */
|
||||
{
|
||||
int d = TABLE_GET((unsigned int)c, re->tables + fcc_offset, c);
|
||||
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf || ucp)
|
||||
{
|
||||
{
|
||||
if (UCD_CASESET(c) != 0) goto DONE; /* Multiple case set */
|
||||
if (c > 127) d = UCD_OTHERCASE(c);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue