Final file tidies for 10.40
This commit is contained in:
parent
13be26a5c2
commit
3103b8f20a
6
AUTHORS
6
AUTHORS
|
@ -8,7 +8,7 @@ Email domain: gmail.com
|
|||
Retired from University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2021 University of Cambridge
|
||||
Copyright (c) 1997-2022 University of Cambridge
|
||||
All rights reserved
|
||||
|
||||
|
||||
|
@ -19,7 +19,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2021 Zoltan Herczeg
|
||||
Copyright(c) 2010-2022 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -30,7 +30,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2021 Zoltan Herczeg
|
||||
Copyright(c) 2009-2022 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
####
|
||||
|
|
|
@ -1056,13 +1056,13 @@ IF(MSVC AND INSTALL_MSVC_PDB)
|
|||
INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2-8.pdb
|
||||
${PROJECT_BINARY_DIR}/pcre2-16.pdb
|
||||
${PROJECT_BINARY_DIR}/pcre2-32.pdb
|
||||
${PROJECT_BINARY_DIR}/pcre2-posix.pdb
|
||||
${PROJECT_BINARY_DIR}/pcre2-posix.pdb
|
||||
DESTINATION bin
|
||||
CONFIGURATIONS RelWithDebInfo)
|
||||
INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2-8d.pdb
|
||||
${PROJECT_BINARY_DIR}/pcre2-16d.pdb
|
||||
${PROJECT_BINARY_DIR}/pcre2-32d.pdb
|
||||
${PROJECT_BINARY_DIR}/pcre2-posixd.pdb
|
||||
${PROJECT_BINARY_DIR}/pcre2-posixd.pdb
|
||||
DESTINATION bin
|
||||
CONFIGURATIONS Debug)
|
||||
ENDIF(MSVC AND INSTALL_MSVC_PDB)
|
||||
|
|
24
ChangeLog
24
ChangeLog
|
@ -2,8 +2,8 @@ Change Log for PCRE2
|
|||
--------------------
|
||||
|
||||
|
||||
Version 10.40-RC1 xx-xxx-2021
|
||||
-----------------------------
|
||||
Version 10.40 15-April-2022
|
||||
---------------------------
|
||||
|
||||
1. Merged patch from @carenas (GitHub #35, 7db87842) to fix pcre2grep incorrect
|
||||
handling of multiple passes.
|
||||
|
@ -39,8 +39,8 @@ pcre2_substitute(), and the replacement argument of the latter, if the pointer
|
|||
is NULL and the length is zero, treat as an empty string. Apparently a number
|
||||
of applications treat NULL/0 in this way.
|
||||
|
||||
14. Added support for Bidi_Class and a number of binary Unicode properties,
|
||||
including Bidi_Control.
|
||||
14. Added support for Bidi_Class and a number of binary Unicode properties,
|
||||
including Bidi_Control.
|
||||
|
||||
15. Fix some minor issues raised by clang sanitize.
|
||||
|
||||
|
@ -58,13 +58,13 @@ including Bidi_Control.
|
|||
(c) Changed \p{scriptname} from being the same as \p{sc:scriptname} to being
|
||||
the same as \p{scx:scriptname} because this change happened in Perl at
|
||||
release 5.26.
|
||||
|
||||
(d) The standard Unicode 4-letter abbreviations for script names are now
|
||||
|
||||
(d) The standard Unicode 4-letter abbreviations for script names are now
|
||||
recognized.
|
||||
|
||||
|
||||
(e) In accordance with Unicode and Perl's "loose matching" rules, spaces,
|
||||
hyphens, and underscores are ignored in property names, which are then
|
||||
matched independent of case.
|
||||
hyphens, and underscores are ignored in property names, which are then
|
||||
matched independent of case.
|
||||
|
||||
18. The Python scripts in the maint directory have been refactored. There are
|
||||
now three scripts that generate pcre2_ucd.c, pcre2_ucp.h, and pcre2_ucptables.c
|
||||
|
@ -83,7 +83,7 @@ Clarke PR#72.
|
|||
|
||||
20. Added -LP and -LS listing options to pcre2test.
|
||||
|
||||
21. A user discovered that the library names in CMakeLists.txt for MSVC
|
||||
21. A user discovered that the library names in CMakeLists.txt for MSVC
|
||||
debugger (PDB) files were incorrect - perhaps never tried for PCRE2?
|
||||
|
||||
22. An item such as [Aa] is optimized into a caseless single character match.
|
||||
|
@ -98,12 +98,12 @@ fully read in caseless matching.
|
|||
24. Fixed an issue affecting recursions in JIT caused by duplicated data
|
||||
transfers.
|
||||
|
||||
25. Merged patch from @carenas (GitHub #96) which fixes some problems with
|
||||
25. Merged patch from @carenas (GitHub #96) which fixes some problems with
|
||||
pcre2test and readline/readedit:
|
||||
|
||||
* Use the right header for libedit in FreeBSD with autoconf
|
||||
* Really allow libedit with cmake
|
||||
* Avoid using readline headers with libedit
|
||||
* Avoid using readline headers with libedit
|
||||
|
||||
|
||||
Version 10.39 29-October-2021
|
||||
|
|
6
LICENCE
6
LICENCE
|
@ -26,7 +26,7 @@ Email domain: gmail.com
|
|||
Retired from University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2021 University of Cambridge
|
||||
Copyright (c) 1997-2022 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -37,7 +37,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2010-2021 Zoltan Herczeg
|
||||
Copyright(c) 2010-2022 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
@ -48,7 +48,7 @@ Written by: Zoltan Herczeg
|
|||
Email local part: hzmester
|
||||
Email domain: freemail.hu
|
||||
|
||||
Copyright(c) 2009-2021 Zoltan Herczeg
|
||||
Copyright(c) 2009-2022 Zoltan Herczeg
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
|
|
@ -667,6 +667,7 @@ EXTRA_DIST += \
|
|||
testdata/testinput23 \
|
||||
testdata/testinput24 \
|
||||
testdata/testinput25 \
|
||||
testdata/testinput26 \
|
||||
testdata/testinputEBC \
|
||||
testdata/testoutput1 \
|
||||
testdata/testoutput2 \
|
||||
|
@ -709,6 +710,7 @@ EXTRA_DIST += \
|
|||
testdata/testoutput23 \
|
||||
testdata/testoutput24 \
|
||||
testdata/testoutput25 \
|
||||
testdata/testoutput26 \
|
||||
testdata/testoutputEBC \
|
||||
testdata/valgrind-jit.supp \
|
||||
testdata/wintestinput3 \
|
||||
|
|
32
NEWS
32
NEWS
|
@ -2,6 +2,38 @@ News about PCRE2 releases
|
|||
-------------------------
|
||||
|
||||
|
||||
Version 10.40 15-April-2022
|
||||
---------------------------
|
||||
|
||||
This is mostly a bug-fixing and code-tidying release. However, there are some
|
||||
extensions to Unicode property handling:
|
||||
|
||||
* Added support for Bidi_Class and a number of binary Unicode properties,
|
||||
including Bidi_Control.
|
||||
|
||||
* A number of changes to script matching for \p and \P:
|
||||
|
||||
(a) Script extensions for a character are now coded as a bitmap instead of
|
||||
a list of script numbers, which should be faster and does not need a
|
||||
loop.
|
||||
|
||||
(b) Added the syntax \p{script:xxx} and \p{script_extensions:xxx} (synonyms
|
||||
sc and scx).
|
||||
|
||||
(c) Changed \p{scriptname} from being the same as \p{sc:scriptname} to being
|
||||
the same as \p{scx:scriptname} because this change happened in Perl at
|
||||
release 5.26.
|
||||
|
||||
(d) The standard Unicode 4-letter abbreviations for script names are now
|
||||
recognized.
|
||||
|
||||
(e) In accordance with Unicode and Perl's "loose matching" rules, spaces,
|
||||
hyphens, and underscores are ignored in property names, which are then
|
||||
matched independent of case.
|
||||
|
||||
As always, see ChangeLog for a list of all changes (also the Git log).
|
||||
|
||||
|
||||
Version 10.39 29-October-2021
|
||||
-----------------------------
|
||||
|
||||
|
|
24
README
24
README
|
@ -120,7 +120,7 @@ to the directory where you want the files to be created. This command is a
|
|||
standard GNU "autoconf" configuration script, for which generic instructions
|
||||
are supplied in the file INSTALL.
|
||||
|
||||
The files in the GitHub repository do not contain "configure". If you have
|
||||
The files in the GitHub repository do not contain "configure". If you have
|
||||
downloaded the PCRE2 source files from GitHub, before you can run "configure"
|
||||
you must run the shell script called autogen.sh. This runs a number of
|
||||
autotools to create a "configure" script (you must of course have the autotools
|
||||
|
@ -194,10 +194,10 @@ library. They are also documented in the pcre2build man page.
|
|||
|
||||
As well as supporting UTF strings, Unicode support includes support for the
|
||||
\P, \p, and \X sequences that recognize Unicode character properties.
|
||||
However, only the basic two-letter properties such as Lu are supported.
|
||||
Escape sequences such as \d and \w in patterns do not by default make use of
|
||||
Unicode properties, but can be made to do so by setting the PCRE2_UCP option
|
||||
or starting a pattern with (*UCP).
|
||||
However, only a subset of Unicode properties are supported; see the
|
||||
pcre2pattern man page for details. Escape sequences such as \d and \w in
|
||||
patterns do not by default make use of Unicode properties, but can be made to
|
||||
do so by setting the PCRE2_UCP option or starting a pattern with (*UCP).
|
||||
|
||||
. You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
|
||||
of the preceding, or any of the Unicode newline sequences, or the NUL (zero)
|
||||
|
@ -417,7 +417,7 @@ The "configure" script builds the following files for the basic C library:
|
|||
. Makefile the makefile that builds the library
|
||||
. src/config.h build-time configuration options for the library
|
||||
. src/pcre2.h the public PCRE2 header file
|
||||
. pcre2-config script that shows the building settings such as CFLAGS
|
||||
. pcre2-config script that shows the building settings such as CFLAGS
|
||||
that were set for "configure"
|
||||
. libpcre2-8.pc )
|
||||
. libpcre2-16.pc ) data for the pkg-config command
|
||||
|
@ -577,9 +577,9 @@ at build time" for more details.
|
|||
Making new tarballs
|
||||
-------------------
|
||||
|
||||
The command "make dist" creates two PCRE2 tarballs, in tar.gz and zip formats.
|
||||
The command "make distcheck" does the same, but then does a trial build of the
|
||||
new distribution to ensure that it works.
|
||||
The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and
|
||||
zip formats. The command "make distcheck" does the same, but then does a trial
|
||||
build of the new distribution to ensure that it works.
|
||||
|
||||
If you have modified any of the man page sources in the doc directory, you
|
||||
should first run the PrepareRelease script before making a distribution. This
|
||||
|
@ -608,13 +608,13 @@ is available. RunTest outputs a comment when it skips a test.
|
|||
|
||||
Many (but not all) of the tests that are not skipped are run twice if JIT
|
||||
support is available. On the second run, JIT compilation is forced. This
|
||||
testing can be suppressed by putting "nojit" on the RunTest command line.
|
||||
testing can be suppressed by putting "-nojit" on the RunTest command line.
|
||||
|
||||
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
|
||||
libraries that are enabled. If you want to run just one set of tests, call
|
||||
RunTest with either the -8, -16 or -32 option.
|
||||
|
||||
If valgrind is installed, you can run the tests under it by putting "valgrind"
|
||||
If valgrind is installed, you can run the tests under it by putting "-valgrind"
|
||||
on the RunTest command line. To run pcre2test on just one or more specific test
|
||||
files, give their numbers as arguments to RunTest, for example:
|
||||
|
||||
|
@ -911,4 +911,4 @@ The distribution should contain the files listed below.
|
|||
Philip Hazel
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
Last updated: 13 April 2022
|
||||
Last updated: 15 April 2022
|
||||
|
|
12
configure.ac
12
configure.ac
|
@ -10,14 +10,14 @@ dnl be defined as -RC2, for example. For real releases, it should be empty.
|
|||
|
||||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [40])
|
||||
m4_define(pcre2_prerelease, [-RC1])
|
||||
m4_define(pcre2_date, [2021-11-09])
|
||||
m4_define(pcre2_prerelease, [])
|
||||
m4_define(pcre2_date, [2022-04-14])
|
||||
|
||||
# Libtool shared library interface versions (current:revision:age)
|
||||
m4_define(libpcre2_8_version, [10:4:10])
|
||||
m4_define(libpcre2_16_version, [10:4:10])
|
||||
m4_define(libpcre2_32_version, [10:4:10])
|
||||
m4_define(libpcre2_posix_version, [3:1:0])
|
||||
m4_define(libpcre2_8_version, [11:0:11])
|
||||
m4_define(libpcre2_16_version, [11:0:11])
|
||||
m4_define(libpcre2_32_version, [11:0:11])
|
||||
m4_define(libpcre2_posix_version, [3:2:0])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
|
|
@ -114,12 +114,18 @@ Building PCRE2 using autotools
|
|||
The following instructions assume the use of the widely used "configure; make;
|
||||
make install" (autotools) process.
|
||||
|
||||
To build PCRE2 on system that supports autotools, first run the "configure"
|
||||
command from the PCRE2 distribution directory, with your current directory set
|
||||
If you have downloaded and unpacked a PCRE2 release tarball, run the
|
||||
"configure" command from the PCRE2 directory, with your current directory set
|
||||
to the directory where you want the files to be created. This command is a
|
||||
standard GNU "autoconf" configuration script, for which generic instructions
|
||||
are supplied in the file INSTALL.
|
||||
|
||||
The files in the GitHub repository do not contain "configure". If you have
|
||||
downloaded the PCRE2 source files from GitHub, before you can run "configure"
|
||||
you must run the shell script called autogen.sh. This runs a number of
|
||||
autotools to create a "configure" script (you must of course have the autotools
|
||||
commands installed in order to do this).
|
||||
|
||||
Most commonly, people build PCRE2 within its own distribution directory, and in
|
||||
this case, on many systems, just running "./configure" is sufficient. However,
|
||||
the usual methods of changing standard defaults are available. For example:
|
||||
|
@ -188,10 +194,10 @@ library. They are also documented in the pcre2build man page.
|
|||
|
||||
As well as supporting UTF strings, Unicode support includes support for the
|
||||
\P, \p, and \X sequences that recognize Unicode character properties.
|
||||
However, only the basic two-letter properties such as Lu are supported.
|
||||
Escape sequences such as \d and \w in patterns do not by default make use of
|
||||
Unicode properties, but can be made to do so by setting the PCRE2_UCP option
|
||||
or starting a pattern with (*UCP).
|
||||
However, only a subset of Unicode properties are supported; see the
|
||||
pcre2pattern man page for details. Escape sequences such as \d and \w in
|
||||
patterns do not by default make use of Unicode properties, but can be made to
|
||||
do so by setting the PCRE2_UCP option or starting a pattern with (*UCP).
|
||||
|
||||
. You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
|
||||
of the preceding, or any of the Unicode newline sequences, or the NUL (zero)
|
||||
|
@ -411,7 +417,7 @@ The "configure" script builds the following files for the basic C library:
|
|||
. Makefile the makefile that builds the library
|
||||
. src/config.h build-time configuration options for the library
|
||||
. src/pcre2.h the public PCRE2 header file
|
||||
. pcre2-config script that shows the building settings such as CFLAGS
|
||||
. pcre2-config script that shows the building settings such as CFLAGS
|
||||
that were set for "configure"
|
||||
. libpcre2-8.pc )
|
||||
. libpcre2-16.pc ) data for the pkg-config command
|
||||
|
@ -571,9 +577,9 @@ at build time" for more details.
|
|||
Making new tarballs
|
||||
-------------------
|
||||
|
||||
The command "make dist" creates two PCRE2 tarballs, in tar.gz and zip formats.
|
||||
The command "make distcheck" does the same, but then does a trial build of the
|
||||
new distribution to ensure that it works.
|
||||
The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and
|
||||
zip formats. The command "make distcheck" does the same, but then does a trial
|
||||
build of the new distribution to ensure that it works.
|
||||
|
||||
If you have modified any of the man page sources in the doc directory, you
|
||||
should first run the PrepareRelease script before making a distribution. This
|
||||
|
@ -602,13 +608,13 @@ is available. RunTest outputs a comment when it skips a test.
|
|||
|
||||
Many (but not all) of the tests that are not skipped are run twice if JIT
|
||||
support is available. On the second run, JIT compilation is forced. This
|
||||
testing can be suppressed by putting "nojit" on the RunTest command line.
|
||||
testing can be suppressed by putting "-nojit" on the RunTest command line.
|
||||
|
||||
The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
|
||||
libraries that are enabled. If you want to run just one set of tests, call
|
||||
RunTest with either the -8, -16 or -32 option.
|
||||
|
||||
If valgrind is installed, you can run the tests under it by putting "valgrind"
|
||||
If valgrind is installed, you can run the tests under it by putting "-valgrind"
|
||||
on the RunTest command line. To run pcre2test on just one or more specific test
|
||||
files, give their numbers as arguments to RunTest, for example:
|
||||
|
||||
|
@ -905,4 +911,4 @@ The distribution should contain the files listed below.
|
|||
Philip Hazel
|
||||
Email local part: Philip.Hazel
|
||||
Email domain: gmail.com
|
||||
Last updated: 29 October 2021
|
||||
Last updated: 15 April 2022
|
||||
|
|
|
@ -2640,8 +2640,8 @@ The subject string is passed to <b>pcre2_match()</b> as a pointer in
|
|||
<i>startoffset</i>. The length and offset are in code units, not characters.
|
||||
That is, they are in bytes for the 8-bit library, 16-bit code units for the
|
||||
16-bit library, and 32-bit code units for the 32-bit library, whether or not
|
||||
UTF processing is enabled. As a special case, if <i>subject</i> is NULL and
|
||||
<i>length</i> is zero, the subject is assumed to be an empty string. If
|
||||
UTF processing is enabled. As a special case, if <i>subject</i> is NULL and
|
||||
<i>length</i> is zero, the subject is assumed to be an empty string. If
|
||||
<i>length</i> is non-zero, an error occurs if <i>subject</i> is NULL.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -776,7 +776,7 @@ can be used in any mode, though in 8-bit and 16-bit non-UTF modes these
|
|||
sequences are of course limited to testing characters whose code points are
|
||||
less than U+0100 and U+10000, respectively. In 32-bit non-UTF mode, code points
|
||||
greater than 0x10ffff (the Unicode limit) may be encountered. These are all
|
||||
treated as being in the Unknown script and with an unassigned type.
|
||||
treated as being in the Unknown script and with an unassigned type.
|
||||
</P>
|
||||
<P>
|
||||
Matching characters by Unicode property is not fast, because PCRE2 has to do a
|
||||
|
@ -821,8 +821,8 @@ interpretation at release 5.26 and PCRE2 changed at release 10.40.
|
|||
<P>
|
||||
Unassigned characters (and in non-UTF 32-bit mode, characters with code points
|
||||
greater than 0x10FFFF) are assigned the "Unknown" script. Others that are not
|
||||
part of an identified script are lumped together as "Common". The current list
|
||||
of recognized script names and their 4-character abbreviations can be obtained
|
||||
part of an identified script are lumped together as "Common". The current list
|
||||
of recognized script names and their 4-character abbreviations can be obtained
|
||||
by running this command:
|
||||
<pre>
|
||||
pcre2test -LS
|
||||
|
|
|
@ -241,7 +241,7 @@ The recognized classes are:
|
|||
ES European separator
|
||||
ET European terminator
|
||||
FSI first strong isolate
|
||||
L left-to-right
|
||||
L left-to-right
|
||||
LRE left-to-right embedding
|
||||
LRI left-to-right isolate
|
||||
LRO left-to-right override
|
||||
|
@ -254,7 +254,7 @@ The recognized classes are:
|
|||
RLI right-to-left isolate
|
||||
RLO right-to-left override
|
||||
S segment separator
|
||||
WS white space
|
||||
WS white space
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">CHARACTER CLASSES</a><br>
|
||||
|
|
|
@ -1252,7 +1252,7 @@ pattern, but can be overridden by modifiers on the subject.
|
|||
memory show heap memory usage
|
||||
null_context match with a NULL context
|
||||
null_replacement substitute with NULL replacement
|
||||
null_subject match with NULL subject
|
||||
null_subject match with NULL subject
|
||||
offset=<n> set starting offset
|
||||
offset_limit=<n> set offset limit
|
||||
ovector=<n> set size of output vector
|
||||
|
@ -1693,8 +1693,8 @@ case (they use default values). This modifier cannot be used with the
|
|||
<b>find_limits</b> or <b>substitute_callout</b> modifiers.
|
||||
</P>
|
||||
<P>
|
||||
Similarly, for testing purposes, if the <b>null_subject</b> or
|
||||
<b>null_replacement</b> modifier is set, the subject or replacement string
|
||||
Similarly, for testing purposes, if the <b>null_subject</b> or
|
||||
<b>null_replacement</b> modifier is set, the subject or replacement string
|
||||
pointers are passed as NULL, respectively, to the relevant functions.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
||||
|
|
|
@ -185,8 +185,8 @@ REVISION
|
|||
Last updated: 27 August 2021
|
||||
Copyright (c) 1997-2021 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2API(3) Library Functions Manual PCRE2API(3)
|
||||
|
||||
|
||||
|
@ -3861,8 +3861,8 @@ REVISION
|
|||
Last updated: 14 December 2021
|
||||
Copyright (c) 1997-2021 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3)
|
||||
|
||||
|
||||
|
@ -4457,8 +4457,8 @@ REVISION
|
|||
Last updated: 08 December 2021
|
||||
Copyright (c) 1997-2021 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3)
|
||||
|
||||
|
||||
|
@ -4887,8 +4887,8 @@ REVISION
|
|||
Last updated: 03 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3)
|
||||
|
||||
|
||||
|
@ -5110,8 +5110,8 @@ REVISION
|
|||
Last updated: 08 December 2021
|
||||
Copyright (c) 1997-2021 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2JIT(3) Library Functions Manual PCRE2JIT(3)
|
||||
|
||||
|
||||
|
@ -5537,8 +5537,8 @@ REVISION
|
|||
Last updated: 30 November 2021
|
||||
Copyright (c) 1997-2021 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3)
|
||||
|
||||
|
||||
|
@ -5607,8 +5607,8 @@ REVISION
|
|||
Last updated: 02 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3)
|
||||
|
||||
|
||||
|
@ -5832,8 +5832,8 @@ REVISION
|
|||
Last updated: 28 August 2021
|
||||
Copyright (c) 1997-2021 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3)
|
||||
|
||||
|
||||
|
@ -6212,8 +6212,8 @@ REVISION
|
|||
Last updated: 04 September 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PATTERN(3) Library Functions Manual PCRE2PATTERN(3)
|
||||
|
||||
|
||||
|
@ -9698,8 +9698,8 @@ REVISION
|
|||
Last updated: 12 January 2022
|
||||
Copyright (c) 1997-2022 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PERFORM(3) Library Functions Manual PCRE2PERFORM(3)
|
||||
|
||||
|
||||
|
@ -9933,8 +9933,8 @@ REVISION
|
|||
Last updated: 03 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2POSIX(3) Library Functions Manual PCRE2POSIX(3)
|
||||
|
||||
|
||||
|
@ -10267,8 +10267,8 @@ REVISION
|
|||
Last updated: 26 April 2021
|
||||
Copyright (c) 1997-2021 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2SAMPLE(3) Library Functions Manual PCRE2SAMPLE(3)
|
||||
|
||||
|
||||
|
@ -10545,8 +10545,8 @@ REVISION
|
|||
Last updated: 27 June 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2SYNTAX(3) Library Functions Manual PCRE2SYNTAX(3)
|
||||
|
||||
|
||||
|
@ -11093,8 +11093,8 @@ REVISION
|
|||
Last updated: 12 January 2022
|
||||
Copyright (c) 1997-2022 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3)
|
||||
|
||||
|
||||
|
@ -11530,5 +11530,5 @@ REVISION
|
|||
Last updated: 22 December 2021
|
||||
Copyright (c) 1997-2021 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -2624,8 +2624,8 @@ The subject string is passed to \fBpcre2_match()\fP as a pointer in
|
|||
\fIstartoffset\fP. The length and offset are in code units, not characters.
|
||||
That is, they are in bytes for the 8-bit library, 16-bit code units for the
|
||||
16-bit library, and 32-bit code units for the 32-bit library, whether or not
|
||||
UTF processing is enabled. As a special case, if \fIsubject\fP is NULL and
|
||||
\fIlength\fP is zero, the subject is assumed to be an empty string. If
|
||||
UTF processing is enabled. As a special case, if \fIsubject\fP is NULL and
|
||||
\fIlength\fP is zero, the subject is assumed to be an empty string. If
|
||||
\fIlength\fP is non-zero, an error occurs if \fIsubject\fP is NULL.
|
||||
.P
|
||||
If \fIstartoffset\fP is greater than the length of the subject,
|
||||
|
|
|
@ -772,7 +772,7 @@ can be used in any mode, though in 8-bit and 16-bit non-UTF modes these
|
|||
sequences are of course limited to testing characters whose code points are
|
||||
less than U+0100 and U+10000, respectively. In 32-bit non-UTF mode, code points
|
||||
greater than 0x10ffff (the Unicode limit) may be encountered. These are all
|
||||
treated as being in the Unknown script and with an unassigned type.
|
||||
treated as being in the Unknown script and with an unassigned type.
|
||||
.P
|
||||
Matching characters by Unicode property is not fast, because PCRE2 has to do a
|
||||
multistage table lookup in order to find a character's property. That is why
|
||||
|
@ -818,12 +818,12 @@ interpretation at release 5.26 and PCRE2 changed at release 10.40.
|
|||
.P
|
||||
Unassigned characters (and in non-UTF 32-bit mode, characters with code points
|
||||
greater than 0x10FFFF) are assigned the "Unknown" script. Others that are not
|
||||
part of an identified script are lumped together as "Common". The current list
|
||||
of recognized script names and their 4-character abbreviations can be obtained
|
||||
part of an identified script are lumped together as "Common". The current list
|
||||
of recognized script names and their 4-character abbreviations can be obtained
|
||||
by running this command:
|
||||
.sp
|
||||
pcre2test -LS
|
||||
.sp
|
||||
.sp
|
||||
.
|
||||
.
|
||||
.
|
||||
|
|
|
@ -211,7 +211,7 @@ The recognized classes are:
|
|||
ES European separator
|
||||
ET European terminator
|
||||
FSI first strong isolate
|
||||
L left-to-right
|
||||
L left-to-right
|
||||
LRE left-to-right embedding
|
||||
LRI left-to-right isolate
|
||||
LRO left-to-right override
|
||||
|
@ -224,7 +224,7 @@ The recognized classes are:
|
|||
RLI right-to-left isolate
|
||||
RLO right-to-left override
|
||||
S segment separator
|
||||
WS white space
|
||||
WS white space
|
||||
.
|
||||
.
|
||||
.SH "CHARACTER CLASSES"
|
||||
|
|
|
@ -1217,7 +1217,7 @@ pattern, but can be overridden by modifiers on the subject.
|
|||
memory show heap memory usage
|
||||
null_context match with a NULL context
|
||||
null_replacement substitute with NULL replacement
|
||||
null_subject match with NULL subject
|
||||
null_subject match with NULL subject
|
||||
offset=<n> set starting offset
|
||||
offset_limit=<n> set offset limit
|
||||
ovector=<n> set size of output vector
|
||||
|
@ -1651,8 +1651,8 @@ testing that the matching and substitution functions behave correctly in this
|
|||
case (they use default values). This modifier cannot be used with the
|
||||
\fBfind_limits\fP or \fBsubstitute_callout\fP modifiers.
|
||||
.P
|
||||
Similarly, for testing purposes, if the \fBnull_subject\fP or
|
||||
\fBnull_replacement\fP modifier is set, the subject or replacement string
|
||||
Similarly, for testing purposes, if the \fBnull_subject\fP or
|
||||
\fBnull_replacement\fP modifier is set, the subject or replacement string
|
||||
pointers are passed as NULL, respectively, to the relevant functions.
|
||||
.
|
||||
.
|
||||
|
|
|
@ -97,6 +97,9 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* Have PTHREAD_PRIO_INHERIT. */
|
||||
/* #undef HAVE_PTHREAD_PRIO_INHERIT */
|
||||
|
||||
/* Define to 1 if you have the <readline.h> header file. */
|
||||
/* #undef HAVE_READLINE_H */
|
||||
|
||||
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||
/* #undef HAVE_READLINE_HISTORY_H */
|
||||
|
||||
|
@ -233,7 +236,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_NAME "PCRE2"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE2 10.39"
|
||||
#define PACKAGE_STRING "PCRE2 10.40"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre2"
|
||||
|
@ -242,7 +245,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "10.39"
|
||||
#define PACKAGE_VERSION "10.40"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
|
@ -435,7 +438,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#endif
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "10.39"
|
||||
#define VERSION "10.40"
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
|
|
@ -97,6 +97,9 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* Have PTHREAD_PRIO_INHERIT. */
|
||||
#undef HAVE_PTHREAD_PRIO_INHERIT
|
||||
|
||||
/* Define to 1 if you have the <readline.h> header file. */
|
||||
#undef HAVE_READLINE_H
|
||||
|
||||
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||
#undef HAVE_READLINE_HISTORY_H
|
||||
|
||||
|
|
|
@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 39
|
||||
#define PCRE2_MINOR 40
|
||||
#define PCRE2_PRERELEASE
|
||||
#define PCRE2_DATE 2021-10-29
|
||||
#define PCRE2_DATE 2022-04-14
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
|
|
|
@ -7605,7 +7605,7 @@ while (*cc != XCL_END)
|
|||
break;
|
||||
}
|
||||
compares++;
|
||||
/* Fall through */
|
||||
/* Fall through */
|
||||
|
||||
case PT_SC:
|
||||
unicode_status |= XCLASS_HAS_SCRIPT;
|
||||
|
@ -7846,7 +7846,7 @@ if (unicode_status & XCLASS_NEEDS_UCD)
|
|||
case PT_SCX:
|
||||
if (cc[-1] == XCL_NOTPROP)
|
||||
break;
|
||||
/* Fall through */
|
||||
/* Fall through */
|
||||
|
||||
case PT_SC:
|
||||
compares--;
|
||||
|
|
|
@ -230,7 +230,7 @@ for (; len > 0; len--)
|
|||
/* When there is no UTF/UCP support, the table of names does not exist. This
|
||||
function should not be called in such configurations, because a pattern that
|
||||
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
|
||||
into the main code, however, we just put one into this function.
|
||||
into the main code, however, we just put one into this function.
|
||||
|
||||
Now that the table contains both full names and their abbreviations, we do some
|
||||
fiddling to try to get the full name, which is either the longer of two found
|
||||
|
@ -248,28 +248,28 @@ unsigned int ptypex = (ptype == PT_SC)? PT_SCX : ptype;
|
|||
for (int i = PRIV(utt_size) - 1; i >= 0; i--)
|
||||
{
|
||||
const ucp_type_table *u = PRIV(utt) + i;
|
||||
|
||||
if ((ptype == u->type || ptypex == u->type) && pvalue == u->value)
|
||||
|
||||
if ((ptype == u->type || ptypex == u->type) && pvalue == u->value)
|
||||
{
|
||||
const char *s = PRIV(utt_names) + u->name_offset;
|
||||
size_t sl = strlen(s);
|
||||
|
||||
|
||||
if (sl == 3 && (u->type == PT_SC || u->type == PT_SCX))
|
||||
{
|
||||
yield = s;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (sl > len)
|
||||
{
|
||||
yield = s;
|
||||
len = sl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (++count >= 2) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return yield;
|
||||
|
||||
#else /* No UTF support */
|
||||
|
@ -303,7 +303,7 @@ print_prop(FILE *f, PCRE2_SPTR code, const char *before, const char *after)
|
|||
{
|
||||
if (code[1] != PT_CLIST)
|
||||
{
|
||||
const char *sc = (code[1] == PT_SC)? "script:" : "";
|
||||
const char *sc = (code[1] == PT_SC)? "script:" : "";
|
||||
const char *s = get_ucpname(code[1], code[2]);
|
||||
fprintf(f, "%s%s %s%c%s%s", before, OP_names[*code], sc, toupper(s[0]), s+1, after);
|
||||
}
|
||||
|
@ -755,7 +755,7 @@ for(;;)
|
|||
{
|
||||
unsigned int ptype = *ccode++;
|
||||
unsigned int pvalue = *ccode++;
|
||||
const char *s;
|
||||
const char *s;
|
||||
|
||||
switch(ptype)
|
||||
{
|
||||
|
@ -772,7 +772,7 @@ for(;;)
|
|||
break;
|
||||
|
||||
default:
|
||||
s = get_ucpname(ptype, pvalue);
|
||||
s = get_ucpname(ptype, pvalue);
|
||||
fprintf(f, "\\%c{%c%s}", (not? 'P':'p'), toupper(s[0]), s+1);
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -119,8 +119,8 @@ const uint8_t PRIV(utf8_table4)[] = {
|
|||
|
||||
#endif /* UTF-8 support needed */
|
||||
|
||||
/* Tables concerned with Unicode properties are relevant only when Unicode
|
||||
support is enabled. See also the pcre2_ucptables.c file, which is generated by
|
||||
/* Tables concerned with Unicode properties are relevant only when Unicode
|
||||
support is enabled. See also the pcre2_ucptables.c file, which is generated by
|
||||
a Python script from Unicode data files. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
|
@ -224,7 +224,7 @@ const int PRIV(ucp_typerange)[] = {
|
|||
};
|
||||
#endif /* SUPPORT_JIT */
|
||||
|
||||
/* Finally, include the tables that are auto-generated from the Unicode data
|
||||
/* Finally, include the tables that are auto-generated from the Unicode data
|
||||
files. */
|
||||
|
||||
#include "pcre2_ucptables.c"
|
||||
|
|
|
@ -135,7 +135,7 @@ while ((t = *data++) != XCL_END)
|
|||
{
|
||||
const ucd_record *prop = GET_UCD(c);
|
||||
BOOL isprop = t == XCL_PROP;
|
||||
BOOL ok;
|
||||
BOOL ok;
|
||||
|
||||
switch(*data)
|
||||
{
|
||||
|
@ -213,17 +213,17 @@ while ((t = *data++) != XCL_END)
|
|||
return !negated;
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
case PT_BIDICL:
|
||||
if ((UCD_BIDICLASS_PROP(prop) == data[1]) == isprop)
|
||||
return !negated;
|
||||
break;
|
||||
|
||||
break;
|
||||
|
||||
case PT_BOOL:
|
||||
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||
UCD_BPROPS_PROP(prop), data[1]) != 0;
|
||||
if (ok == isprop) return !negated;
|
||||
break;
|
||||
break;
|
||||
|
||||
/* The following three properties can occur only in an XCLASS, as there
|
||||
is no \p or \P coding for them. */
|
||||
|
|
|
@ -8502,6 +8502,7 @@ static void
|
|||
display_properties(BOOL wantscripts)
|
||||
{
|
||||
#ifndef SUPPORT_UNICODE
|
||||
(void)wantscripts;
|
||||
printf("** This version of PCRE2 was compiled without Unicode support.\n");
|
||||
#else
|
||||
|
||||
|
|
Loading…
Reference in New Issue