From 4e4f273f07501f813947c95ad8ac7a593906b2e2 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Tue, 16 Apr 2019 15:34:27 +0000 Subject: [PATCH] Final file tidies for 10.33. --- ChangeLog | 9 +++-- NEWS | 2 +- README | 26 +++++++------- configure.ac | 4 +-- doc/html/README.txt | 26 +++++++------- doc/html/pcre2_jit_compile.html | 2 +- doc/html/pcre2jit.html | 8 ++--- doc/html/pcre2unicode.html | 4 +-- doc/pcre2.txt | 60 ++++++++++++++++----------------- doc/pcre2_jit_compile.3 | 2 +- doc/pcre2jit.3 | 8 ++--- doc/pcre2unicode.3 | 4 +-- src/config.h.generic | 6 ++-- src/pcre2.h.generic | 4 +-- src/pcre2_compile.c | 12 +++---- src/pcre2_printint.c | 2 +- 16 files changed, 94 insertions(+), 85 deletions(-) diff --git a/ChangeLog b/ChangeLog index df46a58..66c6d0b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -2,7 +2,7 @@ Change Log for PCRE2 -------------------- -Version 10.33 11-March-2019 +Version 10.33 16-April-2019 --------------------------- 1. Added "allvector" to pcre2test to make it easy to check the part of the @@ -149,9 +149,14 @@ from Ross Burton. 36. Disable SSE2 JIT optimizations in x86 CPUs when SSE2 is not available. Patch by Guillem Jover. -37. Changed expressions such as 1<<10 to 1u<<10 in many places because compiler +37. Changed expressions such as 1<<10 to 1u<<10 in many places because compiler warnings were reported. +38. Using the clang compiler with sanitizing options causes runtime complaints +about truncation for statements such as x = ~x when x is an 8-bit value; it +seems to compute ~x as a 32-bit value. Changing such statements to x = 255 ^ x +gets rid of the warnings. There were also two missing casts in pcre2test. 
+ Version 10.32 10-September-2018 ------------------------------- diff --git a/NEWS b/NEWS index c6a7b0e..bd0df86 100644 --- a/NEWS +++ b/NEWS @@ -2,7 +2,7 @@ News about PCRE2 releases ------------------------- -Version 10.33-RC1 03-March-2019 +Version 10.33-RC1 16-April-2019 ------------------------------- Yet more bugfixes, tidies, and a few enhancements, summarized here (see diff --git a/README b/README index 2a26f9d..ff9a6af 100644 --- a/README +++ b/README @@ -1,9 +1,11 @@ README file for PCRE2 (Perl-compatible regular expression library) ------------------------------------------------------------------ -PCRE2 is a re-working of the original PCRE library to provide an entirely new -API. The latest release of PCRE2 is always available in three alternative -formats from: +PCRE2 is a re-working of the original PCRE1 library to provide an entirely new +API. Since its initial release in 2015, there has been further development of +the code and it now differs from PCRE1 in more than just the API. There are new +features and the internals have been improved. The latest release of PCRE2 is +always available in three alternative formats from: ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.gz ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.bz2 @@ -39,15 +41,15 @@ The PCRE2 APIs PCRE2 is written in C, and it has its own API. There are three sets of functions, one for the 8-bit library, which processes strings of bytes, one for the 16-bit library, which processes strings of 16-bit values, and one for the -32-bit library, which processes strings of 32-bit values. There are no C++ -wrappers. +32-bit library, which processes strings of 32-bit values. Unlike PCRE1, there +are no C++ wrappers. -In addition, the distribution contains a set of C wrapper functions for the -8-bit library that are based on the POSIX regular expression API (see the -pcre2posix man page). These are built into a library called libpcre2-posix. 
-Note that this just provides a POSIX calling interface to PCRE2; the regular -expressions themselves still follow Perl syntax and semantics. The POSIX API is -restricted, and does not give full access to all of PCRE2's facilities. +The distribution does contain a set of C wrapper functions for the 8-bit +library that are based on the POSIX regular expression API (see the pcre2posix +man page). These are built into a library called libpcre2-posix. Note that this +just provides a POSIX calling interface to PCRE2; the regular expressions +themselves still follow Perl syntax and semantics. The POSIX API is restricted, +and does not give full access to all of PCRE2's facilities. The header file for the POSIX-style functions is called pcre2posix.h. The official POSIX name is regex.h, but I did not want to risk possible problems @@ -890,4 +892,4 @@ The distribution should contain the files listed below. Philip Hazel Email local part: ph10 Email domain: cam.ac.uk -Last updated: 03 March 2019 +Last updated: 16 April 2019 diff --git a/configure.ac b/configure.ac index b725de5..93c2b53 100644 --- a/configure.ac +++ b/configure.ac @@ -10,8 +10,8 @@ dnl be defined as -RC2, for example. For real releases, it should be empty. m4_define(pcre2_major, [10]) m4_define(pcre2_minor, [33]) -m4_define(pcre2_prerelease, [-RC1]) -m4_define(pcre2_date, [2019-03-03]) +m4_define(pcre2_prerelease, []) +m4_define(pcre2_date, [2019-04-16]) # NOTE: The CMakeLists.txt file searches for the above variables in the first # 50 lines of this file. Please update that if the variables above are moved. diff --git a/doc/html/README.txt b/doc/html/README.txt index 2a26f9d..ff9a6af 100644 --- a/doc/html/README.txt +++ b/doc/html/README.txt @@ -1,9 +1,11 @@ README file for PCRE2 (Perl-compatible regular expression library) ------------------------------------------------------------------ -PCRE2 is a re-working of the original PCRE library to provide an entirely new -API. 
The latest release of PCRE2 is always available in three alternative -formats from: +PCRE2 is a re-working of the original PCRE1 library to provide an entirely new +API. Since its initial release in 2015, there has been further development of +the code and it now differs from PCRE1 in more than just the API. There are new +features and the internals have been improved. The latest release of PCRE2 is +always available in three alternative formats from: ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.gz ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.bz2 @@ -39,15 +41,15 @@ The PCRE2 APIs PCRE2 is written in C, and it has its own API. There are three sets of functions, one for the 8-bit library, which processes strings of bytes, one for the 16-bit library, which processes strings of 16-bit values, and one for the -32-bit library, which processes strings of 32-bit values. There are no C++ -wrappers. +32-bit library, which processes strings of 32-bit values. Unlike PCRE1, there +are no C++ wrappers. -In addition, the distribution contains a set of C wrapper functions for the -8-bit library that are based on the POSIX regular expression API (see the -pcre2posix man page). These are built into a library called libpcre2-posix. -Note that this just provides a POSIX calling interface to PCRE2; the regular -expressions themselves still follow Perl syntax and semantics. The POSIX API is -restricted, and does not give full access to all of PCRE2's facilities. +The distribution does contain a set of C wrapper functions for the 8-bit +library that are based on the POSIX regular expression API (see the pcre2posix +man page). These are built into a library called libpcre2-posix. Note that this +just provides a POSIX calling interface to PCRE2; the regular expressions +themselves still follow Perl syntax and semantics. The POSIX API is restricted, +and does not give full access to all of PCRE2's facilities. 
The header file for the POSIX-style functions is called pcre2posix.h. The official POSIX name is regex.h, but I did not want to risk possible problems @@ -890,4 +892,4 @@ The distribution should contain the files listed below. Philip Hazel Email local part: ph10 Email domain: cam.ac.uk -Last updated: 03 March 2019 +Last updated: 16 April 2019 diff --git a/doc/html/pcre2_jit_compile.html b/doc/html/pcre2_jit_compile.html index 2e13867..bbaa82c 100644 --- a/doc/html/pcre2_jit_compile.html +++ b/doc/html/pcre2_jit_compile.html @@ -40,7 +40,7 @@ bits: PCRE2_JIT_COMPLETE compile code for full matching PCRE2_JIT_PARTIAL_SOFT compile code for soft partial matching PCRE2_JIT_PARTIAL_HARD compile code for hard partial matching - PCRE2_JIT_INVALID_UTF compile code to handle invalid UTF + PCRE2_JIT_INVALID_UTF compile code to handle invalid UTF The yield of the function is 0 for success, or a negative error code otherwise. In particular, PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or diff --git a/doc/html/pcre2jit.html b/doc/html/pcre2jit.html index 0e33944..cb4eb88 100644 --- a/doc/html/pcre2jit.html +++ b/doc/html/pcre2jit.html @@ -155,11 +155,11 @@ via JIT. However, if the option PCRE2_JIT_INVALID_UTF is passed to UTF is compiled.

-In this mode, an invalid code unit sequence never matches any pattern item. It -does not match dot, it does not match \p{Any}, it does not even match negative +In this mode, an invalid code unit sequence never matches any pattern item. It +does not match dot, it does not match \p{Any}, it does not even match negative items such as [^X]. A lookbehind assertion fails if it encounters an invalid -sequence while moving the current point backwards. In other words, an invalid -UTF code unit sequence acts as a barrier which no match can cross. Reaching an +sequence while moving the current point backwards. In other words, an invalid +UTF code unit sequence acts as a barrier which no match can cross. Reaching an invalid sequence causes an immediate backtrack.

diff --git a/doc/html/pcre2unicode.html b/doc/html/pcre2unicode.html index 2ed2cdd..c11c7c2 100644 --- a/doc/html/pcre2unicode.html +++ b/doc/html/pcre2unicode.html @@ -263,9 +263,9 @@ only valid UTF code unit sequences.

If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result -is usually undefined and your program may crash or loop indefinitely. There is, +is usually undefined and your program may crash or loop indefinitely. There is, however, one mode of matching that can handle invalid UTF subject strings. This -is matching via the JIT optimization using the PCRE2_JIT_INVALID_UTF option +is matching via the JIT optimization using the PCRE2_JIT_INVALID_UTF option when calling pcre2_jit_compile(). For details, see the pcre2jit documentation. diff --git a/doc/pcre2.txt b/doc/pcre2.txt index 316ccf9..9c956e8 100644 --- a/doc/pcre2.txt +++ b/doc/pcre2.txt @@ -180,8 +180,8 @@ REVISION Last updated: 17 September 2018 Copyright (c) 1997-2018 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2API(3) Library Functions Manual PCRE2API(3) @@ -3681,8 +3681,8 @@ REVISION Last updated: 14 February 2019 Copyright (c) 1997-2019 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3) @@ -4254,8 +4254,8 @@ REVISION Last updated: 03 March 2019 Copyright (c) 1997-2019 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3) @@ -4685,8 +4685,8 @@ REVISION Last updated: 03 February 2019 Copyright (c) 1997-2019 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3) @@ -4890,8 +4890,8 @@ REVISION Last updated: 12 February 2019 Copyright (c) 1997-2019 University of Cambridge. 
------------------------------------------------------------------------------ - - + + PCRE2JIT(3) Library Functions Manual PCRE2JIT(3) @@ -5313,8 +5313,8 @@ REVISION Last updated: 06 March 2019 Copyright (c) 1997-2019 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3) @@ -5383,8 +5383,8 @@ REVISION Last updated: 02 February 2019 Copyright (c) 1997-2019 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3) @@ -5604,8 +5604,8 @@ REVISION Last updated: 10 October 2018 Copyright (c) 1997-2018 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3) @@ -6044,8 +6044,8 @@ REVISION Last updated: 22 December 2014 Copyright (c) 1997-2014 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2PATTERN(3) Library Functions Manual PCRE2PATTERN(3) @@ -9388,8 +9388,8 @@ REVISION Last updated: 12 February 2019 Copyright (c) 1997-2019 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2PERFORM(3) Library Functions Manual PCRE2PERFORM(3) @@ -9623,8 +9623,8 @@ REVISION Last updated: 03 February 2019 Copyright (c) 1997-2019 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2POSIX(3) Library Functions Manual PCRE2POSIX(3) @@ -9953,8 +9953,8 @@ REVISION Last updated: 30 January 2019 Copyright (c) 1997-2019 University of Cambridge. 
------------------------------------------------------------------------------ - - + + PCRE2SAMPLE(3) Library Functions Manual PCRE2SAMPLE(3) @@ -10232,8 +10232,8 @@ REVISION Last updated: 27 June 2018 Copyright (c) 1997-2018 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2SYNTAX(3) Library Functions Manual PCRE2SYNTAX(3) @@ -10733,8 +10733,8 @@ REVISION Last updated: 11 February 2019 Copyright (c) 1997-2019 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3) @@ -11109,5 +11109,5 @@ REVISION Last updated: 06 March 2019 Copyright (c) 1997-2019 University of Cambridge. ------------------------------------------------------------------------------ - - + + diff --git a/doc/pcre2_jit_compile.3 b/doc/pcre2_jit_compile.3 index c8d3fc0..23dd2d2 100644 --- a/doc/pcre2_jit_compile.3 +++ b/doc/pcre2_jit_compile.3 @@ -29,7 +29,7 @@ bits: PCRE2_JIT_COMPLETE compile code for full matching PCRE2_JIT_PARTIAL_SOFT compile code for soft partial matching PCRE2_JIT_PARTIAL_HARD compile code for hard partial matching - PCRE2_JIT_INVALID_UTF compile code to handle invalid UTF + PCRE2_JIT_INVALID_UTF compile code to handle invalid UTF .sp The yield of the function is 0 for success, or a negative error code otherwise. In particular, PCRE2_ERROR_JIT_BADOPTION is returned if JIT is not supported or diff --git a/doc/pcre2jit.3 b/doc/pcre2jit.3 index a05c756..b7ae2eb 100644 --- a/doc/pcre2jit.3 +++ b/doc/pcre2jit.3 @@ -130,11 +130,11 @@ via JIT. However, if the option PCRE2_JIT_INVALID_UTF is passed to \fBpcre2_jit_compile()\fP, code that can process a subject containing invalid UTF is compiled. .P -In this mode, an invalid code unit sequence never matches any pattern item. 
It -does not match dot, it does not match \ep{Any}, it does not even match negative +In this mode, an invalid code unit sequence never matches any pattern item. It +does not match dot, it does not match \ep{Any}, it does not even match negative items such as [^X]. A lookbehind assertion fails if it encounters an invalid -sequence while moving the current point backwards. In other words, an invalid -UTF code unit sequence acts as a barrier which no match can cross. Reaching an +sequence while moving the current point backwards. In other words, an invalid +UTF code unit sequence acts as a barrier which no match can cross. Reaching an invalid sequence causes an immediate backtrack. .P Using this option, an application can run matches in arbitrary data, knowing diff --git a/doc/pcre2unicode.3 b/doc/pcre2unicode.3 index 34a2c8d..56eb1ea 100644 --- a/doc/pcre2unicode.3 +++ b/doc/pcre2unicode.3 @@ -244,9 +244,9 @@ PCRE2 assumes that the pattern or subject it is given (respectively) contains only valid UTF code unit sequences. .P If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result -is usually undefined and your program may crash or loop indefinitely. There is, +is usually undefined and your program may crash or loop indefinitely. There is, however, one mode of matching that can handle invalid UTF subject strings. This -is matching via the JIT optimization using the PCRE2_JIT_INVALID_UTF option +is matching via the JIT optimization using the PCRE2_JIT_INVALID_UTF option when calling \fBpcre2_jit_compile()\fP. For details, see the .\" HREF \fBpcre2jit\fP diff --git a/src/config.h.generic b/src/config.h.generic index d2ed352..25d45ee 100644 --- a/src/config.h.generic +++ b/src/config.h.generic @@ -218,7 +218,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define PACKAGE_NAME "PCRE2" /* Define to the full name and version of this package. 
*/ -#define PACKAGE_STRING "PCRE2 10.33-RC1" +#define PACKAGE_STRING "PCRE2 10.33" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "pcre2" @@ -227,7 +227,7 @@ sure both macros are undefined; an emulation function will then be used. */ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "10.33-RC1" +#define PACKAGE_VERSION "10.33" /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested parentheses (of any kind) in a pattern. This limits the amount of system @@ -352,7 +352,7 @@ sure both macros are undefined; an emulation function will then be used. */ #endif /* Version number of package */ -#define VERSION "10.33-RC1" +#define VERSION "10.33" /* Define to 1 if on MINIX. */ /* #undef _MINIX */ diff --git a/src/pcre2.h.generic b/src/pcre2.h.generic index 201e314..102b5d9 100644 --- a/src/pcre2.h.generic +++ b/src/pcre2.h.generic @@ -43,8 +43,8 @@ POSSIBILITY OF SUCH DAMAGE. #define PCRE2_MAJOR 10 #define PCRE2_MINOR 33 -#define PCRE2_PRERELEASE -RC1 -#define PCRE2_DATE 2019-03-03 +#define PCRE2_PRERELEASE +#define PCRE2_DATE 2019-04-16 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE2, the appropriate diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index ab48d2d..068735a 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -5948,10 +5948,10 @@ for (;; pptr++) (void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code, CU2BYTES(class_uchardata - code)); if (negate_class && !xclass_has_prop) - { - /* Using 255 ^ instead of ~ avoids clang sanitize warning. */ + { + /* Using 255 ^ instead of ~ avoids clang sanitize warning. 
*/ for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i]; - } + } memcpy(code, classbits, 32); code = class_uchardata + (32 / sizeof(PCRE2_UCHAR)); } @@ -5974,10 +5974,10 @@ for (;; pptr++) if (lengthptr == NULL) /* Save time in the pre-compile phase */ { if (negate_class) - { - /* Using 255 ^ instead of ~ avoids clang sanitize warning. */ + { + /* Using 255 ^ instead of ~ avoids clang sanitize warning. */ for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i]; - } + } memcpy(code, classbits, 32); } code += 32 / sizeof(PCRE2_UCHAR); diff --git a/src/pcre2_printint.c b/src/pcre2_printint.c index 36909ef..b132d44 100644 --- a/src/pcre2_printint.c +++ b/src/pcre2_printint.c @@ -673,7 +673,7 @@ for(;;) map = (uint8_t *)ccode; if (invertmap) { - /* Using 255 ^ instead of ~ avoids clang sanitize warning. */ + /* Using 255 ^ instead of ~ avoids clang sanitize warning. */ for (i = 0; i < 32; i++) inverted_map[i] = 255 ^ map[i]; map = inverted_map; }