diff --git a/CMakeLists.txt b/CMakeLists.txt index 3355201..4da3850 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -382,21 +382,21 @@ SET(PCRE2_SOURCES ${PROJECT_BINARY_DIR}/pcre2_chartables.c src/pcre2_compile.c src/pcre2_config.c - src/pcre2_context.c + src/pcre2_context.c src/pcre2_dfa_match.c - src/pcre2_error.c + src/pcre2_error.c src/pcre2_jit_compile.c src/pcre2_jit_match.c src/pcre2_jit_misc.c src/pcre2_maketables.c src/pcre2_match.c - src/pcre2_match_data.c + src/pcre2_match_data.c src/pcre2_newline.c src/pcre2_ord2utf.c - src/pcre2_pattern_info.c + src/pcre2_pattern_info.c src/pcre2_string_utils.c src/pcre2_study.c - src/pcre2_substring.c + src/pcre2_substring.c src/pcre2_tables.c src/pcre2_ucd.c src/pcre2_valid_utf.c @@ -462,11 +462,11 @@ SET(targets) IF(PCRE2_BUILD_PCRE2_8) ADD_LIBRARY(pcre2-8 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) SET_PROPERTY(TARGET pcre2-8 - PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8) + PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8) SET(targets ${targets} pcre2-8) ADD_LIBRARY(pcre2posix ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES}) SET_PROPERTY(TARGET pcre2posix - PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8) + PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8) SET(targets ${targets} pcre2posix) TARGET_LINK_LIBRARIES(pcre2posix pcre2-8) @@ -503,7 +503,7 @@ ENDIF(PCRE2_BUILD_PCRE2_16) IF(PCRE2_BUILD_PCRE2_32) ADD_LIBRARY(pcre2-32 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) SET_PROPERTY(TARGET pcre2-32 - PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32) + PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32) SET(targets ${targets} pcre2-32) IF(MINGW AND NOT PCRE2_STATIC) @@ -521,7 +521,7 @@ ENDIF(PCRE2_BUILD_PCRE2_32) IF(PCRE2_BUILD_PCRE2GREP) ADD_EXECUTABLE(pcre2grep src/pcre2grep.c) SET_PROPERTY(TARGET pcre2grep - PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8) + PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8) SET(targets ${targets} 
pcre2grep) TARGET_LINK_LIBRARIES(pcre2grep pcre2posix ${PCRE2GREP_LIBS}) ENDIF(PCRE2_BUILD_PCRE2GREP) diff --git a/ChangeLog b/ChangeLog index 6b3dd8e..8fb1583 100644 --- a/ChangeLog +++ b/ChangeLog @@ -5,41 +5,41 @@ Version 10.0 xx-xxxx-2014 ------------------------- Version 10.0 is the first release of PCRE2, a revised API for the PCRE library. -Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to +Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to item 20 for release 8.36. -The code of the library was heavily revised as part of the new API -implementation. Details of each and every modification were not individually -logged. In addition to the API changes, the following changes were made. They -are either new functionality, or bug fixes and other noticeable changes of +The code of the library was heavily revised as part of the new API +implementation. Details of each and every modification were not individually +logged. In addition to the API changes, the following changes were made. They +are either new functionality, or bug fixes and other noticeable changes of behaviour that were implemented after the code had been forked. -1. The test program, now called pcre2test, was re-specified and almost +1. The test program, now called pcre2test, was re-specified and almost completely re-written. Its input is not compatible with input for pcretest. 2. Patterns may start with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) to set the -PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART options for every subject line that is +PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART options for every subject line that is matched by that pattern. -3. For the benefit of those who use PCRE2 via some other application, that is, -not writing the function calls themselves, it is possible to check the PCRE2 -version by matching a pattern such as /(?(VERSION>=10.0)yes|no)/ against a +3. 
For the benefit of those who use PCRE2 via some other application, that is, +not writing the function calls themselves, it is possible to check the PCRE2 +version by matching a pattern such as /(?(VERSION>=10.0)yes|no)/ against a string such as "yesno". -4. There are case-equivalent Unicode characters whose encodings use different -numbers of code units in UTF-8. U+023A and U+2C65 are one example. (It is -theoretically possible for this to happen in UTF-16 too.) If a backreference to -a group containing one of these characters was greedily repeated, and during +4. There are case-equivalent Unicode characters whose encodings use different +numbers of code units in UTF-8. U+023A and U+2C65 are one example. (It is +theoretically possible for this to happen in UTF-16 too.) If a backreference to +a group containing one of these characters was greedily repeated, and during the match a backtrack occurred, the subject might be backtracked by the wrong -number of code units. For example, if /^(\x{23a})\1*(.)/ is matched caselessly -(and in UTF-8 mode) against "\x{23a}\x{2c65}\x{2c65}\x{2c65}", group 2 should +number of code units. For example, if /^(\x{23a})\1*(.)/ is matched caselessly +(and in UTF-8 mode) against "\x{23a}\x{2c65}\x{2c65}\x{2c65}", group 2 should capture the final character, which is the three bytes E2, B1, and A5 in UTF-8. -Incorrect backtracking meant that group 2 captured only the last two bytes. -This bug has been fixed; the new code is slower, but it is used only when the +Incorrect backtracking meant that group 2 captured only the last two bytes. +This bug has been fixed; the new code is slower, but it is used only when the strings matched by the repetition are not all the same length. 5. A pattern such as /()a/ was not setting the "first character must be 'a'" -information. This applied to any pattern with a group that matched no +information. 
This applied to any pattern with a group that matched no characters, for example: /(?:(?=.)|(?$file.rawtxt perl ../CleanTxt <$file.rawtxt >>pcre2.txt @@ -168,17 +168,13 @@ cd .. echo Documentation done if [ "$1" = "doc" ] ; then exit; fi -# FIXME pro tem only do docs -exit - # These files are detrailed; do not detrail the test data because there may be # significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF # line endings and the detrail script removes all trailing white space. The -# configure files are also omitted from the detrailing. +# configure files are also omitted from the detrailing. files="\ Makefile.am \ - Makefile.in \ configure.ac \ README \ LICENCE \ @@ -195,54 +191,45 @@ files="\ RunGrepTest \ RunTest \ pcre2-config.in \ - libpcre.pc.in \ - libpcre16.pc.in \ - libpcre32.pc.in \ - libpcreposix.pc.in \ - libpcrecpp.pc.in \ - config.h.in \ - pcre2_chartables.c.dist \ - pcre2demo.c \ - pcre2grep.c \ - pcre2test.c \ - dftables.c \ - pcre2posix.c \ - pcre2posix.h \ - pcre2.h.in \ - pcre2_internal.h \ - pcre2_byte_order.c \ - pcre2_compile.c \ - pcre2_config.c \ - pcre2_dfa_exec.c \ - pcre2_exec.c \ - pcre2_fullinfo.c \ - pcre2_get.c \ - pcre2_globals.c \ - pcre2_jit_compile.c \ - pcre2_jit_test.c \ - pcre2_maketables.c \ - pcre2_newline.c \ - pcre2_ord2utf8.c \ - pcre16_ord2utf16.c \ - pcre32_ord2utf32.c \ - pcre2_printint.c \ - pcre2_refcount.c \ - pcre2_string_utils.c \ - pcre2_study.c \ - pcre2_tables.c \ - pcre2_valid_utf8.c \ - pcre2_version.c \ - pcre2_xclass.c \ - pcre16_utf16_utils.c \ - pcre32_utf32_utils.c \ - pcre16_valid_utf16.c \ - pcre32_valid_utf32.c \ perltest.pl \ - ucp.h \ - makevp.bat \ - pcre.def \ - libpcre.def \ - libpcreposix.def" + libpcre2-8.pc.in \ + libpcre2-16.pc.in \ + libpcre2-32.pc.in \ + libpcre2-posix.pc.in \ + src/dftables.c \ + src/pcre2.h.in \ + src/pcre2_auto_possess.c \ + src/pcre2_compile.c \ + src/pcre2_config.c \ + src/pcre2_context.c \ + src/pcre2_dfa_match.c \ + src/pcre2_error.c \ + 
src/pcre2_internal.h \ + src/pcre2_intmodedep.h \ + src/pcre2_jit_compile.c \ + src/pcre2_jit_match.c \ + src/pcre2_jit_misc.c \ + src/pcre2_jit_test.c \ + src/pcre2_maketables.c \ + src/pcre2_match.c \ + src/pcre2_match_data.c \ + src/pcre2_newline.c \ + src/pcre2_ord2utf.c \ + src/pcre2_pattern_info.c \ + src/pcre2_printint.c \ + src/pcre2_string_utils.c \ + src/pcre2_study.c \ + src/pcre2_substring.c \ + src/pcre2_tables.c \ + src/pcre2_ucd.c \ + src/pcre2_ucp.h \ + src/pcre2_valid_utf.c \ + src/pcre2_xclass.c \ + src/pcre2demo.c \ + src/pcre2grep.c \ + src/pcre2posix.c \ + src/pcre2posix.h \ + src/pcre2test.c" echo Detrailing perl ./Detrail $files doc/p* doc/html/* diff --git a/README b/README index f6bb0b6..d765b21 100644 --- a/README +++ b/README @@ -1,7 +1,7 @@ README file for PCRE2 (Perl-compatible regular expression library) ------------------------------------------------------------------ -PCRE2 is a re-implementation of the original PCRE library with an entirely new +PCRE2 is a re-implementation of the original PCRE library with an entirely new API. The latest release of PCRE2 is always available in three alternative formats from: @@ -11,7 +11,7 @@ FIXME: THIS WILL NOT BE THE CASE UNTIL THERE IS A FORMAL RELEASE. ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.bz2 ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.zip -There is a mailing list for discussion about the development of PCRE (both the +There is a mailing list for discussion about the development of PCRE (both the original and new APIs) at pcre-dev@exim.org. You can access the archives and subscribe or manage your subscription here: @@ -41,7 +41,7 @@ The PCRE2 APIs PCRE2 is written in C, and it has its own API. There are three sets of functions, one for the 8-bit library, which processes strings of bytes, one for the 16-bit library, which processes strings of 16-bit values, and one for the -32-bit library, which processes strings of 32-bit values. 
As this is a new API, +32-bit library, which processes strings of 32-bit values. As this is a new API, there as yet no C++ wrappers. The distribution does contain a set of C wrapper functions for the 8-bit @@ -102,7 +102,7 @@ NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and "make" you may be able to build PCRE2 using autotools in the same way as for many Unix-like systems. -PCRE2 can also be configured using CMake, which can be run in various ways +PCRE2 can also be configured using CMake, which can be run in various ways (command line, GUI, etc). This creates Makefiles, solution files, etc. The file NON-AUTOTOOLS-BUILD has information about CMake. @@ -186,13 +186,13 @@ library. They are also documented in the pcre2build man page. handling UTF-8, UTF-16 and UTF-8 is not included. It is not possible to configure one library with UTF support and the other without in the same configuration. - + Even when --enable-unicode is included, the use of a UTF encoding still has to be enabled by an option at run time. When PCRE2 is compiled with this option, its input can only either be ASCII or UTF-8/16/32, even when running on EBCDIC platforms. It is not possible to use both --enable-unicode and --enable-ebcdic at the same time. - + When --enable-unicode is specified, as well as supporting UTF strings, PCRE2 includes support for the \P, \p, and \X sequences that recognize Unicode character properties. However, only the basic two-letter properties such as @@ -248,7 +248,7 @@ library. They are also documented in the pcre2build man page. cause programs to crash in strange ways. There is a discussion about stack sizes in the pcre2stack man page. -. In the 8-bit library, the default maximum compiled pattern size is around +. In the 8-bit library, the default maximum compiled pattern size is around 64K. You can increase this by adding --with-link-size=3 to the "configure" command. 
PCRE2 then uses three bytes instead of two for offsets to different parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is @@ -360,7 +360,7 @@ The "configure" script builds the following files for the basic C library: . src/pcre2.h the public PCRE2 header file . pcre2-config script that shows the building settings such as CFLAGS that were set for "configure" -. libpcre2-8.pc ) +. libpcre2-8.pc ) . libpcre2-16.pc ) data for the pkg-config command . libpcre2-32.pc ) . libpcre2-posix.pc ) @@ -452,7 +452,7 @@ prints the version number, and outputs information about where the 8-bit library is installed. This command can be included in makefiles for programs that use PCRE2, saving the programmer -from having to remember too many details. Run pcre2-config with no arguments to +from having to remember too many details. Run pcre2-config with no arguments to obtain a list of possible arguments. The pkg-config command is another system for saving and retrieving information @@ -593,7 +593,7 @@ bug in PCRE2. The third set of tests checks pcre2_maketables(), the facility for building a set of character tables for a specific locale and using them instead of the -default tables. The script uses the "locale" command to check for the +default tables. The script uses the "locale" command to check for the availability of the "fr_FR", "french", or "fr" locale, and uses the first one that it finds. If the "locale" command fails, or if its output doesn't include "fr_FR", "french", or "fr" in the list of available locales, the third test @@ -609,7 +609,7 @@ of the French locale have been encountered. The test passes if its output matches any one of them. The fourth and fifth tests check UTF and Unicode property support, the fourth -being compatible with the perltest.pl script, and the fifth checking +being compatible with the perltest.pl script, and the fifth checking PCRE2-specific things. 
The sixth and seventh tests check the pcre2_dfa_match() alternative matching @@ -623,8 +623,8 @@ change) and when Unicode support is enabled. The ninth and tenth tests are run only in 8-bit mode, and the eleventh and twelfth tests are run only in 16-bit and 32-bit modes. These are tests that generate different output in 8-bit mode. Each pair are for general cases and -Unicode support, respectively. The thirteenth test checks the handling of -non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit +Unicode support, respectively. The thirteenth test checks the handling of +non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit modes. The fourteenth test is run only when JIT support is not available, and the @@ -632,8 +632,8 @@ fifteenth test is run only when JIT support is available. They test some JIT-specific features such as information output from pcre2test about JIT compilation. -The sixteenth and seventeenth tests are run only in 8-bit mode. They check the -POSIX interface to the 8-bit library, withouth and with Unicode support, +The sixteenth and seventeenth tests are run only in 8-bit mode. They check the +POSIX interface to the 8-bit library, withouth and with Unicode support, respectively. @@ -692,9 +692,9 @@ will cause PCRE2 to malfunction. File manifest ------------- -The distribution should contain the files listed below. +The distribution should contain the files listed below. -(A) Source files for the PCRE2 library functions and their headers are found in +(A) Source files for the PCRE2 library functions and their headers are found in the src directory: src/dftables.c auxiliary program for building pcre2_chartables.c @@ -705,25 +705,25 @@ The distribution should contain the files listed below. 
specified, used by copying to pcre2_chartables.c src/pcre2posix.c ) - src/pcre2_auto_possess.c ) + src/pcre2_auto_possess.c ) src/pcre2_compile.c ) src/pcre2_config.c ) - src/pcre2_context.c ) + src/pcre2_context.c ) src/pcre2_dfa_match.c ) - src/pcre2_error.c ) + src/pcre2_error.c ) src/pcre2_exec.c ) src/pcre2_jit_compile.c ) - src/pcre2_jit_match.c ) sources for the functions in the library, + src/pcre2_jit_match.c ) sources for the functions in the library, src/pcre2_jit_misc.c ) and some internal functions that they use src/pcre2_maketables.c ) src/pcre2_match.c ) - src/pcre2_match_data.c ) + src/pcre2_match_data.c ) src/pcre2_newline.c ) src/pcre2_ord2utf.c ) src/pcre2_pattern_info.c ) src/pcre2_string_utils.c ) src/pcre2_study.c ) - src/pcre2_substring.c ) + src/pcre2_substring.c ) src/pcre2_tables.c ) src/pcre2_ucd.c ) src/pcre2_valid_utf.c ) diff --git a/RunGrepTest b/RunGrepTest index 35b7658..b5d8a09 100755 --- a/RunGrepTest +++ b/RunGrepTest @@ -23,7 +23,7 @@ pcre2grep=$builddir/pcre2grep if [ ! -x $pcre2grep ] ; then echo "** $pcre2grep does not exist or is not execuatble." 
exit 1 -fi +fi valgrind= while [ $# -gt 0 ] ; do diff --git a/RunTest b/RunTest index 48259de..2f4d31a 100755 --- a/RunTest +++ b/RunTest @@ -126,7 +126,7 @@ fi checkresult() { - if [ $1 -ne 0 ] ; then + if [ $1 -ne 0 ] ; then echo "** pcre2test failed - check testtry" exit 1 fi diff --git a/configure.ac b/configure.ac index 8fd6349..7f53388 100644 --- a/configure.ac +++ b/configure.ac @@ -106,7 +106,7 @@ AC_ARG_ENABLE(pcre32,,,enable_pcre32=no) if test "$enable_pcre8$enable_pcre16$enable_pcre32" != "nonono" then echo "** ERROR: Use --[[en|dis]]able-pcre2-[[8|16|32]], not --[[en|dis]]able-pcre[[8|16|32]]" - exit 1 + exit 1 fi # Handle --disable-pcre2-8 (enabled by default) @@ -512,7 +512,7 @@ if test "$enable_jit" = "yes"; then CC="$PTHREAD_CC" CFLAGS="$PTHREAD_CFLAGS $CFLAGS" LIBS="$PTHREAD_LIBS $LIBS" - fi + fi AC_DEFINE([SUPPORT_JIT], [], [ Define to any value to enable support for Just-In-Time compiling.]) else @@ -538,7 +538,7 @@ if test "$enable_stack_for_recursion" = "no"; then matching. This can sometimes be a problem on systems that have stacks of limited size. Define HEAP_MATCH_RECURSE to any value to get a version that doesn't use recursion in the match() function; instead - it creates its own stack by steam using memory from the heap. For more + it creates its own stack by steam using memory from the heap. For more detail, see the comments and other stuff just above the match() function.]) fi @@ -559,8 +559,8 @@ if test $with_pcre2grep_bufsize -lt 8192 ; then with_pcre2grep_bufsize="8192" else if test $? -gt 1 ; then - AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize]) - fi + AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize]) + fi fi AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [ @@ -579,9 +579,9 @@ elif test "$enable_pcre2test_libreadline" = "yes"; then fi AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [ - The value of NEWLINE_DEFAULT determines the default newline character - sequence. 
PCRE2 client programs can override this by selecting other values - at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), + The value of NEWLINE_DEFAULT determines the default newline character + sequence. PCRE2 client programs can override this by selecting other values + at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5 (ANYCRLF).]) if test "$enable_bsr_anycrlf" = "yes"; then diff --git a/doc/html/NON-AUTOTOOLS-BUILD.txt b/doc/html/NON-AUTOTOOLS-BUILD.txt index 6f36fe6..f5e22a6 100644 --- a/doc/html/NON-AUTOTOOLS-BUILD.txt +++ b/doc/html/NON-AUTOTOOLS-BUILD.txt @@ -1,9 +1,9 @@ Building PCRE2 without using autotools -------------------------------------- -This document has been converted from the PCRE1 document, but is not yet -complete. I have removed a number of quite old sections about building in -various environments, as they applied only to PCRE1 and are probably out of +This document has been converted from the PCRE1 document, but is not yet +complete. I have removed a number of quite old sections about building in +various environments, as they applied only to PCRE1 and are probably out of date. @@ -57,7 +57,7 @@ can skip ahead to the CMake section. environment. In particular, you can alter the definition of the NEWLINE macro to specify what character(s) you want to be interpreted as line terminators. - + When you compile any of the PCRE2 modules, you must specify -DHAVE_CONFIG_H to your compiler so that src/config.h is included in the sources. @@ -100,7 +100,7 @@ can skip ahead to the CMake section. pcre2_chartables.c pcre2_compile.c pcre2_config.c - pcre2_context.c + pcre2_context.c pcre2_dfa_match.c pcre2_error.c pcre2_jit_compile.c @@ -114,7 +114,7 @@ can skip ahead to the CMake section. pcre2_pattern_info.c pcre2_string_utils.c pcre2_study.c - pcre2_substring.c + pcre2_substring.c pcre2_tables.c pcre2_ucd.c pcre2_valid_utf.c @@ -138,8 +138,8 @@ can skip ahead to the CMake section. 
(6) If you want to build a 16-bit library or 32-bit library (as well as, or instead of the 8-bit library) just supply 16 or 32 as the value of - -DPCRE2_CODE_UNIT_WIDTH when you are compiling. - + -DPCRE2_CODE_UNIT_WIDTH when you are compiling. + (7) If you want to build the POSIX wrapper functions (which apply only to the 8-bit library), ensure that you have the pcre2posix.h file and then compile pcre2posix.c. Link the result (on its own) as the pcre2posix @@ -295,7 +295,7 @@ Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no spaces in the names for your CMake installation and your PCRE2 source and build directories. -The following instructions were contributed by a PCRE1 user, but they should +The following instructions were contributed by a PCRE1 user, but they should also work for PCRE2. If they are not followed exactly, errors may occur. In the event that errors do occur, it is recommended that you delete the CMake cache before attempting to repeat the CMake build process. In the CMake GUI, the @@ -394,9 +394,9 @@ required. For details, please see this web site: There is also a mirror here: http://www.vsoft-software.com/downloads.html - -The site currently has ports for PCRE1 releases, but PCRE2 should follow in due -course. + +The site currently has ports for PCRE1 releases, but PCRE2 should follow in due +course. ========================== Last Updated: 28 September 2014 diff --git a/doc/html/README.txt b/doc/html/README.txt index f6bb0b6..d765b21 100644 --- a/doc/html/README.txt +++ b/doc/html/README.txt @@ -1,7 +1,7 @@ README file for PCRE2 (Perl-compatible regular expression library) ------------------------------------------------------------------ -PCRE2 is a re-implementation of the original PCRE library with an entirely new +PCRE2 is a re-implementation of the original PCRE library with an entirely new API. 
The latest release of PCRE2 is always available in three alternative formats from: @@ -11,7 +11,7 @@ FIXME: THIS WILL NOT BE THE CASE UNTIL THERE IS A FORMAL RELEASE. ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.bz2 ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.zip -There is a mailing list for discussion about the development of PCRE (both the +There is a mailing list for discussion about the development of PCRE (both the original and new APIs) at pcre-dev@exim.org. You can access the archives and subscribe or manage your subscription here: @@ -41,7 +41,7 @@ The PCRE2 APIs PCRE2 is written in C, and it has its own API. There are three sets of functions, one for the 8-bit library, which processes strings of bytes, one for the 16-bit library, which processes strings of 16-bit values, and one for the -32-bit library, which processes strings of 32-bit values. As this is a new API, +32-bit library, which processes strings of 32-bit values. As this is a new API, there as yet no C++ wrappers. The distribution does contain a set of C wrapper functions for the 8-bit @@ -102,7 +102,7 @@ NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and "make" you may be able to build PCRE2 using autotools in the same way as for many Unix-like systems. -PCRE2 can also be configured using CMake, which can be run in various ways +PCRE2 can also be configured using CMake, which can be run in various ways (command line, GUI, etc). This creates Makefiles, solution files, etc. The file NON-AUTOTOOLS-BUILD has information about CMake. @@ -186,13 +186,13 @@ library. They are also documented in the pcre2build man page. handling UTF-8, UTF-16 and UTF-8 is not included. It is not possible to configure one library with UTF support and the other without in the same configuration. - + Even when --enable-unicode is included, the use of a UTF encoding still has to be enabled by an option at run time. 
When PCRE2 is compiled with this option, its input can only either be ASCII or UTF-8/16/32, even when running on EBCDIC platforms. It is not possible to use both --enable-unicode and --enable-ebcdic at the same time. - + When --enable-unicode is specified, as well as supporting UTF strings, PCRE2 includes support for the \P, \p, and \X sequences that recognize Unicode character properties. However, only the basic two-letter properties such as @@ -248,7 +248,7 @@ library. They are also documented in the pcre2build man page. cause programs to crash in strange ways. There is a discussion about stack sizes in the pcre2stack man page. -. In the 8-bit library, the default maximum compiled pattern size is around +. In the 8-bit library, the default maximum compiled pattern size is around 64K. You can increase this by adding --with-link-size=3 to the "configure" command. PCRE2 then uses three bytes instead of two for offsets to different parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is @@ -360,7 +360,7 @@ The "configure" script builds the following files for the basic C library: . src/pcre2.h the public PCRE2 header file . pcre2-config script that shows the building settings such as CFLAGS that were set for "configure" -. libpcre2-8.pc ) +. libpcre2-8.pc ) . libpcre2-16.pc ) data for the pkg-config command . libpcre2-32.pc ) . libpcre2-posix.pc ) @@ -452,7 +452,7 @@ prints the version number, and outputs information about where the 8-bit library is installed. This command can be included in makefiles for programs that use PCRE2, saving the programmer -from having to remember too many details. Run pcre2-config with no arguments to +from having to remember too many details. Run pcre2-config with no arguments to obtain a list of possible arguments. The pkg-config command is another system for saving and retrieving information @@ -593,7 +593,7 @@ bug in PCRE2. 
The third set of tests checks pcre2_maketables(), the facility for building a set of character tables for a specific locale and using them instead of the -default tables. The script uses the "locale" command to check for the +default tables. The script uses the "locale" command to check for the availability of the "fr_FR", "french", or "fr" locale, and uses the first one that it finds. If the "locale" command fails, or if its output doesn't include "fr_FR", "french", or "fr" in the list of available locales, the third test @@ -609,7 +609,7 @@ of the French locale have been encountered. The test passes if its output matches any one of them. The fourth and fifth tests check UTF and Unicode property support, the fourth -being compatible with the perltest.pl script, and the fifth checking +being compatible with the perltest.pl script, and the fifth checking PCRE2-specific things. The sixth and seventh tests check the pcre2_dfa_match() alternative matching @@ -623,8 +623,8 @@ change) and when Unicode support is enabled. The ninth and tenth tests are run only in 8-bit mode, and the eleventh and twelfth tests are run only in 16-bit and 32-bit modes. These are tests that generate different output in 8-bit mode. Each pair are for general cases and -Unicode support, respectively. The thirteenth test checks the handling of -non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit +Unicode support, respectively. The thirteenth test checks the handling of +non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit modes. The fourteenth test is run only when JIT support is not available, and the @@ -632,8 +632,8 @@ fifteenth test is run only when JIT support is available. They test some JIT-specific features such as information output from pcre2test about JIT compilation. -The sixteenth and seventeenth tests are run only in 8-bit mode. 
They check the -POSIX interface to the 8-bit library, withouth and with Unicode support, +The sixteenth and seventeenth tests are run only in 8-bit mode. They check the +POSIX interface to the 8-bit library, withouth and with Unicode support, respectively. @@ -692,9 +692,9 @@ will cause PCRE2 to malfunction. File manifest ------------- -The distribution should contain the files listed below. +The distribution should contain the files listed below. -(A) Source files for the PCRE2 library functions and their headers are found in +(A) Source files for the PCRE2 library functions and their headers are found in the src directory: src/dftables.c auxiliary program for building pcre2_chartables.c @@ -705,25 +705,25 @@ The distribution should contain the files listed below. specified, used by copying to pcre2_chartables.c src/pcre2posix.c ) - src/pcre2_auto_possess.c ) + src/pcre2_auto_possess.c ) src/pcre2_compile.c ) src/pcre2_config.c ) - src/pcre2_context.c ) + src/pcre2_context.c ) src/pcre2_dfa_match.c ) - src/pcre2_error.c ) + src/pcre2_error.c ) src/pcre2_exec.c ) src/pcre2_jit_compile.c ) - src/pcre2_jit_match.c ) sources for the functions in the library, + src/pcre2_jit_match.c ) sources for the functions in the library, src/pcre2_jit_misc.c ) and some internal functions that they use src/pcre2_maketables.c ) src/pcre2_match.c ) - src/pcre2_match_data.c ) + src/pcre2_match_data.c ) src/pcre2_newline.c ) src/pcre2_ord2utf.c ) src/pcre2_pattern_info.c ) src/pcre2_string_utils.c ) src/pcre2_study.c ) - src/pcre2_substring.c ) + src/pcre2_substring.c ) src/pcre2_tables.c ) src/pcre2_ucd.c ) src/pcre2_valid_utf.c ) diff --git a/doc/html/index.html b/doc/html/index.html index 4e264ec..3351e23 100644 --- a/doc/html/index.html +++ b/doc/html/index.html @@ -1,10 +1,10 @@ - +--> PCRE2 specification @@ -87,7 +87,7 @@ in the library. There is a single page for each triple of 8-bit/16-bit/32-bit functions.

- +
@@ -153,7 +153,7 @@ functions. - + diff --git a/doc/html/pcre2.html b/doc/html/pcre2.html index f8672b8..a94bd1a 100644 --- a/doc/html/pcre2.html +++ b/doc/html/pcre2.html @@ -43,11 +43,11 @@ of Unicode in use can be discovered by running

-The three libraries contain identical sets of functions, with names ending in -_8, _16, or _32, respectively (for example, pcre2_compile_8()). However, -by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just +The three libraries contain identical sets of functions, with names ending in +_8, _16, or _32, respectively (for example, pcre2_compile_8()). However, +by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just one code unit width can be written using generic names such as -pcre2_compile(), and the documentation is written assuming that this is +pcre2_compile(), and the documentation is written assuming that this is the case.

diff --git a/doc/html/pcre2api.html b/doc/html/pcre2api.html index 66a164f..47733e3 100644 --- a/doc/html/pcre2api.html +++ b/doc/html/pcre2api.html @@ -306,7 +306,7 @@ unknown should also use the real function names. (Unfortunately, it is not possible in C code to save and restore the value of a macro.)

-If PCRE2_CODE_UNIT_WIDTH is not defined before including pcre2.h, a +If PCRE2_CODE_UNIT_WIDTH is not defined before including pcre2.h, a compiler error occurs.

@@ -443,7 +443,7 @@ below.

The choice of newline convention does not affect the interpretation of -the \n or \r escape sequences, nor does it affect what \R matches, which has +the \n or \r escape sequences, nor does it affect what \R matches, which has its own separate control.


MULTITHREADING
@@ -553,7 +553,7 @@ The memory used for a general context should be freed by calling: The compile context

-A compile context is required if you want to change the default values of any +A compile context is required if you want to change the default values of any of the following compile-time parameters:

   What \R matches (Unicode newlines or CR, LF, CRLF only);
@@ -562,7 +562,7 @@ of the following compile-time parameters:
   The compile time nested parentheses limit;
   An external function for stack checking.
 
-A compile context is also required if you are using custom memory management. +A compile context is also required if you are using custom memory management. If none of these apply, just pass NULL as the context argument of pcre2_compile().

@@ -579,33 +579,33 @@ A compile context is created, copied, and freed by the following functions: void pcre2_compile_context_free(pcre2_compile_context *ccontext);

-A compile context is created with default values for its parameters. These can -be changed by calling the following functions, which return 0 on success, or +A compile context is created with default values for its parameters. These can +be changed by calling the following functions, which return 0 on success, or PCRE2_ERROR_BADDATA if invalid data is detected. int pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value);

-The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only CR, LF, -or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line -ending sequence. The value of this parameter does not affect what is compiled; +The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only CR, LF, +or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line +ending sequence. The value of this parameter does not affect what is compiled; it is just saved with the compiled pattern. The value is used by the JIT -compiler and by the two interpreted matching functions, pcre2_match() and +compiler and by the two interpreted matching functions, pcre2_match() and pcre2_dfa_match(). int pcre2_set_character_tables(pcre2_compile_context *ccontext, const unsigned char *tables);

-The value must be the result of a call to pcre2_maketables(), whose only +The value must be the result of a call to pcre2_maketables(), whose only argument is a general context. This function builds a set of character tables in the current locale. int pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t value);

-This specifies which characters or character sequences are to be recognized as +This specifies which characters or character sequences are to be recognized as newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only), -PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character +PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or PCRE2_NEWLINE_ANY (any Unicode newline sequence).

@@ -627,7 +627,7 @@ using up too much system stack when being compiled.

There is at least one application that runs PCRE2 in threads with very limited -system stack, where running out of stack is to be avoided at all costs. The +system stack, where running out of stack is to be avoided at all costs. The parenthesis limit above cannot take account of how much stack is actually available. For a finer control, you can supply a function that is called whenever pcre2_compile() starts to compile a parenthesized part of a @@ -638,20 +638,20 @@ function should return zero if all is well, or non-zero to force an error. The match context

-A match context is required if you want to change the default values of any +A match context is required if you want to change the default values of any of the following match-time parameters:

   What \R matches (Unicode newlines or CR, LF, CRLF only);
   A callout function;
-  The limit for calling match();  
+  The limit for calling match();
   The limit for calling match() recursively;
   The newline character sequence;
 
-A match context is also required if you are using custom memory management. -If none of these apply, just pass NULL as the context argument of +A match context is also required if you are using custom memory management. +If none of these apply, just pass NULL as the context argument of pcre2_match(), pcre2_dfa_match(), or pcre2_jit_match(). -Changing the newline value or what \R matches at match time disables the use -of JIT via pcre2_match(). +Changing the newline value or what \R matches at match time disables the use +of JIT via pcre2_match().

A match context is created, copied, and freed by the following functions: @@ -666,8 +666,8 @@ A match context is created, copied, and freed by the following functions: void pcre2_match_context_free(pcre2_match_context *mcontext);

-A match context is created with default values for its parameters. These can -be changed by calling the following functions, which return 0 on success, or +A match context is created with default values for its parameters. These can +be changed by calling the following functions, which return 0 on success, or PCRE2_ERROR_BADDATA if invalid data is detected. int pcre2_set_callout(pcre2_match_context *mcontext, int (*callout_function)(pcre2_callout_block *), @@ -693,7 +693,7 @@ calls repeatedly (sometimes recursively). The limit set by match_limit is imposed on the number of times this function is called during a match, which has the effect of limiting the amount of backtracking that can take place. For patterns that are not anchored, the count restarts from zero for each position -in the subject string. This limit is not relevant to pcre2_dfa_match(), +in the subject string. This limit is not relevant to pcre2_dfa_match(), which ignores it.

@@ -730,7 +730,7 @@ This limit is of use only if it is set smaller than match_limit. Limiting the recursion depth limits the amount of system stack that can be used, or, when PCRE2 has been compiled to use memory on the heap instead of the stack, the amount of heap memory that can be used. This limit is not relevant, -and is ignored, when matching is done using JIT compiled code or by the +and is ignored, when matching is done using JIT compiled code or by the pcre2_dfa_match() function.

@@ -751,9 +751,9 @@ limit is set, less than the default. void (*private_free)(void *, void *), void *memory_data);

-This function sets up two additional custom memory management functions for use +This function sets up two additional custom memory management functions for use by pcre2_match() when PCRE2 is compiled to use the heap for remembering -backtracking data, instead of recursive function calls that use the system +backtracking data, instead of recursive function calls that use the system stack. There is a discussion about PCRE2's stack usage in the pcre2stack documentation. See the @@ -765,7 +765,7 @@ limited stacks. Because of the greater use of memory management, general custom memory functions are provided so that special-purpose external code can be used for this case, because the memory blocks are all the same size. The blocks are retained by pcre2_match() until it is about to exit -so that they can be re-used when possible during the match. In the absence of +so that they can be re-used when possible during the match. In the absence of these functions, the normal custom memory management functions are used, if supplied, otherwise the system functions.

@@ -785,7 +785,7 @@ required. The second argument is a pointer to memory into which the information is placed. If NULL is passed, the function returns the amount of memory that is needed for the requested information. For calls that return numerical values, the value is in bytes; when requesting these values, where should point -to appropriately aligned memory. For calls that return strings, the required +to appropriately aligned memory. For calls that return strings, the required length is given in code units, not counting the terminating zero.

@@ -809,7 +809,7 @@ compiling is available; otherwise it is set to zero. PCRE2_CONFIG_JITTARGET The where argument should point to a buffer that is at least 48 code -units long. (The exact length needed can be found by calling +units long. (The exact length needed can be found by calling pcre2_config() with where set to NULL.) The buffer is filled with a string that contains the name of the architecture for which the JIT compiler is configured, for example "x86 32bit (little endian + unaligned)". If JIT support @@ -820,9 +820,9 @@ the string, in code units, is returned. The output is an integer that contains the number of bytes used for internal linkage in compiled regular expressions. When PCRE2 is configured, the value -can be set to 2, 3, or 4, with the default being 2. This is the value that is -returned by pcre2_config(). However, when the 16-bit library is compiled, -a value of 3 is rounded up to 4, and when the 32-bit library is compiled, +can be set to 2, 3, or 4, with the default being 2. This is the value that is +returned by pcre2_config(). However, when the 16-bit library is compiled, +a value of 3 is rounded up to 4, and when the 32-bit library is compiled, internal linkages always use 4 bytes, so the configured value is not relevant.

@@ -908,16 +908,16 @@ units) is returned. pcre2_code_free(pcre2_code *code);

-This function compiles a pattern, defined by a pointer to a string of code -units and a length, into an internal form. If the pattern is zero-terminated, -the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a -pointer to a block of memory that contains the compiled pattern and related -data. The caller must free the memory by calling pcre2_code_free() when +This function compiles a pattern, defined by a pointer to a string of code +units and a length, into an internal form. If the pattern is zero-terminated, +the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a +pointer to a block of memory that contains the compiled pattern and related +data. The caller must free the memory by calling pcre2_code_free() when it is no longer needed.

-If the compile context argument ccontext is NULL, the memory is obtained -by calling malloc(). Otherwise, it is obtained from the same memory +If the compile context argument ccontext is NULL, the memory is obtained +by calling malloc(). Otherwise, it is obtained from the same memory function that was used for the compile context.

@@ -927,7 +927,7 @@ options are described below. Some of them (in particular, those that are compatible with Perl, but some others as well) can also be set and unset from within the pattern (see the detailed description in the pcre2pattern -documentation). +documentation).

For those options that can be different in different parts of the pattern, the @@ -936,7 +936,7 @@ compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK options can be set at the time of matching as well as at compile time.

-Other, less frequently required compile-time parameters (for example, the +Other, less frequently required compile-time parameters (for example, the newline setting) can be provided in a compile context (as described above).

@@ -962,10 +962,10 @@ This code fragment shows a typical straightforward call to
   pcre2_code *re;
   PCRE2_SIZE erroffset;
-  int errorcode; 
+  int errorcode;
   re = pcre2_compile(
     "^A.*Z",                /* the pattern */
-    PCRE2_ZERO_TERMINATED,  /* the pattern is zero-terminated */ 
+    PCRE2_ZERO_TERMINATED,  /* the pattern is zero-terminated */
     0,                      /* default options */
     &errorcode,             /* for error code */
     &erroffset,             /* for error offset */
@@ -984,14 +984,14 @@ Perl.
 
   PCRE2_ALLOW_EMPTY_CLASS
 
-By default, for compatibility with Perl, a closing square bracket that -immediately follows an opening one is treated as a data character for the -class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which -therefore contains no characters and so can never match. +By default, for compatibility with Perl, a closing square bracket that +immediately follows an opening one is treated as a data character for the +class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which +therefore contains no characters and so can never match.
   PCRE2_ALT_BSUX
 
-This option request alternative handling of three escape sequences, which +This option requests alternative handling of three escape sequences, which makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set:

@@ -1023,7 +1023,7 @@ documentation.

If this bit is set, letters in the pattern match both upper and lower case letters in the subject. It is equivalent to Perl's /i option, and it can be -changed within a pattern by a (?i) option setting. +changed within a pattern by a (?i) option setting.
   PCRE2_DOLLAR_ENDONLY
 
@@ -1076,7 +1076,7 @@ Which characters are interpreted as newlines can be specified by a setting in the compile context that is passed to pcre2_compile() or by a special sequence at the start of the pattern, as described in the section entitled "Newline conventions" -in the pcre2pattern documentation. A default is defined when PCRE2 is +in the pcre2pattern documentation. A default is defined when PCRE2 is built.
   PCRE2_FIRSTLINE
@@ -1091,7 +1091,7 @@ If this option is set, a back reference to an unset subpattern group matches an
 empty string (by default this causes the current matching alternative to fail).
 A pattern such as (\1)(a) succeeds when this option is set (assuming it can
 find an "a" in the subject), whereas it fails by default, for Perl
-compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka 
+compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka
 JavaScript).
 
   PCRE2_MULTILINE
@@ -1116,10 +1116,10 @@ occurrences of ^ or $ in a pattern, setting PCRE2_MULTILINE has no effect.
   PCRE2_NEVER_UCP
 
This option locks out the use of Unicode properties for handling \B, \b, \D, -\d, \S, \s, \W, \w, and some of the POSIX character classes, as described -for the PCRE2_UCP option below. In particular, it prevents the creator of the -pattern from enabling this facility by starting the pattern with (*UCP). This -may be useful in applications that process patterns from external sources. The +\d, \S, \s, \W, \w, and some of the POSIX character classes, as described +for the PCRE2_UCP option below. In particular, it prevents the creator of the +pattern from enabling this facility by starting the pattern with (*UCP). This +may be useful in applications that process patterns from external sources. The option combination PCRE_UCP and PCRE_NEVER_UCP causes an error.
   PCRE2_NEVER_UTF
@@ -1195,7 +1195,7 @@ pattern
   (*MARK:A)(X|Y)
 
The minimum length for a match is one character. If the subject is "ABC", there -will be attempts to match "ABC", "BC", and "C". An attempt to match an empty +will be attempts to match "ABC", "BC", and "C". An attempt to match an empty string at the end of the subject does not take place, because PCRE2 knows that the subject is now too short, and so the (*MARK) is never encountered. In this case, the optimization does not affect the overall match result, which is still @@ -1211,7 +1211,7 @@ and UTF-32 strings in the pcre2unicode -document. +document. If an invalid UTF sequence is found, pcre2_compile() returns a negative error code.

@@ -1391,9 +1391,9 @@ The possible values for the second argument are defined in pcre2.h, and are as follows:
   PCRE2_INFO_ALLOPTIONS
-  PCRE2_INFO_ARGOPTIONS 
+  PCRE2_INFO_ARGOPTIONS
 
-Return a copy of the pattern's options. The third argument should point to a +Return a copy of the pattern's options. The third argument should point to a uint32_t variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that were passed to pcre2_compile(), whereas PCRE2_INFO_ALLOPTIONS returns the compile options as modified by any top-level option settings at the start @@ -1411,7 +1411,7 @@ alternatives begin with one of the following: \G always .* if PCRE2_DOTALL is set and there are no back references to the subpattern in which .* appears
-For such patterns, the PCRE2_ANCHORED bit is set in the options returned for +For such patterns, the PCRE2_ANCHORED bit is set in the options returned for PCRE2_INFO_ALLOPTIONS.
   PCRE2_INFO_BACKREFMAX
@@ -1499,7 +1499,7 @@ return zero. The third argument should point to a size_t variable.
 
Returns 1 if there is a rightmost literal code unit that must exist in any matched string, other than at its start. The third argument should point to an -uint32_t variable. If there is no such value, 0 is returned. When 1 is +uint32_t variable. If there is no such value, 0 is returned. When 1 is returned, the code unit value itself can be retrieved using PCRE2_INFO_LASTCODEUNIT.

@@ -1657,11 +1657,11 @@ pattern with the JIT compiler does not alter the value returned by this option. void pcre2_match_data_free(pcre2_match_data *match_data);

-Information about successful and unsuccessful matches is placed in a match +Information about successful and unsuccessful matches is placed in a match data block, which is an opaque structure that is accessed by function calls. In particular, the match data block contains a vector of offsets into the subject string that define the matched part of the subject and any substrings that were -capured. This is know as the ovector. +captured. This is known as the ovector.

Before calling pcre2_match() or pcre2_dfa_match() you must create a @@ -1676,12 +1676,12 @@ return the overall matched string.

For pcre2_match_data_create_from_pattern(), the first argument is a -pointer to a compiled pattern. In this case the ovector is created to be +pointer to a compiled pattern. In this case the ovector is created to be exactly the right size to hold all the substrings a pattern might capture.

-The second argument of both these functions ia a pointer to a general context, -which can specify custom memory management for obtaining the memory for the +The second argument of both these functions is a pointer to a general context, +which can specify custom memory management for obtaining the memory for the match data block. If you are not using custom memory management, pass NULL.

@@ -1728,8 +1728,8 @@ Here is an example of a simple call to pcre2_match(): match_data, /* the match data block */ NULL); /* a match context; NULL means use defaults */ -If the subject string is zero-terminated, the length can be given as -PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common +If the subject string is zero-terminated, the length can be given as +PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common matching parameters are to be changed. For details, see the section on the match context above. @@ -1742,7 +1742,7 @@ The subject string is passed to pcre2_match() as a pointer in subject, a length in length, and a starting offset in startoffset. The length and offset are in code units, not characters. That is, they are in bytes for the 8-bit library, 16-bit code units for the -16-bit library, and 32-bit code units for the 32-bit library, whether or not +16-bit library, and 32-bit code units for the 32-bit library, whether or not UTF processing is enabled.

@@ -1752,7 +1752,7 @@ zero, the search for a match starts at the beginning of the subject, and this is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset must point to the start of a character, or to the end of the subject (in UTF-32 mode, one code unit equals one character, so all offsets are valid). Like the -pattern string, the subject may contain binary zeroes. +pattern string, the subject may contain binary zeroes.

A non-zero starting offset is useful when searching for another match in the @@ -1814,7 +1814,7 @@ JIT matching is disabled and the normal interpretive code in The PCRE2_ANCHORED option limits pcre2_match() to matching at the first matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out to be anchored by virtue of its contents, it cannot be made unachored at -matching time. Note that setting the option at match time disables JIT +matching time. Note that setting the option at match time disables JIT matching.

   PCRE2_NOTBOL
@@ -1867,14 +1867,14 @@ and
 UTF-32 strings
 in the
 pcre2unicode
-page. 
+page.
 

If you know that your subject is valid, and you want to skip these checks for performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling pcre2_match(). You might want to do this for the second and subsequent calls to pcre2_match() if you are making repeated calls to find all the -matches in a single subject string. +matches in a single subject string.

NOTE: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid string @@ -1908,9 +1908,9 @@ documentation.


NEWLINE HANDLING WHEN MATCHING

-When PCRE2 is built, a default newline convention is set; this is usually the -standard convention for the operating system. The default can be overridden in -either a +When PCRE2 is built, a default newline convention is set; this is usually the +standard convention for the operating system. The default can be overridden in +either a compile context or a match context. @@ -1953,7 +1953,7 @@ valid newline sequence and explicit \r or \n escapes appear in the pattern.

In general, a pattern matches a certain portion of the subject, and in -addition, further substrings from the subject may be picked out by +addition, further substrings from the subject may be picked out by parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's book, this is called "capturing" in what follows, and the phrase "capturing subpattern" is used for a fragment of a pattern that picks out a substring. @@ -1964,11 +1964,11 @@ pattern.

The overall matched string and any captured substrings are returned to the -caller via a vector of PCRE2_SIZE values, called the ovector. This is +caller via a vector of PCRE2_SIZE values, called the ovector. This is contained within the match data block. -You can obtain direct access to the ovector by calling -pcre2_get_ovector_pointer() to find its address, and +You can obtain direct access to the ovector by calling +pcre2_get_ovector_pointer() to find its address, and pcre2_get_ovector_count() to find the number of pairs of values it contains. Alternatively, you can use the auxiliary functions for accessing captured substrings @@ -2044,26 +2044,26 @@ Other information about the match PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *match_data);

-In addition to the offsets in the ovector, other information about a match is +In addition to the offsets in the ovector, other information about a match is retained in the match data block and can be retrieved by the above functions.

When a (*MARK) name is to be passed back, pcre2_get_mark() returns a -pointer to the zero-terminated name, which is within the compiled pattern. -Otherwise NULL is returned. A (*MARK) name may be available after a failed +pointer to the zero-terminated name, which is within the compiled pattern. +Otherwise NULL is returned. A (*MARK) name may be available after a failed match or a partial match, as well as after a successful one.

The offset of the character at which the successful match started is returned by pcre2_get_startchar(). This can be different to the value of -ovector[0] if the pattern contains the \K escape sequence. Note, +ovector[0] if the pattern contains the \K escape sequence. Note, however, the \K has no effect for a partial match.


Error return values from pcre2_match()

-If pcre2_match() fails, it returns a negative number. This can be +If pcre2_match() fails, it returns a negative number. This can be converted to a text string by calling pcre2_get_error_message(). Negative error codes are also returned by other functions, and are documented with them. The codes are given names in the header file. If UTF checking is in force and @@ -2205,7 +2205,7 @@ argument is a pointer to the match data block, the second is the group number, and the third is a pointer to a variable into which the length is placed.

-The pcre2_substring_copy_bynumber() function copies one string into a +The pcre2_substring_copy_bynumber() function copies one string into a supplied buffer, whereas pcre2_substring_get_bynumber() copies it into new memory, obtained using the same memory allocation function that was used for the match data block. The first two arguments of these functions are a @@ -2220,10 +2220,10 @@ This is updated to contain the actual number of code units used, excluding the terminating zero.

-For pcre2_substring_get_bynumber() the third and fourth arguments point -to variables that are updated with a pointer to the new memory and the number -of code units that comprise the substring, again excluding the terminating -zero. When the substring is no longer needed, the memory should be freed by +For pcre2_substring_get_bynumber() the third and fourth arguments point +to variables that are updated with a pointer to the new memory and the number +of code units that comprise the substring, again excluding the terminating +zero. When the substring is no longer needed, the memory should be freed by calling pcre2_substring_free().

@@ -2237,9 +2237,9 @@ attempt to get memory failed for pcre2_substring_get_bynumber().

   PCRE2_ERROR_NOSUBSTRING
 
-No substring with the given number was captured. This could be because there is -no capturing group of that number in the pattern, or because the group with -that number did not participate in the match, or because the ovector was too +No substring with the given number was captured. This could be because there is +no capturing group of that number in the pattern, or because the group with +that number did not participate in the match, or because the ovector was too small to capture that group.


EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS
@@ -2253,7 +2253,7 @@ small to capture that group.

The pcre2_substring_list_get() function extracts all available substrings and builds a list of pointers to them, and a second list that contains their -lengths (in code units), excluding a terminating zero that is added to each of +lengths (in code units), excluding a terminating zero that is added to each of them. All this is done in a single block of memory that is obtained using the same memory allocation function that was used to get the match data block.

@@ -2265,7 +2265,7 @@ NULL pointer. The address of the list of lengths is returned via therefore need the lengths, you may supply NULL as the lengthsptr argument to disable the creation of a list of lengths. The yield of the function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block -could not be obtained. When the list is no longer needed, it should be freed by +could not be obtained. When the list is no longer needed, it should be freed by calling pcre2_substring_list_free().

@@ -2312,7 +2312,7 @@ name.

Given the number, you can extract the substring directly, or use one of the functions described in the previous section. For convenience, there are also -"byname" functions that correspond to the "bynumber" functions, the only +"byname" functions that correspond to the "bynumber" functions, the only difference being that the second argument is a name instead of a number. However, if PCRE2_DUPNAMES is set and there are duplicate names, the behaviour may not be what you want (see the next section). @@ -2375,7 +2375,7 @@ numbers, and hence the captured data.

The traditional matching function uses a similar algorithm to Perl, which stops when it finds the first match, starting at a given point in the subject. If you -want to find all possible matches, or the longest possible match at a given +want to find all possible matches, or the longest possible match at a given position, consider using the alternative matching function (see below) instead. If you cannot use the alternative function, you can kludge it up by making use of the callout facility, which is described in the @@ -2566,8 +2566,8 @@ fail, this error is given.


SEE ALSO

-pcre2build(3), pcre2libs(3), pcre2callout(3), -pcre2matching(3), pcre2partial(3), pcre2posix(3), +pcre2build(3), pcre2libs(3), pcre2callout(3), +pcre2matching(3), pcre2partial(3), pcre2posix(3), pcre2demo(3), pcre2sample(3), pcre2stack(3).


AUTHOR
diff --git a/doc/html/pcre2build.html b/doc/html/pcre2build.html index 764dd2d..9733057 100644 --- a/doc/html/pcre2build.html +++ b/doc/html/pcre2build.html @@ -88,11 +88,11 @@ single-byte characters, or UTF-8 strings. You can also build two other libraries, called libpcre2-16 and libpcre2-32, which process strings that are contained in vectors of 16-bit and 32-bit code units, respectively. These can be interpreted either as single-unit characters or -UTF-16/UTF-32 strings. To build these additional libraries, add one or both of +UTF-16/UTF-32 strings. To build these additional libraries, add one or both of the following to the configure command:
   --enable-pcre16
-  --enable-pcre32 
+  --enable-pcre32
 
If you do not want the 8-bit library, add
@@ -358,7 +358,7 @@ override this value by specifying a run-time option.
 If you add one of
 
   --enable-pcre2test-libreadline
-  --enable-pcre2test-libedit 
+  --enable-pcre2test-libedit
 
to the configure command, pcre2test is linked with the libreadline orlibedit library, respectively, and when its input is @@ -376,8 +376,8 @@ unmodified distribution version of readline is in use), some extra configuration may be necessary. The INSTALL file for libreadline says this:
-  "Readline uses the termcap functions, but does not link with 
-  the termcap or curses library itself, allowing applications 
+  "Readline uses the termcap functions, but does not link with
+  the termcap or curses library itself, allowing applications
   which link with readline the to choose an appropriate library."
 
If your environment has not been set up so that an appropriate library is diff --git a/doc/html/pcre2demo.html b/doc/html/pcre2demo.html index c6f7b64..5919117 100644 --- a/doc/html/pcre2demo.html +++ b/doc/html/pcre2demo.html @@ -25,7 +25,7 @@ pcre2sample documentation for a short discussion ("man pcre2sample" if you have the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is incompatible with the original PCRE API. -There are actually three libraries, each supporting a different code unit +There are actually three libraries, each supporting a different code unit width. This demonstration program uses the 8-bit library. In Unix-like environments, if PCRE2 is installed in your standard system @@ -56,8 +56,8 @@ the following line. */ /* #define PCRE2_STATIC */ -/* This macro must be defined before including pcre2.h. For a program that uses -only one code unit width, it makes it possible to use generic function names +/* This macro must be defined before including pcre2.h. For a program that uses +only one code unit width, it makes it possible to use generic function names such as pcre2_compile(). */ #define PCRE2_CODE_UNIT_WIDTH 8 @@ -141,7 +141,7 @@ subject_length = strlen((char *)subject); re = pcre2_compile( pattern, /* the pattern */ - PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ + PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ 0, /* default options */ &errornumber, /* for error number */ &erroroffset, /* for error offset */ @@ -151,9 +151,9 @@ re = pcre2_compile( if (re == NULL) { - PCRE2_UCHAR buffer[256]; + PCRE2_UCHAR buffer[256]; pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); - printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset, + printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset, buffer); return 1; } @@ -197,7 +197,7 @@ if (rc < 0) return 1; } -/* Match succeded. 
Get a pointer to the output vector, where string offsets are +/* Match succeded. Get a pointer to the output vector, where string offsets are stored. */ ovector = pcre2_get_ovector_pointer(match_data); @@ -210,7 +210,7 @@ printf("\nMatch succeeded at offset %d\n", (int)ovector[0]); * captured. * *************************************************************************/ -/* The output vector wasn't big enough. This should not happen, because we used +/* The output vector wasn't big enough. This should not happen, because we used pcre2_match_data_create_from_pattern() above. */ if (rc == 0) @@ -261,7 +261,7 @@ if (namecount <= 0) printf("No named substrings\n"); else &name_entry_size); /* where to put the answer */ /* Now we can scan the table and, for each entry, print the number, the name, - and the substring itself. In the 8-bit library the number is held in two + and the substring itself. In the 8-bit library the number is held in two bytes, most significant first. */ tabptr = name_table; @@ -306,7 +306,7 @@ if (namecount <= 0) printf("No named substrings\n"); else if (!find_all) /* Check for -g */ { - pcre2_match_data_free(match_data); /* Release the memory that was used */ + pcre2_match_data_free(match_data); /* Release the memory that was used */ pcre2_code_free(re); /* for the match data and the pattern. */ return 0; /* Exit the program. */ } @@ -324,7 +324,7 @@ sequence. */ (void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline); crlf_is_newline = newline == PCRE2_NEWLINE_ANY || newline == PCRE2_NEWLINE_CRLF || - newline == PCRE2_NEWLINE_ANYCRLF; + newline == PCRE2_NEWLINE_ANYCRLF; /* Loop for second and subsequent matches */ diff --git a/doc/html/pcre2jit.html b/doc/html/pcre2jit.html index 79c99dc..5232470 100644 --- a/doc/html/pcre2jit.html +++ b/doc/html/pcre2jit.html @@ -71,10 +71,10 @@ performance, there is also a "fast path" API that is JIT-specific.


SIMPLE USE OF JIT

-To make use of the JIT support in the simplest way, all you have to do is to -call pcre2_jit_compile() after successfully compiling a pattern with -pcre2_compile(). This function has two arguments: the first is the -compiled pattern pointer that was returned by pcre2_compile(), and the +To make use of the JIT support in the simplest way, all you have to do is to +call pcre2_jit_compile() after successfully compiling a pattern with +pcre2_compile(). This function has two arguments: the first is the +compiled pattern pointer that was returned by pcre2_compile(), and the second is a set of option bits, which must include at least one of PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.

@@ -239,7 +239,7 @@ non-default JIT stacks might operate:
All the functions described in this section do nothing if JIT is not available, and pcre2_jit_stack_assign() does nothing unless the code argument -is non-NULL and points to a pcre2_code block that has been successfully +is non-NULL and points to a pcre2_code block that has been successfully processed by pcre2_jit_compile().


JIT STACK FAQ
@@ -328,18 +328,18 @@ callback.
   int rc;
   pcre2_code *re;
-  pcre2_match_data *match_data; 
+  pcre2_match_data *match_data;
   pcre2_jit_stack *jit_stack;
 
-  re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, 
+  re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
     &errornumber, &erroffset, NULL);
   /* Check for errors */
   rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
-  /* Check for errors */ 
+  /* Check for errors */
   jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024);
   /* Check for error (NULL) */
   pcre2_jit_stack_assign(re, NULL, jit_stack);
-  match_data = pcre2_match_data_create(re, 10); 
+  match_data = pcre2_match_data_create(re, 10);
   rc = pcre2_match(re, subject, length, 0, 0, match_data, NULL);
   /* Check results */
   pcre2_code_free(re);
diff --git a/doc/html/pcre2partial.html b/doc/html/pcre2partial.html
index 463e6df..50edcfd 100644
--- a/doc/html/pcre2partial.html
+++ b/doc/html/pcre2partial.html
@@ -89,15 +89,15 @@ empty string at the end of the subject.
 

When a partial match is returned, the first two elements in the ovector point -to the portion of the subject that was matched. The appearance of \K in the +to the portion of the subject that was matched. The appearance of \K in the pattern has no effect for a partial match. Consider this pattern:

   /abc\K123/
 
If it is matched against "456abc123xyz" the result is a complete match, and the -ovector defines the matched string as "123", because \K resets the "start of -match" point. However, if a partial match is requested and the subject string -is "456abc12", a partial match is found for the string "abc12", because all +ovector defines the matched string as "123", because \K resets the "start of +match" point. However, if a partial match is requested and the subject string +is "456abc12", a partial match is found for the string "abc12", because all these characters are needed for a subsequent re-match with additional characters.

@@ -343,14 +343,14 @@ same point as before. For example, if the pattern "(?<=123)abc" is partially matched against the string "xx123ab", the ovector offsets are 5 and 7 ("ab"). The maximum lookbehind count is 3, so all characters before offset 2 can be discarded. The -value of startoffset for the next match should be 3. When pcre2test -displays a partial match, it indicates the lookbehind characters with '<' +value of startoffset for the next match should be 3. When pcre2test +displays a partial match, it indicates the lookbehind characters with '<' characters:
     re> "(?<=123)abc"
   data> xx123ab\=ph
   Partial match: 123ab
-                 <<< 
+                 <<<
 

diff --git a/doc/html/pcre2pattern.html b/doc/html/pcre2pattern.html index 4c8168b..11d8056 100644 --- a/doc/html/pcre2pattern.html +++ b/doc/html/pcre2pattern.html @@ -145,7 +145,7 @@ Unicode newline sequence. The pcre2api page has further discussion -about newlines, and shows how to set the newline convention when calling +about newlines, and shows how to set the newline convention when calling pcre2_compile().

@@ -218,7 +218,7 @@ corresponding characters in the subject. As a trivial example, the pattern

matches a portion of a subject string that is identical to itself. When caseless matching is specified (the PCRE2_CASELESS option), letters are matched -independently of case. +independently of case.

The power of regular expressions comes from the ability to include alternatives @@ -1191,8 +1191,8 @@ An opening square bracket introduces a character class, terminated by a closing square bracket. A closing square bracket on its own is not special by default. If a closing square bracket is required as a member of the class, it should be the first data character in the class (after an initial circumflex, if present) -or escaped with a backslash. This means that, by default, an empty class cannot -be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing +or escaped with a backslash. This means that, by default, an empty class cannot +be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing square bracket at the start does end the (empty) class.

@@ -1216,7 +1216,7 @@ string. When caseless matching is set, any letters in a class represent both their upper case and lower case versions, so for example, a caseless [aeiou] matches "A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a -caseful version would. +caseful version would.

Characters that might indicate line breaks are never treated in any special way @@ -1341,7 +1341,7 @@ classes by other sequences, as follows: [:alnum:] becomes \p{Xan} [:alpha:] becomes \p{L} [:blank:] becomes \h - [:cntrl:] becomes \p{Cc} + [:cntrl:] becomes \p{Cc} [:digit:] becomes \p{Nd} [:lower:] becomes \p{Ll} [:space:] becomes \p{Xps} @@ -1490,7 +1490,7 @@ match "cataract", "erpillar" or an empty string.
2. It sets up the subpattern as a capturing subpattern. This means that, when the whole pattern matches, the portion of the subject string that matched the -subpattern is passed back to the caller, separately from the portion that +subpattern is passed back to the caller, separately from the portion that matched the whole pattern. (This applies only to the traditional matching function; the DFA matching function does not support capturing.)

@@ -1908,7 +1908,7 @@ at release 5.10. PCRE2 has an optimization that automatically "possessifies" certain simple pattern constructs. For example, the sequence A+B is treated as A++B because there is no point in backtracking into a sequence of A's when B must follow. -This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting +This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting the pattern with (*NO_AUTO_POSSESS).

@@ -2216,7 +2216,7 @@ if the pattern is written as

   ^.*+(?<=abcd)
 
-there can be no backtracking for the .*+ item because of the possessive +there can be no backtracking for the .*+ item because of the possessive quantifier; it can match only the entire string. The subsequent lookbehind assertion does a single test on the last four characters. If it fails, the match fails immediately. For long strings, this approach makes a significant @@ -2720,8 +2720,8 @@ same pair of parentheses when there is a repetition.

PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl code. The feature is called "callout". The caller of PCRE2 provides an external -function by putting its entry point in a match context using the function -pcre2_set_callout() and passing the context to pcre2_match() or +function by putting its entry point in a match context using the function +pcre2_set_callout() and passing the context to pcre2_match() or pcre2_dfa_match(). If no match context is passed, or if the callout entry point is set to NULL, callouts are disabled.

@@ -2961,7 +2961,7 @@ output from pcre2test: re> /(*COMMIT)abc/ data> xyzabc 0: abc - data> + data> re> /(*COMMIT)abc/no_start_optimize data> xyzabc No match @@ -2989,7 +2989,7 @@ as (*COMMIT).

The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE). It is like (*MARK:NAME) in that the name is remembered for passing back to the -caller. However, (*SKIP:NAME) searches only for names set with (*MARK), +caller. However, (*SKIP:NAME) searches only for names set with (*MARK), ignoring those set by (*PRUNE) or (*THEN).

   (*SKIP)
@@ -3041,7 +3041,7 @@ group. If (*THEN) is not inside an alternation, it acts like (*PRUNE).
 

The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN). It is like (*MARK:NAME) in that the name is remembered for passing back to the -caller. However, (*SKIP:NAME) searches only for names set with (*MARK), +caller. However, (*SKIP:NAME) searches only for names set with (*MARK), ignoring those set by (*PRUNE) and (*THEN).

diff --git a/doc/html/pcre2stack.html b/doc/html/pcre2stack.html index aff072f..978f241 100644 --- a/doc/html/pcre2stack.html +++ b/doc/html/pcre2stack.html @@ -103,17 +103,17 @@ PCRE2 to use heap memory instead of stack for remembering back-up points when of how to do this are given in the pcre2build documentation. When built in this way, instead of using the stack, PCRE2 -gets memory for remembering backup points from the heap. By default, the memory -is obtained by calling the system malloc() function, but you can arrange -to supply your own memory management function. For details, see the section -entitled +gets memory for remembering backup points from the heap. By default, the memory +is obtained by calling the system malloc() function, but you can arrange +to supply your own memory management function. For details, see the section +entitled "The match context" in the pcre2api documentation. Since the block sizes are always the same, it may be possible to implement customized a memory handler that is more efficient than the standard -function. The memory blocks obtained for this purpose are retained and re-used -if possible while pcre2_match() is running. They are all freed just +function. The memory blocks obtained for this purpose are retained and re-used +if possible while pcre2_match() is running. They are all freed just before it exits.


diff --git a/doc/html/pcre2syntax.html b/doc/html/pcre2syntax.html index c240ca9..1dd04f2 100644 --- a/doc/html/pcre2syntax.html +++ b/doc/html/pcre2syntax.html @@ -414,7 +414,7 @@ appear. (*LIMIT_MATCH=d) set the match limit to d (decimal number) (*LIMIT_RECURSION=d) set the recursion limit to d (decimal number) (*NOTEMPTY) set PCRE2_NOTEMPTY when matching - (*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching + (*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching (*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS) (*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE) (*UTF) set appropriate UTF mode for the library in use diff --git a/doc/html/pcre2test.html b/doc/html/pcre2test.html index 592d9e0..0609a41 100644 --- a/doc/html/pcre2test.html +++ b/doc/html/pcre2test.html @@ -476,7 +476,7 @@ about the pattern: /I info show info about compiled pattern hex pattern is coded in hexadecimal jit[=<number>] use JIT - jitverify verify JIT use + jitverify verify JIT use locale=<name> use this locale memory show memory used newline=<type> set newline type @@ -565,7 +565,7 @@ number in the range 0 to 7: 7 all three modes
If no number is given, 7 is assumed. If JIT compilation is successful, the -compiled JIT code will automatically be used when pcre2_match() is run +compiled JIT code will automatically be used when pcre2_match() is run for the appropriate type of match, except when incompatible run-time options are specified. For more details, see the pcre2jit @@ -710,7 +710,7 @@ for a description of their effects. partial_hard (or ph) set PCRE2_PARTIAL_HARD partial_soft (or ps) set PCRE2_PARTIAL_SOFT
-The partial matching modifiers are provided with abbreviations because they +The partial matching modifiers are provided with abbreviations because they appear frequently in tests.

@@ -892,8 +892,8 @@ until it finds the minimum values for each parameter that allow pcre2_match() to complete without error.

-If JIT is being used, only the match limit is relevant. If DFA matching is -being used, neither limit is relevant, and this modifier is ignored (with a +If JIT is being used, only the match limit is relevant. If DFA matching is +being used, neither limit is relevant, and this modifier is ignored (with a warning message).

@@ -939,10 +939,10 @@ appears, though of course it can also be used to set a default in a available for storing matching information. The default is 15.

-At least one pair of offsets is always created by -pcre2_match_data_create(), for matching with PCRE2's native API, so a -value of 0 is the same as 1. However a value of 0 is useful when testing the -POSIX API because it causes regexec() to be called with a NULL capture +At least one pair of offsets is always created by +pcre2_match_data_create(), for matching with PCRE2's native API, so a +value of 0 is the same as 1. However a value of 0 is useful when testing the +POSIX API because it causes regexec() to be called with a NULL capture vector.


THE ALTERNATIVE MATCHING FUNCTION
diff --git a/doc/html/pcre2unicode.html b/doc/html/pcre2unicode.html index bbefd02..52846fb 100644 --- a/doc/html/pcre2unicode.html +++ b/doc/html/pcre2unicode.html @@ -67,7 +67,7 @@ In UTF modes, the dot metacharacter matches one UTF character instead of a single code unit.

-The escape sequence \C can be used to match a single code unit, in a UTF mode, +The escape sequence \C can be used to match a single code unit, in a UTF mode, but its use can lead to some strange effects because it breaks up multi-unit characters (see the description of \C in the pcre2pattern @@ -114,8 +114,8 @@ VALIDITY OF UTF STRINGS

When the PCRE2_UTF option is set, the strings passed as patterns and subjects -are (by default) checked for validity on entry to the relevant functions. -If an invalid UTF string is passed, an error return is given. +are (by default) checked for validity on entry to the relevant functions. +If an invalid UTF string is passed, an error return is given.

UTF-16 and UTF-32 strings can indicate their endianness by special code knows diff --git a/doc/pcre2.3 b/doc/pcre2.3 index aaa71d3..8a31f5d 100644 --- a/doc/pcre2.3 +++ b/doc/pcre2.3 @@ -23,11 +23,11 @@ of Unicode in use can be discovered by running .sp pcre2test -C .P -The three libraries contain identical sets of functions, with names ending in -_8, _16, or _32, respectively (for example, \fBpcre2_compile_8()\fP). However, -by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just +The three libraries contain identical sets of functions, with names ending in +_8, _16, or _32, respectively (for example, \fBpcre2_compile_8()\fP). However, +by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just one code unit width can be written using generic names such as -\fBpcre2_compile()\fP, and the documentation is written assuming that this is +\fBpcre2_compile()\fP, and the documentation is written assuming that this is the case. .P In addition to the Perl-compatible matching function, PCRE2 contains an diff --git a/doc/pcre2.txt b/doc/pcre2.txt index 2301c8a..27ec713 100644 --- a/doc/pcre2.txt +++ b/doc/pcre2.txt @@ -158,8 +158,8 @@ REVISION Last updated: 28 September 2014 Copyright (c) 1997-2014 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2API(3) Library Functions Manual PCRE2API(3) @@ -2529,8 +2529,8 @@ REVISION Last updated: 16 October 2014 Copyright (c) 1997-2014 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3) @@ -2981,8 +2981,8 @@ REVISION Last updated: 28 September 2014 Copyright (c) 1997-2014 University of Cambridge. 
------------------------------------------------------------------------------ - - + + PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3) @@ -3217,8 +3217,8 @@ REVISION Last updated: 19 October 2014 Copyright (c) 1997-2014 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3) @@ -3403,8 +3403,8 @@ REVISION Last updated: 28 September 2014 Copyright (c) 1997-2014 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2JIT(3) Library Functions Manual PCRE2JIT(3) @@ -3758,8 +3758,8 @@ REVISION Last updated: 29 September 2014 Copyright (c) 1997-2014 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3) @@ -3826,8 +3826,8 @@ REVISION Last updated: 29 September 2014 Copyright (c) 1997-2014 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3) @@ -4045,8 +4045,8 @@ REVISION Last updated: 29 September 2014 Copyright (c) 1997-2014 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3) @@ -4485,8 +4485,8 @@ REVISION Last updated: 14 October 2014 Copyright (c) 1997-2014 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3) @@ -4711,5 +4711,5 @@ REVISION Last updated: 16 September 2014 Copyright (c) 1997-2014 University of Cambridge. 
------------------------------------------------------------------------------ - - + + diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 index 47351cd..00e92e9 100644 --- a/doc/pcre2api.3 +++ b/doc/pcre2api.3 @@ -250,7 +250,7 @@ to be included in an environment where the value of PCRE2_CODE_UNIT_WIDTH is unknown should also use the real function names. (Unfortunately, it is not possible in C code to save and restore the value of a macro.) .P -If PCRE2_CODE_UNIT_WIDTH is not defined before including \fBpcre2.h\fP, a +If PCRE2_CODE_UNIT_WIDTH is not defined before including \fBpcre2.h\fP, a compiler error occurs. .P When using multiple libraries in an application, you must take care when @@ -392,7 +392,7 @@ section on \fBpcre2_match()\fP options below. .P The choice of newline convention does not affect the interpretation of -the \en or \er escape sequences, nor does it affect what \eR matches, which has +the \en or \er escape sequences, nor does it affect what \eR matches, which has its own separate control. . . @@ -509,7 +509,7 @@ The memory used for a general context should be freed by calling: .SS "The compile context" .rs .sp -A compile context is required if you want to change the default values of any +A compile context is required if you want to change the default values of any of the following compile-time parameters: .sp What \eR matches (Unicode newlines or CR, LF, CRLF only); @@ -518,7 +518,7 @@ of the following compile-time parameters: The compile time nested parentheses limit; An external function for stack checking. .sp -A compile context is also required if you are using custom memory management. +A compile context is also required if you are using custom memory management. If none of these apply, just pass NULL as the context argument of \fIpcre2_compile()\fP. 
.P @@ -534,8 +534,8 @@ A compile context is created, copied, and freed by the following functions: .B void pcre2_compile_context_free(pcre2_compile_context *\fIccontext\fP); .fi .sp -A compile context is created with default values for its parameters. These can -be changed by calling the following functions, which return 0 on success, or +A compile context is created with default values for its parameters. These can +be changed by calling the following functions, which return 0 on success, or PCRE2_ERROR_BADDATA if invalid data is detected. .sp .nf @@ -543,11 +543,11 @@ PCRE2_ERROR_BADDATA if invalid data is detected. .B " uint32_t \fIvalue\fP);" .fi .sp -The value must be PCRE2_BSR_ANYCRLF, to specify that \eR matches only CR, LF, -or CRLF, or PCRE2_BSR_UNICODE, to specify that \eR matches any Unicode line -ending sequence. The value of this parameter does not affect what is compiled; +The value must be PCRE2_BSR_ANYCRLF, to specify that \eR matches only CR, LF, +or CRLF, or PCRE2_BSR_UNICODE, to specify that \eR matches any Unicode line +ending sequence. The value of this parameter does not affect what is compiled; it is just saved with the compiled pattern. The value is used by the JIT -compiler and by the two interpreted matching functions, \fIpcre2_match()\fP and +compiler and by the two interpreted matching functions, \fIpcre2_match()\fP and \fIpcre2_dfa_match()\fP. .sp .nf @@ -555,7 +555,7 @@ compiler and by the two interpreted matching functions, \fIpcre2_match()\fP and .B " const unsigned char *\fItables\fP);" .fi .sp -The value must be the result of a call to \fIpcre2_maketables()\fP, whose only +The value must be the result of a call to \fIpcre2_maketables()\fP, whose only argument is a general context. This function builds a set of character tables in the current locale. .sp @@ -564,9 +564,9 @@ in the current locale. 
.B " uint32_t \fIvalue\fP);" .fi .sp -This specifies which characters or character sequences are to be recognized as +This specifies which characters or character sequences are to be recognized as newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only), -PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character +PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or PCRE2_NEWLINE_ANY (any Unicode newline sequence). .P @@ -591,7 +591,7 @@ using up too much system stack when being compiled. .fi .sp There is at least one application that runs PCRE2 in threads with very limited -system stack, where running out of stack is to be avoided at all costs. The +system stack, where running out of stack is to be avoided at all costs. The parenthesis limit above cannot take account of how much stack is actually available. For a finer control, you can supply a function that is called whenever \fBpcre2_compile()\fP starts to compile a parenthesized part of a @@ -603,20 +603,20 @@ function should return zero if all is well, or non-zero to force an error. .SS "The match context" .rs .sp -A match context is required if you want to change the default values of any +A match context is required if you want to change the default values of any of the following match-time parameters: .sp What \eR matches (Unicode newlines or CR, LF, CRLF only); A callout function; - The limit for calling \fImatch()\fP; + The limit for calling \fImatch()\fP; The limit for calling \fImatch()\fP recursively; The newline character sequence; .sp -A match context is also required if you are using custom memory management. -If none of these apply, just pass NULL as the context argument of +A match context is also required if you are using custom memory management. 
+If none of these apply, just pass NULL as the context argument of \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP, or \fBpcre2_jit_match()\fP. -Changing the newline value or what \eR matches at match time disables the use -of JIT via \fBpcre2_match()\fP. +Changing the newline value or what \eR matches at match time disables the use +of JIT via \fBpcre2_match()\fP. .P A match context is created, copied, and freed by the following functions: .sp @@ -630,8 +630,8 @@ A match context is created, copied, and freed by the following functions: .B void pcre2_match_context_free(pcre2_match_context *\fImcontext\fP); .fi .sp -A match context is created with default values for its parameters. These can -be changed by calling the following functions, which return 0 on success, or +A match context is created with default values for its parameters. These can +be changed by calling the following functions, which return 0 on success, or PCRE2_ERROR_BADDATA if invalid data is detected. .sp .nf @@ -662,7 +662,7 @@ calls repeatedly (sometimes recursively). The limit set by \fImatch_limit\fP is imposed on the number of times this function is called during a match, which has the effect of limiting the amount of backtracking that can take place. For patterns that are not anchored, the count restarts from zero for each position -in the subject string. This limit is not relevant to \fBpcre2_dfa_match()\fP, +in the subject string. This limit is not relevant to \fBpcre2_dfa_match()\fP, which ignores it. .P When \fBpcre2_match()\fP is called with a pattern that was successfully studied @@ -698,7 +698,7 @@ This limit is of use only if it is set smaller than \fImatch_limit\fP. Limiting the recursion depth limits the amount of system stack that can be used, or, when PCRE2 has been compiled to use memory on the heap instead of the stack, the amount of heap memory that can be used. 
This limit is not relevant, -and is ignored, when matching is done using JIT compiled code or by the +and is ignored, when matching is done using JIT compiled code or by the \fBpcre2_dfa_match()\fP function. .P The default value for \fIrecursion_limit\fP can be set when PCRE2 is built; the @@ -720,9 +720,9 @@ limit is set, less than the default. .B " void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);" .fi .sp -This function sets up two additional custom memory management functions for use +This function sets up two additional custom memory management functions for use by \fBpcre2_match()\fP when PCRE2 is compiled to use the heap for remembering -backtracking data, instead of recursive function calls that use the system +backtracking data, instead of recursive function calls that use the system stack. There is a discussion about PCRE2's stack usage in the .\" HREF \fBpcre2stack\fP @@ -738,7 +738,7 @@ limited stacks. Because of the greater use of memory management, general custom memory functions are provided so that special-purpose external code can be used for this case, because the memory blocks are all the same size. The blocks are retained by \fBpcre2_match()\fP until it is about to exit -so that they can be re-used when possible during the match. In the absence of +so that they can be re-used when possible during the match. In the absence of these functions, the normal custom memory management functions are used, if supplied, otherwise the system functions. . @@ -760,7 +760,7 @@ required. The second argument is a pointer to memory into which the information is placed. If NULL is passed, the function returns the amount of memory that is needed for the requested information. For calls that return numerical values, the value is in bytes; when requesting these values, \fIwhere\fP should point -to appropriately aligned memory. For calls that return strings, the required +to appropriately aligned memory. 
For calls that return strings, the required length is given in code units, not counting the terminating zero. .P When requesting information, the returned value from \fBpcre2_config()\fP is @@ -783,7 +783,7 @@ compiling is available; otherwise it is set to zero. PCRE2_CONFIG_JITTARGET .sp The \fIwhere\fP argument should point to a buffer that is at least 48 code -units long. (The exact length needed can be found by calling +units long. (The exact length needed can be found by calling \fBpcre2_config()\fP with \fBwhere\fP set to NULL.) The buffer is filled with a string that contains the name of the architecture for which the JIT compiler is configured, for example "x86 32bit (little endian + unaligned)". If JIT support @@ -794,9 +794,9 @@ the string, in code units, is returned. .sp The output is an integer that contains the number of bytes used for internal linkage in compiled regular expressions. When PCRE2 is configured, the value -can be set to 2, 3, or 4, with the default being 2. This is the value that is -returned by \fBpcre2_config()\fP. However, when the 16-bit library is compiled, -a value of 3 is rounded up to 4, and when the 32-bit library is compiled, +can be set to 2, 3, or 4, with the default being 2. This is the value that is +returned by \fBpcre2_config()\fP. However, when the 16-bit library is compiled, +a value of 3 is rounded up to 4, and when the 32-bit library is compiled, internal linkages always use 4 bytes, so the configured value is not relevant. .P The default value of 2 for the 8-bit and 16-bit libraries is sufficient for all @@ -820,7 +820,7 @@ that is recognized as meaning "newline". The values are: 3 Carriage return, linefeed (CRLF) 4 Any Unicode line ending 5 Any of CR, LF, or CRLF -.sp +.sp The default should normally correspond to the standard sequence for your operating system. .sp @@ -849,7 +849,7 @@ compiled. The output is zero if PCRE2 was compiled to use blocks of data on the heap instead of recursive function calls. 
.sp PCRE2_CONFIG_UNICODE_VERSION -.sp +.sp The \fIwhere\fP argument should point to a buffer that is at least 24 code units long. (The exact length needed can be found by calling \fBpcre2_config()\fP with \fBwhere\fP set to NULL.) If PCRE2 has been compiled @@ -884,15 +884,15 @@ units) is returned. .B pcre2_code_free(pcre2_code *\fIcode\fP); .fi .P -This function compiles a pattern, defined by a pointer to a string of code -units and a length, into an internal form. If the pattern is zero-terminated, -the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a -pointer to a block of memory that contains the compiled pattern and related -data. The caller must free the memory by calling \fBpcre2_code_free()\fP when +This function compiles a pattern, defined by a pointer to a string of code +units and a length, into an internal form. If the pattern is zero-terminated, +the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a +pointer to a block of memory that contains the compiled pattern and related +data. The caller must free the memory by calling \fBpcre2_code_free()\fP when it is no longer needed. .P -If the compile context argument \fIccontext\fP is NULL, the memory is obtained -by calling \fBmalloc()\fP. Otherwise, it is obtained from the same memory +If the compile context argument \fIccontext\fP is NULL, the memory is obtained +by calling \fBmalloc()\fP. Otherwise, it is obtained from the same memory function that was used for the compile context. .P The \fIoptions\fP argument contains various bit settings that affect the @@ -903,14 +903,14 @@ within the pattern (see the detailed description in the .\" HREF \fBpcre2pattern\fP .\" -documentation). +documentation). .P For those options that can be different in different parts of the pattern, the contents of the \fIoptions\fP argument specifies their settings at the start of compilation. 
The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK options can be set at the time of matching as well as at compile time. .P -Other, less frequently required compile-time parameters (for example, the +Other, less frequently required compile-time parameters (for example, the newline setting) can be provided in a compile context (as described .\" HTML .\" @@ -936,10 +936,10 @@ This code fragment shows a typical straightforward call to .sp pcre2_code *re; PCRE2_SIZE erroffset; - int errorcode; + int errorcode; re = pcre2_compile( "^A.*Z", /* the pattern */ - PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */ + PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */ 0, /* default options */ &errorcode, /* for error code */ &erroffset, /* for error offset */ @@ -958,14 +958,14 @@ Perl. .sp PCRE2_ALLOW_EMPTY_CLASS .sp -By default, for compatibility with Perl, a closing square bracket that -immediately follows an opening one is treated as a data character for the -class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which -therefore contains no characters and so can never match. +By default, for compatibility with Perl, a closing square bracket that +immediately follows an opening one is treated as a data character for the +class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which +therefore contains no characters and so can never match. .sp PCRE2_ALT_BSUX .sp -This option request alternative handling of three escape sequences, which +This option request alternative handling of three escape sequences, which makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set: .P (1) \eU matches an upper case "U" character; by default \eU causes a compile @@ -996,7 +996,7 @@ documentation. .sp If this bit is set, letters in the pattern match both upper and lower case letters in the subject. It is equivalent to Perl's /i option, and it can be -changed within a pattern by a (?i) option setting. 
+changed within a pattern by a (?i) option setting. .sp PCRE2_DOLLAR_ENDONLY .sp @@ -1052,7 +1052,7 @@ sequence at the start of the pattern, as described in the section entitled .\" "Newline conventions" .\" -in the \fBpcre2pattern\fP documentation. A default is defined when PCRE2 is +in the \fBpcre2pattern\fP documentation. A default is defined when PCRE2 is built. .sp PCRE2_FIRSTLINE @@ -1067,7 +1067,7 @@ If this option is set, a back reference to an unset subpattern group matches an empty string (by default this causes the current matching alternative to fail). A pattern such as (\e1)(a) succeeds when this option is set (assuming it can find an "a" in the subject), whereas it fails by default, for Perl -compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka +compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka JavaScript). .sp PCRE2_MULTILINE @@ -1091,10 +1091,10 @@ occurrences of ^ or $ in a pattern, setting PCRE2_MULTILINE has no effect. PCRE2_NEVER_UCP .sp This option locks out the use of Unicode properties for handling \eB, \eb, \eD, -\ed, \eS, \es, \eW, \ew, and some of the POSIX character classes, as described -for the PCRE2_UCP option below. In particular, it prevents the creator of the -pattern from enabling this facility by starting the pattern with (*UCP). This -may be useful in applications that process patterns from external sources. The +\ed, \eS, \es, \eW, \ew, and some of the POSIX character classes, as described +for the PCRE2_UCP option below. In particular, it prevents the creator of the +pattern from enabling this facility by starting the pattern with (*UCP). This +may be useful in applications that process patterns from external sources. The option combination PCRE_UCP and PCRE_NEVER_UCP causes an error. .sp PCRE2_NEVER_UTF @@ -1167,7 +1167,7 @@ pattern (*MARK:A)(X|Y) .sp The minimum length for a match is one character. 
If the subject is "ABC", there -will be attempts to match "ABC", "BC", and "C". An attempt to match an empty +will be attempts to match "ABC", "BC", and "C". An attempt to match an empty string at the end of the subject does not take place, because PCRE2 knows that the subject is now too short, and so the (*MARK) is never encountered. In this case, the optimization does not affect the overall match result, which is still @@ -1194,7 +1194,7 @@ in the .\" HREF \fBpcre2unicode\fP .\" -document. +document. If an invalid UTF sequence is found, \fBpcre2_compile()\fP returns a negative error code. .P @@ -1385,9 +1385,9 @@ The possible values for the second argument are defined in \fBpcre2.h\fP, and are as follows: .sp PCRE2_INFO_ALLOPTIONS - PCRE2_INFO_ARGOPTIONS + PCRE2_INFO_ARGOPTIONS .sp -Return a copy of the pattern's options. The third argument should point to a +Return a copy of the pattern's options. The third argument should point to a \fBuint32_t\fP variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that were passed to \fBpcre2_compile()\fP, whereas PCRE2_INFO_ALLOPTIONS returns the compile options as modified by any top-level option settings at the start @@ -1406,7 +1406,7 @@ alternatives begin with one of the following: .* if PCRE2_DOTALL is set and there are no back references to the subpattern in which .* appears .sp -For such patterns, the PCRE2_ANCHORED bit is set in the options returned for +For such patterns, the PCRE2_ANCHORED bit is set in the options returned for PCRE2_INFO_ALLOPTIONS. .sp PCRE2_INFO_BACKREFMAX @@ -1490,7 +1490,7 @@ return zero. The third argument should point to a \fBsize_t\fP variable. .sp Returns 1 if there is a rightmost literal code unit that must exist in any matched string, other than at its start. The third argument should point to an -\fBuint32_t\fP variable. If there is no such value, 0 is returned. When 1 is +\fBuint32_t\fP variable. If there is no such value, 0 is returned. 
When 1 is returned, the code unit value itself can be retrieved using PCRE2_INFO_LASTCODEUNIT. .P @@ -1617,7 +1617,7 @@ values are: 3 Carriage return, linefeed (CRLF) 4 Any Unicode line ending 5 Any of CR, LF, or CRLF -.sp +.sp The default can be overridden when a pattern is matched. .sp PCRE2_INFO_RECURSIONLIMIT @@ -1652,11 +1652,11 @@ pattern with the JIT compiler does not alter the value returned by this option. .B void pcre2_match_data_free(pcre2_match_data *\fImatch_data\fP); .fi .P -Information about successful and unsuccessful matches is placed in a match +Information about successful and unsuccessful matches is placed in a match data block, which is an opaque structure that is accessed by function calls. In particular, the match data block contains a vector of offsets into the subject string that define the matched part of the subject and any substrings that were -capured. This is know as the \fIovector\fP. +capured. This is know as the \fIovector\fP. .P Before calling \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP you must create a match data block by calling one of the creation functions above. For @@ -1669,11 +1669,11 @@ pair is imposed by \fBpcre2_match_data_create()\fP, so it is always possible to return the overall matched string. .P For \fBpcre2_match_data_create_from_pattern()\fP, the first argument is a -pointer to a compiled pattern. In this case the ovector is created to be +pointer to a compiled pattern. In this case the ovector is created to be exactly the right size to hold all the substrings a pattern might capture. .P -The second argument of both these functions ia a pointer to a general context, -which can specify custom memory management for obtaining the memory for the +The second argument of both these functions ia a pointer to a general context, +which can specify custom memory management for obtaining the memory for the match data block. If you are not using custom memory management, pass NULL. 
.P A match data block can be used many times, with the same or different compiled @@ -1729,8 +1729,8 @@ Here is an example of a simple call to \fBpcre2_match()\fP: match_data, /* the match data block */ NULL); /* a match context; NULL means use defaults */ .sp -If the subject string is zero-terminated, the length can be given as -PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common +If the subject string is zero-terminated, the length can be given as +PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common matching parameters are to be changed. For details, see the section on .\" HTML .\" @@ -1746,7 +1746,7 @@ The subject string is passed to \fBpcre2_match()\fP as a pointer in \fIsubject\fP, a length in \fIlength\fP, and a starting offset in \fIstartoffset\fP. The length and offset are in code units, not characters. That is, they are in bytes for the 8-bit library, 16-bit code units for the -16-bit library, and 32-bit code units for the 32-bit library, whether or not +16-bit library, and 32-bit code units for the 32-bit library, whether or not UTF processing is enabled. .P If \fIstartoffset\fP is greater than the length of the subject, @@ -1755,7 +1755,7 @@ zero, the search for a match starts at the beginning of the subject, and this is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset must point to the start of a character, or to the end of the subject (in UTF-32 mode, one code unit equals one character, so all offsets are valid). Like the -pattern string, the subject may contain binary zeroes. +pattern string, the subject may contain binary zeroes. .P A non-zero starting offset is useful when searching for another match in the same subject by calling \fBpcre2_match()\fP again after a previous success. @@ -1816,7 +1816,7 @@ JIT matching is disabled and the normal interpretive code in The PCRE2_ANCHORED option limits \fBpcre2_match()\fP to matching at the first matching position. 
If a pattern was compiled with PCRE2_ANCHORED, or turned out to be anchored by virtue of its contents, it cannot be made unachored at -matching time. Note that setting the option at match time disables JIT +matching time. Note that setting the option at match time disables JIT matching. .sp PCRE2_NOTBOL @@ -1880,13 +1880,13 @@ in the .\" HREF \fBpcre2unicode\fP .\" -page. +page. .P If you know that your subject is valid, and you want to skip these checks for performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling \fBpcre2_match()\fP. You might want to do this for the second and subsequent calls to \fBpcre2_match()\fP if you are making repeated calls to find all the -matches in a single subject string. +matches in a single subject string. .P NOTE: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid string as a subject, or an invalid value of \fIstartoffset\fP, is undefined. Your @@ -1921,10 +1921,10 @@ documentation. . .SH "NEWLINE HANDLING WHEN MATCHING" .rs -.sp -When PCRE2 is built, a default newline convention is set; this is usually the -standard convention for the operating system. The default can be overridden in -either a +.sp +When PCRE2 is built, a default newline convention is set; this is usually the +standard convention for the operating system. The default can be overridden in +either a .\" HTML .\" compile context @@ -1972,7 +1972,7 @@ valid newline sequence and explicit \er or \en escapes appear in the pattern. .fi .P In general, a pattern matches a certain portion of the subject, and in -addition, further substrings from the subject may be picked out by +addition, further substrings from the subject may be picked out by parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's book, this is called "capturing" in what follows, and the phrase "capturing subpattern" is used for a fragment of a pattern that picks out a substring. 
@@ -1982,14 +1982,14 @@ used to find out how many capturing subpatterns there are in a compiled pattern. .P The overall matched string and any captured substrings are returned to the -caller via a vector of PCRE2_SIZE values, called the \fBovector\fP. This is +caller via a vector of PCRE2_SIZE values, called the \fBovector\fP. This is contained within the .\" HTML .\" match data block. .\" -You can obtain direct access to the ovector by calling -\fBpcre2_get_ovector_pointer()\fP to find its address, and +You can obtain direct access to the ovector by calling +\fBpcre2_get_ovector_pointer()\fP to find its address, and \fBpcre2_get_ovector_count()\fP to find the number of pairs of values it contains. Alternatively, you can use the auxiliary functions for accessing captured substrings @@ -2065,17 +2065,17 @@ had. .B PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *\fImatch_data\fP); .fi .P -In addition to the offsets in the ovector, other information about a match is +In addition to the offsets in the ovector, other information about a match is retained in the match data block and can be retrieved by the above functions. .P When a (*MARK) name is to be passed back, \fBpcre2_get_mark()\fP returns a -pointer to the zero-terminated name, which is within the compiled pattern. -Otherwise NULL is returned. A (*MARK) name may be available after a failed +pointer to the zero-terminated name, which is within the compiled pattern. +Otherwise NULL is returned. A (*MARK) name may be available after a failed match or a partial match, as well as after a successful one. .P The offset of the character at which the successful match started is returned by \fBpcre2_get_startchar()\fP. This can be different to the value of -\fIovector[0]\fP if the pattern contains the \eK escape sequence. Note, +\fIovector[0]\fP if the pattern contains the \eK escape sequence. Note, however, the \eK has no effect for a partial match. . . 
@@ -2083,7 +2083,7 @@ however, the \eK has no effect for a partial match. .SS "Error return values from \fBpcre2_match()\fP" .rs .sp -If \fBpcre2_match()\fP fails, it returns a negative number. This can be +If \fBpcre2_match()\fP fails, it returns a negative number. This can be converted to a text string by calling \fBpcre2_get_error_message()\fP. Negative error codes are also returned by other functions, and are documented with them. The codes are given names in the header file. If UTF checking is in force and @@ -2237,7 +2237,7 @@ extracting it by calling \fBpcre2_substring_length_bynumber()\fP. The first argument is a pointer to the match data block, the second is the group number, and the third is a pointer to a variable into which the length is placed. .P -The \fBpcre2_substring_copy_bynumber()\fP function copies one string into a +The \fBpcre2_substring_copy_bynumber()\fP function copies one string into a supplied buffer, whereas \fBpcre2_substring_get_bynumber()\fP copies it into new memory, obtained using the same memory allocation function that was used for the match data block. The first two arguments of these functions are a @@ -2250,10 +2250,10 @@ the buffer and a pointer to a variable that contains its length in code units. This is updated to contain the actual number of code units used, excluding the terminating zero. .P -For \fBpcre2_substring_get_bynumber()\fP the third and fourth arguments point -to variables that are updated with a pointer to the new memory and the number -of code units that comprise the substring, again excluding the terminating -zero. When the substring is no longer needed, the memory should be freed by +For \fBpcre2_substring_get_bynumber()\fP the third and fourth arguments point +to variables that are updated with a pointer to the new memory and the number +of code units that comprise the substring, again excluding the terminating +zero. 
When the substring is no longer needed, the memory should be freed by calling \fBpcre2_substring_free()\fP. .P The return value from these functions is zero for success, or one of these @@ -2266,9 +2266,9 @@ attempt to get memory failed for \fBpcre2_substring_get_bynumber()\fP. .sp PCRE2_ERROR_NOSUBSTRING .sp -No substring with the given number was captured. This could be because there is -no capturing group of that number in the pattern, or because the group with -that number did not participate in the match, or because the ovector was too +No substring with the given number was captured. This could be because there is +no capturing group of that number in the pattern, or because the group with +that number did not participate in the match, or because the ovector was too small to capture that group. . . @@ -2284,7 +2284,7 @@ small to capture that group. .P The \fBpcre2_substring_list_get()\fP function extracts all available substrings and builds a list of pointers to them, and a second list that contains their -lengths (in code units), excluding a terminating zero that is added to each of +lengths (in code units), excluding a terminating zero that is added to each of them. All this is done in a single block of memory that is obtained using the same memory allocation function that was used to get the match data block. .P @@ -2295,7 +2295,7 @@ NULL pointer. The address of the list of lengths is returned via therefore need the lengths, you may supply NULL as the \fBlengthsptr\fP argument to disable the creation of a list of lengths. The yield of the function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block -could not be obtained. When the list is no longer needed, it should be freed by +could not be obtained. When the list is no longer needed, it should be freed by calling \fBpcre2_substring_list_free()\fP. .P If this function encounters a substring that is unset, which can happen when @@ -2340,7 +2340,7 @@ name. 
.P Given the number, you can extract the substring directly, or use one of the functions described in the previous section. For convenience, there are also -"byname" functions that correspond to the "bynumber" functions, the only +"byname" functions that correspond to the "bynumber" functions, the only difference being that the second argument is a name instead of a number. However, if PCRE2_DUPNAMES is set and there are duplicate names, the behaviour may not be what you want (see the next section). @@ -2413,7 +2413,7 @@ numbers, and hence the captured data. .sp The traditional matching function uses a similar algorithm to Perl, which stops when it finds the first match, starting at a given point in the subject. If you -want to find all possible matches, or the longest possible match at a given +want to find all possible matches, or the longest possible match at a given position, consider using the alternative matching function (see below) instead. If you cannot use the alternative function, you can kludge it up by making use of the callout facility, which is described in the @@ -2614,8 +2614,8 @@ fail, this error is given. .SH "SEE ALSO" .rs .sp -\fBpcre2build\fP(3), \fBpcre2libs\fP(3), \fBpcre2callout\fP(3), -\fBpcre2matching\fP(3), \fBpcre2partial\fP(3), \fBpcre2posix\fP(3), +\fBpcre2build\fP(3), \fBpcre2libs\fP(3), \fBpcre2callout\fP(3), +\fBpcre2matching\fP(3), \fBpcre2partial\fP(3), \fBpcre2posix\fP(3), \fBpcre2demo(3)\fP, \fBpcre2sample\fP(3), \fBpcre2stack\fP(3). . . diff --git a/doc/pcre2build.3 b/doc/pcre2build.3 index 2146777..561cc35 100644 --- a/doc/pcre2build.3 +++ b/doc/pcre2build.3 @@ -71,11 +71,11 @@ single-byte characters, or UTF-8 strings. You can also build two other libraries, called \fBlibpcre2-16\fP and \fBlibpcre2-32\fP, which process strings that are contained in vectors of 16-bit and 32-bit code units, respectively. These can be interpreted either as single-unit characters or -UTF-16/UTF-32 strings. 
To build these additional libraries, add one or both of +UTF-16/UTF-32 strings. To build these additional libraries, add one or both of the following to the \fBconfigure\fP command: .sp --enable-pcre16 - --enable-pcre32 + --enable-pcre32 .sp If you do not want the 8-bit library, add .sp @@ -367,7 +367,7 @@ override this value by specifying a run-time option. If you add one of .sp --enable-pcre2test-libreadline - --enable-pcre2test-libedit + --enable-pcre2test-libedit .sp to the \fBconfigure\fP command, \fBpcre2test\fP is linked with the \fBlibreadline\fP or\fBlibedit\fP library, respectively, and when its input is @@ -384,8 +384,8 @@ unmodified distribution version of readline is in use), some extra configuration may be necessary. The INSTALL file for \fBlibreadline\fP says this: .sp - "Readline uses the termcap functions, but does not link with - the termcap or curses library itself, allowing applications + "Readline uses the termcap functions, but does not link with + the termcap or curses library itself, allowing applications which link with readline the to choose an appropriate library." .sp If your environment has not been set up so that an appropriate library is diff --git a/doc/pcre2callout.3 b/doc/pcre2callout.3 index 620f455..4089bcc 100644 --- a/doc/pcre2callout.3 +++ b/doc/pcre2callout.3 @@ -16,9 +16,9 @@ PCRE2 provides a feature called "callout", which is a means of temporarily passing control to the caller of PCRE2 in the middle of pattern matching. The caller of PCRE2 provides an external function by putting its entry point in a match context (see \fBpcre2_set_callout()\fP) in the -.\" HREF -\fBpcre2api\fP -.\" +.\" HREF +\fBpcre2api\fP +.\" documentation). 
.P Within a regular expression, (?C) indicates the points at which the external diff --git a/doc/pcre2demo.3 b/doc/pcre2demo.3 index bf3f5fb..5deed0a 100644 --- a/doc/pcre2demo.3 +++ b/doc/pcre2demo.3 @@ -25,7 +25,7 @@ pcre2sample documentation for a short discussion ("man pcre2sample" if you have the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is incompatible with the original PCRE API. -There are actually three libraries, each supporting a different code unit +There are actually three libraries, each supporting a different code unit width. This demonstration program uses the 8-bit library. In Unix-like environments, if PCRE2 is installed in your standard system @@ -56,8 +56,8 @@ the following line. */ /* #define PCRE2_STATIC */ -/* This macro must be defined before including pcre2.h. For a program that uses -only one code unit width, it makes it possible to use generic function names +/* This macro must be defined before including pcre2.h. For a program that uses +only one code unit width, it makes it possible to use generic function names such as pcre2_compile(). */ #define PCRE2_CODE_UNIT_WIDTH 8 @@ -141,7 +141,7 @@ subject_length = strlen((char *)subject); re = pcre2_compile( pattern, /* the pattern */ - PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ + PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ 0, /* default options */ &errornumber, /* for error number */ &erroroffset, /* for error offset */ @@ -151,9 +151,9 @@ re = pcre2_compile( if (re == NULL) { - PCRE2_UCHAR buffer[256]; + PCRE2_UCHAR buffer[256]; pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); - printf("PCRE2 compilation failed at offset %d: %s\en", (int)erroroffset, + printf("PCRE2 compilation failed at offset %d: %s\en", (int)erroroffset, buffer); return 1; } @@ -197,7 +197,7 @@ if (rc < 0) return 1; } -/* Match succeded. Get a pointer to the output vector, where string offsets are +/* Match succeded. 
Get a pointer to the output vector, where string offsets are stored. */ ovector = pcre2_get_ovector_pointer(match_data); @@ -210,7 +210,7 @@ printf("\enMatch succeeded at offset %d\en", (int)ovector[0]); * captured. * *************************************************************************/ -/* The output vector wasn't big enough. This should not happen, because we used +/* The output vector wasn't big enough. This should not happen, because we used pcre2_match_data_create_from_pattern() above. */ if (rc == 0) @@ -261,7 +261,7 @@ if (namecount <= 0) printf("No named substrings\en"); else &name_entry_size); /* where to put the answer */ /* Now we can scan the table and, for each entry, print the number, the name, - and the substring itself. In the 8-bit library the number is held in two + and the substring itself. In the 8-bit library the number is held in two bytes, most significant first. */ tabptr = name_table; @@ -306,7 +306,7 @@ if (namecount <= 0) printf("No named substrings\en"); else if (!find_all) /* Check for -g */ { - pcre2_match_data_free(match_data); /* Release the memory that was used */ + pcre2_match_data_free(match_data); /* Release the memory that was used */ pcre2_code_free(re); /* for the match data and the pattern. */ return 0; /* Exit the program. */ } @@ -324,7 +324,7 @@ sequence. */ (void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline); crlf_is_newline = newline == PCRE2_NEWLINE_ANY || newline == PCRE2_NEWLINE_CRLF || - newline == PCRE2_NEWLINE_ANYCRLF; + newline == PCRE2_NEWLINE_ANYCRLF; /* Loop for second and subsequent matches */ diff --git a/doc/pcre2jit.3 b/doc/pcre2jit.3 index 7151aa6..60e5aa9 100644 --- a/doc/pcre2jit.3 +++ b/doc/pcre2jit.3 @@ -48,10 +48,10 @@ performance, there is also a "fast path" API that is JIT-specific. 
.SH "SIMPLE USE OF JIT" .rs .sp -To make use of the JIT support in the simplest way, all you have to do is to -call \fBpcre2_jit_compile()\fP after successfully compiling a pattern with -\fBpcre2_compile()\fP. This function has two arguments: the first is the -compiled pattern pointer that was returned by \fBpcre2_compile()\fP, and the +To make use of the JIT support in the simplest way, all you have to do is to +call \fBpcre2_jit_compile()\fP after successfully compiling a pattern with +\fBpcre2_compile()\fP. This function has two arguments: the first is the +compiled pattern pointer that was returned by \fBpcre2_compile()\fP, and the second is a set of option bits, which must include at least one of PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT. .P @@ -221,7 +221,7 @@ non-default JIT stacks might operate: .sp All the functions described in this section do nothing if JIT is not available, and \fBpcre2_jit_stack_assign()\fP does nothing unless the \fBcode\fP argument -is non-NULL and points to a \fBpcre2_code\fP block that has been successfully +is non-NULL and points to a \fBpcre2_code\fP block that has been successfully processed by \fBpcre2_jit_compile()\fP. . . @@ -302,18 +302,18 @@ callback. 
.sp int rc; pcre2_code *re; - pcre2_match_data *match_data; + pcre2_match_data *match_data; pcre2_jit_stack *jit_stack; .sp - re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, + re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, &errornumber, &erroffset, NULL); /* Check for errors */ rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE); - /* Check for errors */ + /* Check for errors */ jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024); /* Check for error (NULL) */ pcre2_jit_stack_assign(re, NULL, jit_stack); - match_data = pcre2_match_data_create(re, 10); + match_data = pcre2_match_data_create(re, 10); rc = pcre2_match(re, subject, length, 0, 0, match_data, NULL); /* Check results */ pcre2_free(re); diff --git a/doc/pcre2partial.3 b/doc/pcre2partial.3 index faad43c..b47e864 100644 --- a/doc/pcre2partial.3 +++ b/doc/pcre2partial.3 @@ -64,15 +64,15 @@ matched; without such a restriction there would always be a partial match of an empty string at the end of the subject. .P When a partial match is returned, the first two elements in the ovector point -to the portion of the subject that was matched. The appearance of \eK in the +to the portion of the subject that was matched. The appearance of \eK in the pattern has no effect for a partial match. Consider this pattern: .sp /abc\eK123/ .sp If it is matched against "456abc123xyz" the result is a complete match, and the -ovector defines the matched string as "123", because \eK resets the "start of -match" point. However, if a partial match is requested and the subject string -is "456abc12", a partial match is found for the string "abc12", because all +ovector defines the matched string as "123", because \eK resets the "start of +match" point. However, if a partial match is requested and the subject string +is "456abc12", a partial match is found for the string "abc12", because all these characters are needed for a subsequent re-match with additional characters. .P @@ -316,14 +316,14 @@ same point as before. 
For example, if the pattern "(?<=123)abc" is partially matched against the string "xx123ab", the ovector offsets are 5 and 7 ("ab"). The maximum lookbehind count is 3, so all characters before offset 2 can be discarded. The -value of \fBstartoffset\fP for the next match should be 3. When \fBpcre2test\fP -displays a partial match, it indicates the lookbehind characters with '<' +value of \fBstartoffset\fP for the next match should be 3. When \fBpcre2test\fP +displays a partial match, it indicates the lookbehind characters with '<' characters: .sp re> "(?<=123)abc" data> xx123ab\e=ph Partial match: 123ab - <<< + <<< .P 3. Because a partial match must always contain at least one character, what might be considered a partial match of an empty string actually gives a "no diff --git a/doc/pcre2pattern.3 b/doc/pcre2pattern.3 index 8e10337..f2bdbb3 100644 --- a/doc/pcre2pattern.3 +++ b/doc/pcre2pattern.3 @@ -118,7 +118,7 @@ page has .\" further discussion .\" -about newlines, and shows how to set the newline convention when calling +about newlines, and shows how to set the newline convention when calling \fBpcre2_compile()\fP. .P It is also possible to specify a newline convention by starting a pattern @@ -196,7 +196,7 @@ corresponding characters in the subject. As a trivial example, the pattern .sp matches a portion of a subject string that is identical to itself. When caseless matching is specified (the PCRE2_CASELESS option), letters are matched -independently of case. +independently of case. .P The power of regular expressions comes from the ability to include alternatives and repetitions in the pattern. These are encoded in the pattern by the use of @@ -1199,8 +1199,8 @@ An opening square bracket introduces a character class, terminated by a closing square bracket. A closing square bracket on its own is not special by default. 
If a closing square bracket is required as a member of the class, it should be the first data character in the class (after an initial circumflex, if present) -or escaped with a backslash. This means that, by default, an empty class cannot -be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing +or escaped with a backslash. This means that, by default, an empty class cannot +be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing square bracket at the start does end the (empty) class. .P A character class matches a single character in the subject. A matched @@ -1221,7 +1221,7 @@ string. When caseless matching is set, any letters in a class represent both their upper case and lower case versions, so for example, a caseless [aeiou] matches "A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a -caseful version would. +caseful version would. .P Characters that might indicate line breaks are never treated in any special way when matching character classes, whatever line-ending sequence is in use, and @@ -1340,7 +1340,7 @@ classes by other sequences, as follows: [:alnum:] becomes \ep{Xan} [:alpha:] becomes \ep{L} [:blank:] becomes \eh - [:cntrl:] becomes \ep{Cc} + [:cntrl:] becomes \ep{Cc} [:digit:] becomes \ep{Nd} [:lower:] becomes \ep{Ll} [:space:] becomes \ep{Xps} @@ -1496,7 +1496,7 @@ match "cataract", "erpillar" or an empty string. .sp 2. It sets up the subpattern as a capturing subpattern. This means that, when the whole pattern matches, the portion of the subject string that matched the -subpattern is passed back to the caller, separately from the portion that +subpattern is passed back to the caller, separately from the portion that matched the whole pattern. (This applies only to the traditional matching function; the DFA matching function does not support capturing.) .P @@ -1916,7 +1916,7 @@ at release 5.10. 
PCRE2 has an optimization that automatically "possessifies" certain simple pattern constructs. For example, the sequence A+B is treated as A++B because there is no point in backtracking into a sequence of A's when B must follow. -This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting +This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting the pattern with (*NO_AUTO_POSSESS). .P When a pattern contains an unlimited repeat inside a subpattern that can itself @@ -2238,7 +2238,7 @@ if the pattern is written as .sp ^.*+(?<=abcd) .sp -there can be no backtracking for the .*+ item because of the possessive +there can be no backtracking for the .*+ item because of the possessive quantifier; it can match only the entire string. The subsequent lookbehind assertion does a single test on the last four characters. If it fails, the match fails immediately. For long strings, this approach makes a significant @@ -2754,8 +2754,8 @@ same pair of parentheses when there is a repetition. .P PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl code. The feature is called "callout". The caller of PCRE2 provides an external -function by putting its entry point in a match context using the function -\fBpcre2_set_callout()\fP and passing the context to \fBpcre2_match()\fP or +function by putting its entry point in a match context using the function +\fBpcre2_set_callout()\fP and passing the context to \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP. If no match context is passed, or if the callout entry point is set to NULL, callouts are disabled. .P @@ -3008,7 +3008,7 @@ output from \fBpcre2test\fP: re> /(*COMMIT)abc/ data> xyzabc 0: abc - data> + data> re> /(*COMMIT)abc/no_start_optimize data> xyzabc No match @@ -3035,7 +3035,7 @@ as (*COMMIT). .P The behaviour of (*PRUNE:NAME) is the not the same as (*MARK:NAME)(*PRUNE). It is like (*MARK:NAME) in that the name is remembered for passing back to the -caller. 
However, (*SKIP:NAME) searches only for names set with (*MARK), +caller. However, (*SKIP:NAME) searches only for names set with (*MARK), ignoring those set by (*PRUNE) or (*THEN). .sp (*SKIP) @@ -3085,7 +3085,7 @@ group. If (*THEN) is not inside an alternation, it acts like (*PRUNE). .P The behaviour of (*THEN:NAME) is the not the same as (*MARK:NAME)(*THEN). It is like (*MARK:NAME) in that the name is remembered for passing back to the -caller. However, (*SKIP:NAME) searches only for names set with (*MARK), +caller. However, (*SKIP:NAME) searches only for names set with (*MARK), ignoring those set by (*PRUNE) and (*THEN). .P A subpattern that does not contain a | character is just a part of the diff --git a/doc/pcre2stack.3 b/doc/pcre2stack.3 index a20bb0c..c99d6e9 100644 --- a/doc/pcre2stack.3 +++ b/doc/pcre2stack.3 @@ -90,10 +90,10 @@ of how to do this are given in the \fBpcre2build\fP .\" documentation. When built in this way, instead of using the stack, PCRE2 -gets memory for remembering backup points from the heap. By default, the memory -is obtained by calling the system \fBmalloc()\fP function, but you can arrange -to supply your own memory management function. For details, see the section -entitled +gets memory for remembering backup points from the heap. By default, the memory +is obtained by calling the system \fBmalloc()\fP function, but you can arrange +to supply your own memory management function. For details, see the section +entitled .\" HTML .\" "The match context" @@ -104,8 +104,8 @@ in the .\" documentation. Since the block sizes are always the same, it may be possible to implement customized a memory handler that is more efficient than the standard -function. The memory blocks obtained for this purpose are retained and re-used -if possible while \fBpcre2_match()\fP is running. They are all freed just +function. The memory blocks obtained for this purpose are retained and re-used +if possible while \fBpcre2_match()\fP is running. 
They are all freed just before it exits. . . diff --git a/doc/pcre2syntax.3 b/doc/pcre2syntax.3 index 1b886e6..2bd6499 100644 --- a/doc/pcre2syntax.3 +++ b/doc/pcre2syntax.3 @@ -387,7 +387,7 @@ appear. (*LIMIT_MATCH=d) set the match limit to d (decimal number) (*LIMIT_RECURSION=d) set the recursion limit to d (decimal number) (*NOTEMPTY) set PCRE2_NOTEMPTY when matching - (*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching + (*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching (*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS) (*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE) (*UTF) set appropriate UTF mode for the library in use diff --git a/doc/pcre2test.1 b/doc/pcre2test.1 index bc96a77..6b220ae 100644 --- a/doc/pcre2test.1 +++ b/doc/pcre2test.1 @@ -433,7 +433,7 @@ about the pattern: /I info show info about compiled pattern hex pattern is coded in hexadecimal jit[=] use JIT - jitverify verify JIT use + jitverify verify JIT use locale= use this locale memory show memory used newline= set newline type @@ -518,7 +518,7 @@ number in the range 0 to 7: 7 all three modes .sp If no number is given, 7 is assumed. If JIT compilation is successful, the -compiled JIT code will automatically be used when \fBpcre2_match()\fP is run +compiled JIT code will automatically be used when \fBpcre2_match()\fP is run for the appropriate type of match, except when incompatible run-time options are specified. For more details, see the .\" HREF @@ -670,7 +670,7 @@ for a description of their effects. partial_hard (or ph) set PCRE2_PARTIAL_HARD partial_soft (or ps) set PCRE2_PARTIAL_SOFT .sp -The partial matching modifiers are provided with abbreviations because they +The partial matching modifiers are provided with abbreviations because they appear frequently in tests. 
.P If the \fB/posix\fP modifier was present on the pattern, causing the POSIX @@ -844,8 +844,8 @@ context via \fBpcre2_set_match_limit()\fP and \fBpcre2_set_recursion_limit()\fP until it finds the minimum values for each parameter that allow \fBpcre2_match()\fP to complete without error. .P -If JIT is being used, only the match limit is relevant. If DFA matching is -being used, neither limit is relevant, and this modifier is ignored (with a +If JIT is being used, only the match limit is relevant. If DFA matching is +being used, neither limit is relevant, and this modifier is ignored (with a warning message). .P The \fImatch_limit\fP number is a measure of the amount of backtracking @@ -890,10 +890,10 @@ appears, though of course it can also be used to set a default in a \fB#subject\fP command. It specifies the number of pairs of offsets that are available for storing matching information. The default is 15. .P -At least one pair of offsets is always created by -\fBpcre2_match_data_create()\fP, for matching with PCRE2's native API, so a -value of 0 is the same as 1. However a value of 0 is useful when testing the -POSIX API because it causes \fBregexec()\fP to be called with a NULL capture +At least one pair of offsets is always created by +\fBpcre2_match_data_create()\fP, for matching with PCRE2's native API, so a +value of 0 is the same as 1. However a value of 0 is useful when testing the +POSIX API because it causes \fBregexec()\fP to be called with a NULL capture vector. . . diff --git a/doc/pcre2unicode.3 b/doc/pcre2unicode.3 index 3e0eaf4..f95dd99 100644 --- a/doc/pcre2unicode.3 +++ b/doc/pcre2unicode.3 @@ -57,7 +57,7 @@ individual code units. In UTF modes, the dot metacharacter matches one UTF character instead of a single code unit. 
.P -The escape sequence \eC can be used to match a single code unit, in a UTF mode, +The escape sequence \eC can be used to match a single code unit, in a UTF mode, but its use can lead to some strange effects because it breaks up multi-unit characters (see the description of \eC in the .\" HREF @@ -107,8 +107,8 @@ case-equivalent, and these are treated as such. .rs .sp When the PCRE2_UTF option is set, the strings passed as patterns and subjects -are (by default) checked for validity on entry to the relevant functions. -If an invalid UTF string is passed, an error return is given. +are (by default) checked for validity on entry to the relevant functions. +If an invalid UTF string is passed, an error return is given. .P UTF-16 and UTF-32 strings can indicate their endianness by special code knows as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting diff --git a/perltest.pl b/perltest.pl index c9d949e..964fc95 100755 --- a/perltest.pl +++ b/perltest.pl @@ -82,13 +82,13 @@ for (;;) chomp($pattern); $pattern =~ s/\s+$//; - + # Split the pattern from the modifiers and adjust them as necessary. - + $pattern =~ /^\s*((.).*\2)(.*)$/s; $pat = $1; $mod = $3; - + # The private "aftertext" modifier means "print $' afterwards". $showrest = ($mod =~ s/aftertext,?//); @@ -131,9 +131,9 @@ for (;;) for (;;) { last if ! ($_ = <$infile>); - last if $_ =~ /^\s*$/; - } - } + last if $_ =~ /^\s*$/; + } + } next NEXT_RE; } diff --git a/src/dftables.c b/src/dftables.c index 0e2dab3..b6417cc 100644 --- a/src/dftables.c +++ b/src/dftables.c @@ -41,7 +41,7 @@ POSSIBILITY OF SUCH DAMAGE. /* This is a freestanding support program to generate a file containing character tables for PCRE2. The tables are built according to the current -locale using the pcre2_maketables() function, which is part of the PCRE2 API. +locale using the pcre2_maketables() function, which is part of the PCRE2 API. 
*/ #ifdef HAVE_CONFIG_H diff --git a/src/pcre2_auto_possess.c b/src/pcre2_auto_possess.c index 56ec430..f531398 100644 --- a/src/pcre2_auto_possess.c +++ b/src/pcre2_auto_possess.c @@ -38,7 +38,7 @@ POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ -/* This module contains functions that scan a compiled pattern and change +/* This module contains functions that scan a compiled pattern and change repeats into possessive repeats where possible. */ @@ -359,8 +359,8 @@ Returns: points to the start of the next opcode if *code is accepted NULL if *code is not accepted */ -static PCRE2_SPTR -get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc, +static PCRE2_SPTR +get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc, uint32_t *list) { PCRE2_UCHAR c = *code; @@ -387,7 +387,7 @@ if (c >= OP_STAR && c <= OP_TYPEPOSUPTO) if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO) code += IMM2_SIZE; - list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT && + list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT && c != OP_POSPLUS); switch(base) @@ -595,7 +595,7 @@ for(;;) Therefore infinite recursions are not possible. */ c = *code; - + /* Skip over callouts */ if (c == OP_CALLOUT) @@ -624,7 +624,7 @@ for(;;) /* If the bracket is capturing, and referenced by an OP_RECURSE, or it is an atomic sub-pattern (assert, once, etc.) the non-greedy case cannot be converted to a possessive form. */ - + if (base_list[1] == 0) return FALSE; switch(*(code - GET(code, 1))) @@ -636,7 +636,7 @@ for(;;) case OP_ONCE: case OP_ONCE_NC: /* Atomic sub-patterns and assertions can always auto-possessify their - last iterator. However, if the group was entered as a result of checking + last iterator. However, if the group was entered as a result of checking a previous iterator, this is not possible. 
*/ return !entered_a_group; @@ -672,7 +672,7 @@ for(;;) do next_code += GET(next_code, 1); while (*next_code == OP_ALT); /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */ - + next_code += 1 + LINK_SIZE; if (!compare_opcodes(next_code, utf, cb, base_list, base_end)) return FALSE; @@ -681,14 +681,14 @@ for(;;) continue; default: - break; + break; } /* Check for a supported opcode, and load its properties. */ code = get_chr_property_list(code, utf, cb->fcc, list); if (code == NULL) return FALSE; /* Unsupported */ - + /* If either opcode is a small character list, set pointers for comparing characters from that list with another list, or with a property. */ @@ -778,7 +778,7 @@ for(;;) /* Because the bit sets are unaligned bytes, we need to perform byte comparison here. */ - + set_end = set1 + 32; if (invert_bits) { @@ -922,7 +922,7 @@ for(;;) autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP]; if (!accepted) return FALSE; - + if (list[1] == 0) return TRUE; /* Might be an empty repeat. */ continue; @@ -1093,8 +1093,8 @@ but some compilers complain about an unreachable statement. */ if appropriate. This function modifies the compiled opcode! 
Arguments: - code points to start of the byte code - utf TRUE in UTF mode + code points to start of the byte code + utf TRUE in UTF mode cb compile data block Returns: nothing @@ -1111,7 +1111,7 @@ uint32_t list[8]; for (;;) { c = *code; - + if (c >= OP_STAR && c <= OP_TYPEPOSUPTO) { c -= get_repeat_base(c) - OP_STAR; @@ -1244,7 +1244,7 @@ for (;;) } /* Add in the fixed length from the table */ - + code += PRIV(OP_lengths)[c]; /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index f8754c1..dbffc98 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -594,7 +594,7 @@ static pso pso_list[] = { { (uint8_t *)STRING_UTF_RIGHTPAR, 4, PSO_OPT, PCRE2_UTF }, { (uint8_t *)STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP }, { (uint8_t *)STRING_NOTEMPTY_RIGHTPAR, 9, PSO_FLG, PCRE2_NOTEMPTY_SET }, - { (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR,17, PSO_FLG, PCRE2_NE_ATST_SET }, + { (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR,17, PSO_FLG, PCRE2_NE_ATST_SET }, { (uint8_t *)STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPT, PCRE2_NO_AUTO_POSSESS }, { (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE }, { (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 }, @@ -675,12 +675,12 @@ static const uint8_t opcode_possessify[] = { PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION pcre2_code_free(pcre2_code *code) { -if (code != NULL) +if (code != NULL) { if (code->executable_jit != NULL) PRIV(jit_free)(code->executable_jit, &code->memctl); code->memctl.free(code, code->memctl.memory_data); - } + } } @@ -4462,7 +4462,7 @@ for (;; ptr++) syntax, so we just ignore the repeat. */ if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_FALSE && - previous[GET(previous, 1)] != OP_ALT) + previous[GET(previous, 1)] != OP_ALT) goto END_REPEAT; /* There is no sense in actually repeating assertions. 
The only potential @@ -5169,64 +5169,64 @@ for (;; ptr++) namelen = -1; /* => not a name; must set to avoid warning */ name = NULL; /* Always set to avoid warning */ recno = 0; /* Always set to avoid warning */ - + /* Point at character after (?( */ - + ptr++; /* Check for (?(VERSION[>]=n.m), which is a facility whereby indirect - users of PCRE2 via an application can discover which release of PCRE2 + users of PCRE2 via an application can discover which release of PCRE2 is being used. */ - - if (PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 && + + if (PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 && ptr[7] != CHAR_RIGHT_PARENTHESIS) { - BOOL ge = FALSE; + BOOL ge = FALSE; int major = 0; int minor = 0; - + ptr += 7; if (*ptr == CHAR_GREATER_THAN_SIGN) { ge = TRUE; ptr++; - } - + } + /* NOTE: cannot write IS_DIGIT(*(++ptr)) here because IS_DIGIT references its argument twice. */ - + if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr))) - { + { *errorcodeptr = ERR79; - goto FAILED; - } + goto FAILED; + } while (IS_DIGIT(*ptr)) major = major * 10 + *ptr++ - '0'; if (*ptr == CHAR_DOT) { - ptr++; - while (IS_DIGIT(*ptr)) minor = minor * 10 + *ptr++ - '0'; - } + ptr++; + while (IS_DIGIT(*ptr)) minor = minor * 10 + *ptr++ - '0'; + } if (*ptr != CHAR_RIGHT_PARENTHESIS) { *errorcodeptr = ERR79; goto FAILED; - } + } if (ge) code[1+LINK_SIZE] = ((PCRE2_MAJOR > major) || (PCRE2_MAJOR == major && PCRE2_MINOR >= minor))? OP_TRUE : OP_FALSE; - else + else code[1+LINK_SIZE] = (PCRE2_MAJOR == major && PCRE2_MINOR == minor)? OP_TRUE : OP_FALSE; - + ptr++; skipbytes = 1; break; /* End of condition processing */ - } - + } + /* Check for a test for recursion in a named group. */ if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND) @@ -5404,8 +5404,8 @@ for (;; ptr++) } /* Similarly, check for the (?(DEFINE) "condition", which is always - false. During compilation we set OP_DEFINE to distinguish this from - other OP_FALSE conditions so that it can be checked for having only one + false. 
During compilation we set OP_DEFINE to distinguish this from + other OP_FALSE conditions so that it can be checked for having only one branch, but after that the opcode is changed to OP_FALSE. */ else if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0) @@ -6133,7 +6133,7 @@ for (;; ptr++) while (*tc != OP_KET); /* A DEFINE group is never obeyed inline (the "condition" is always - false). It must have only one branch. Having checked this, change the + false). It must have only one branch. Having checked this, change the opcode to OP_FALSE. */ if (code[LINK_SIZE+1] == OP_DEFINE) @@ -6143,7 +6143,7 @@ for (;; ptr++) *errorcodeptr = ERR54; goto FAILED; } - code[LINK_SIZE+1] = OP_FALSE; + code[LINK_SIZE+1] = OP_FALSE; bravalue = OP_DEFINE; /* Just a flag to suppress char handling below */ } @@ -6219,7 +6219,7 @@ for (;; ptr++) than one can replicate it as reqcu if necessary. If the subpattern has no firstcu, set "none" for the whole branch. In both cases, a zero repeat forces firstcu to "none". */ - + if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET) { if (subfirstcuflags >= 0) @@ -6759,7 +6759,7 @@ for (;;) reqcu = firstcu; reqcuflags = firstcuflags; } - } + } firstcuflags = REQ_NONE; } @@ -7389,12 +7389,12 @@ if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0) /* A NULL compile context means "use a default context" */ -if (ccontext == NULL) +if (ccontext == NULL) ccontext = (pcre2_compile_context *)(&PRIV(default_compile_context)); -/* A zero-terminated pattern is indicated by the special length value -PCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero, -to ensure that it is always possible to look one code unit beyond the end of +/* A zero-terminated pattern is indicated by the special length value +PCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero, +to ensure that it is always possible to look one code unit beyond the end of the pattern's characters. 
*/ if (patlen == PCRE2_ZERO_TERMINATED) patlen = PRIV(strlen)(pattern); else @@ -7481,19 +7481,19 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && case PSO_OPT: cb.external_options |= p->value; break; - + case PSO_FLG: setflags |= p->value; - break; + break; case PSO_NL: newline = p->value; - setflags |= PCRE2_NL_SET; + setflags |= PCRE2_NL_SET; break; case PSO_BSR: bsr = p->value; - setflags |= PCRE2_BSR_SET; + setflags |= PCRE2_BSR_SET; break; case PSO_LIMM: @@ -7883,8 +7883,8 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 && /* If the pattern is still not anchored and we do not have a first code unit, see if there is one that is asserted (these are not saved during the compile -because they can cause conflicts with actual literals that follow). This code -need not be obeyed if PCRE2_NO_START_OPTIMIZE is set, as the data it would +because they can cause conflicts with actual literals that follow). This code +need not be obeyed if PCRE2_NO_START_OPTIMIZE is set, as the data it would create will not be used. */ if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0) @@ -7930,7 +7930,7 @@ if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0) } /* Handle the "required code unit", if one is set. In the case of an anchored -pattern, do this only if it follows a variable length item in the pattern. +pattern, do this only if it follows a variable length item in the pattern. Again, skip this if PCRE2_NO_START_OPTIMIZE is set. */ if (reqcuflags >= 0 && @@ -7973,7 +7973,7 @@ while (*codestart == OP_ALT); to set up information such as a bitmap of starting code units and a minimum matching length. 
*/ -if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && +if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && PRIV(study)(re) != 0) { errorcode = ERR31; diff --git a/src/pcre2_config.c b/src/pcre2_config.c index e0d5250..9155907 100644 --- a/src/pcre2_config.c +++ b/src/pcre2_config.c @@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE. #include "config.h" #endif -/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes +/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes its value gets changed by pcre2_internal.h to be in code units. */ static int configured_link_size = LINK_SIZE; @@ -69,7 +69,7 @@ Arguments: Returns: 0 if data returned >= 0 if where is NULL, giving length required PCRE2_ERROR_BADOPTION if "where" not recognized - or JIT target requested when JIT not enabled + or JIT target requested when JIT not enabled */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION @@ -80,33 +80,33 @@ if (where == NULL) /* Requests a length */ switch(what) { default: - return PCRE2_ERROR_BADOPTION; - + return PCRE2_ERROR_BADOPTION; + case PCRE2_CONFIG_BSR: case PCRE2_CONFIG_JIT: case PCRE2_CONFIG_LINKSIZE: case PCRE2_CONFIG_NEWLINE: case PCRE2_CONFIG_STACKRECURSE: case PCRE2_CONFIG_UNICODE: - return sizeof(int); - + return sizeof(int); + case PCRE2_CONFIG_MATCHLIMIT: case PCRE2_CONFIG_PARENSLIMIT: case PCRE2_CONFIG_RECURSIONLIMIT: return sizeof(long int); - + /* These are handled below */ - + case PCRE2_CONFIG_JITTARGET: case PCRE2_CONFIG_UNICODE_VERSION: case PCRE2_CONFIG_VERSION: break; } - } + } switch (what) { - default: + default: return PCRE2_ERROR_BADOPTION; case PCRE2_CONFIG_BSR: @@ -129,9 +129,9 @@ switch (what) #ifdef SUPPORT_JIT { const char *v = PRIV(jit_get_target)(); - return (where == NULL)? (int)strlen(v) : + return (where == NULL)? 
(int)strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v); - } + } #else return PCRE2_ERROR_BADOPTION; #endif @@ -163,9 +163,9 @@ switch (what) *((int *)where) = 1; #endif break; - + case PCRE2_CONFIG_UNICODE_VERSION: - { + { #if defined SUPPORT_UNICODE const char *v = PRIV(unicode_version); #else @@ -183,15 +183,15 @@ switch (what) *((int *)where) = 0; #endif break; - - /* The hackery in setting "v" below is to cope with the case when + + /* The hackery in setting "v" below is to cope with the case when PCRE2_PRERELEASE is set to an empty string (which it is for real releases). - If the second alternative is used in this case, it does not leave a space + If the second alternative is used in this case, it does not leave a space before the date. On the other hand, if all four macros are put into a single - XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted. + XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted. There are problems using an "obvious" approach like this: - - XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE_MINOR) + + XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE_MINOR) XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE_DATE) because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion @@ -199,18 +199,18 @@ switch (what) argument consists of no preprocessing tokens, the behavior is undefined." It turns out the gcc treats this case as a single empty string - which is what we really want - but Visual C grumbles about the lack of an argument for the - macro. Unfortunately, both are within their rights. As there seems to be no - way to test for a macro's value being empty at compile time, we have to + macro. Unfortunately, both are within their rights. As there seems to be no + way to test for a macro's value being empty at compile time, we have to resort to a runtime test. */ - + case PCRE2_CONFIG_VERSION: - { + { const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)? 
XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) : XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE); - return (where == NULL)? (int)strlen(v) : + return (where == NULL)? (int)strlen(v) : PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v); - } + } } return 0; diff --git a/src/pcre2_context.c b/src/pcre2_context.c index 9c3037f..5b9b2f3 100644 --- a/src/pcre2_context.c +++ b/src/pcre2_context.c @@ -72,15 +72,15 @@ free(block); * Get a block and save memory control * *************************************************/ -/* This internal function is called to get a block of memory in which the +/* This internal function is called to get a block of memory in which the memory control data is to be stored at the start for future use. Arguments: size amount of memory required memctl pointer to a memctl block or NULL - + Returns: pointer to memory or NULL on failure -*/ +*/ PCRE2_EXP_DEFN void * PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl) @@ -88,7 +88,7 @@ PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl) pcre2_memctl *newmemctl; void *yield = (memctl == NULL)? malloc(size) : memctl->malloc(size, memctl->memory_data); -if (yield == NULL) return NULL; +if (yield == NULL) return NULL; newmemctl = (pcre2_memctl *)yield; if (memctl == NULL) { @@ -96,9 +96,9 @@ if (memctl == NULL) newmemctl->free = default_free; newmemctl->memory_data = NULL; } -else *newmemctl = *memctl; +else *newmemctl = *memctl; return yield; -} +} @@ -108,11 +108,11 @@ return yield; /* Initializing for compile and match contexts is done in separate, private functions so that these can be called from functions such as pcre2_compile() -when an external context is not supplied. The initializing functions have an +when an external context is not supplied. The initializing functions have an option to set up default memory management. 
*/ PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION -pcre2_general_context_create(void *(*private_malloc)(size_t, void *), +pcre2_general_context_create(void *(*private_malloc)(size_t, void *), void (*private_free)(void *, void *), void *memory_data) { pcre2_general_context *gcontext; @@ -121,7 +121,7 @@ if (private_free == NULL) private_free = default_free; gcontext = private_malloc(sizeof(pcre2_real_general_context), memory_data); if (gcontext == NULL) return NULL; gcontext->memctl.malloc = private_malloc; -gcontext->memctl.free = private_free; +gcontext->memctl.free = private_free; gcontext->memctl.memory_data = memory_data; return gcontext; } @@ -136,7 +136,7 @@ const pcre2_compile_context PRIV(default_compile_context) = { PRIV(default_tables), BSR_DEFAULT, NEWLINE_DEFAULT, - PARENS_NEST_LIMIT }; + PARENS_NEST_LIMIT }; /* The create function copies the default into the new memory, but must override the default memory handling functions if a gcontext was provided. */ @@ -145,8 +145,8 @@ PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION pcre2_compile_context_create(pcre2_general_context *gcontext) { pcre2_compile_context *ccontext = PRIV(memctl_malloc)( - sizeof(pcre2_real_compile_context), (pcre2_memctl *)gcontext); -if (ccontext == NULL) return NULL; + sizeof(pcre2_real_compile_context), (pcre2_memctl *)gcontext); +if (ccontext == NULL) return NULL; *ccontext = PRIV(default_compile_context); if (gcontext != NULL) *((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext); @@ -159,14 +159,14 @@ when no context is supplied to a match function. 
*/ const pcre2_match_context PRIV(default_match_context) = { { default_malloc, default_free, NULL }, -#ifdef HEAP_MATCH_RECURSE +#ifdef HEAP_MATCH_RECURSE { default_malloc, default_free, NULL }, #endif NULL, NULL, MATCH_LIMIT, - MATCH_LIMIT_RECURSION }; - + MATCH_LIMIT_RECURSION }; + /* The create function copies the default into the new memory, but must override the default memory handling functions if a gcontext was provided. */ @@ -174,8 +174,8 @@ PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION pcre2_match_context_create(pcre2_general_context *gcontext) { pcre2_match_context *mcontext = PRIV(memctl_malloc)( - sizeof(pcre2_real_match_context), (pcre2_memctl *)gcontext); -if (mcontext == NULL) return NULL; + sizeof(pcre2_real_match_context), (pcre2_memctl *)gcontext); +if (mcontext == NULL) return NULL; *mcontext = PRIV(default_match_context); if (gcontext != NULL) *((pcre2_memctl *)mcontext) = *((pcre2_memctl *)gcontext); @@ -190,8 +190,8 @@ return mcontext; PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION pcre2_general_context_copy(pcre2_general_context *gcontext) { -pcre2_general_context *new = - gcontext->memctl.malloc(sizeof(pcre2_real_general_context), +pcre2_general_context *new = + gcontext->memctl.malloc(sizeof(pcre2_real_general_context), gcontext->memctl.memory_data); if (new == NULL) return NULL; memcpy(new, gcontext, sizeof(pcre2_real_general_context)); @@ -202,8 +202,8 @@ return new; PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION pcre2_compile_context_copy(pcre2_compile_context *ccontext) { -pcre2_compile_context *new = - ccontext->memctl.malloc(sizeof(pcre2_real_compile_context), +pcre2_compile_context *new = + ccontext->memctl.malloc(sizeof(pcre2_real_compile_context), ccontext->memctl.memory_data); if (new == NULL) return NULL; memcpy(new, ccontext, sizeof(pcre2_real_compile_context)); @@ -214,8 +214,8 @@ return new; PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION 
pcre2_match_context_copy(pcre2_match_context *mcontext) { -pcre2_match_context *new = - mcontext->memctl.malloc(sizeof(pcre2_real_match_context), +pcre2_match_context *new = + mcontext->memctl.malloc(sizeof(pcre2_real_match_context), mcontext->memctl.memory_data); if (new == NULL) return NULL; memcpy(new, mcontext, sizeof(pcre2_real_match_context)); @@ -267,14 +267,14 @@ data. */ /* ------------ Compile contexts ------------ */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_character_tables(pcre2_compile_context *ccontext, +pcre2_set_character_tables(pcre2_compile_context *ccontext, const unsigned char *tables) { ccontext->tables = tables; return 0; } -PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value) { switch(value) @@ -283,13 +283,13 @@ switch(value) case PCRE2_BSR_UNICODE: ccontext->bsr_convention = value; return 0; - + default: - return PCRE2_ERROR_BADDATA; + return PCRE2_ERROR_BADDATA; } } -PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline) { switch(newline) @@ -301,10 +301,10 @@ switch(newline) case PCRE2_NEWLINE_ANYCRLF: ccontext->newline_convention = newline; return 0; - - default: - return PCRE2_ERROR_BADDATA; - } + + default: + return PCRE2_ERROR_BADDATA; + } } PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION @@ -315,7 +315,7 @@ return 0; } PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, +pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, int (*guard)(uint32_t)) { ccontext->stack_guard = guard; @@ -325,8 +325,8 @@ return 0; /* ------------ Match contexts ------------ */ -PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_callout(pcre2_match_context *mcontext, +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_set_callout(pcre2_match_context *mcontext, int (*callout)(pcre2_callout_block *), 
void *callout_data) { mcontext->callout = callout; @@ -349,8 +349,8 @@ return 0; } PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_set_recursion_memory_management(pcre2_match_context *mcontext, - void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *), +pcre2_set_recursion_memory_management(pcre2_match_context *mcontext, + void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *), void *mydata) { #ifdef HEAP_MATCH_RECURSE @@ -364,6 +364,6 @@ mcontext->stack_memctl.memory_data = mydata; (void)mydata; #endif return 0; -} +} /* End of pcre2_context.c */ diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c index 557752a..fa9ccc7 100644 --- a/src/pcre2_dfa_match.c +++ b/src/pcre2_dfa_match.c @@ -376,7 +376,7 @@ stateblock *next_active_state, *next_new_state; const uint8_t *ctypes, *lcc, *fcc; PCRE2_SPTR ptr; -PCRE2_SPTR end_code; +PCRE2_SPTR end_code; PCRE2_SPTR first_op; dfa_recursion_info new_recursive; @@ -542,8 +542,8 @@ for (;;) BOOL partial_newline = FALSE; BOOL could_continue = reset_could_continue; reset_could_continue = FALSE; - - if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr; + + if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr; /* Make the new state list into the active state list and empty the new state list. */ @@ -633,7 +633,7 @@ for (;;) /* If this opcode inspects a character, but we are at the end of the subject, remember the fact for use when testing for a partial match. 
*/ - + if (clen == 0 && poptable[codevalue] != 0) could_continue = TRUE; @@ -975,7 +975,7 @@ for (;;) if (utf) { FORWARDCHARTEST(temp, mb->end_subject); } #endif mb->last_used_ptr = temp; - } + } #ifdef SUPPORT_UNICODE if ((mb->poptions & PCRE2_UCP) != 0) { @@ -2643,7 +2643,7 @@ for (;;) if (condcode == OP_FALSE) { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } - + /* There is also an always-true condition */ if (condcode == OP_TRUE) @@ -2999,7 +2999,7 @@ for (;;) The "could_continue" variable is true if a state could have continued but for the fact that the end of the subject was reached. */ - + if (new_count <= 0) { if (rlevel == 1 && /* Top level, and */ @@ -3098,7 +3098,7 @@ if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject); /* Plausibility checks */ if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION; -if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL) +if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL) return PCRE2_ERROR_NULL; if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE; if (start_offset > length) return PCRE2_ERROR_BADOFFSET; @@ -3127,19 +3127,19 @@ with different endianness. */ if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8) return PCRE2_ERROR_BADMODE; -/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the -options variable for this function. Users of PCRE2 who are not calling the -function directly would like to have a way of setting these flags, in the same +/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the +options variable for this function. Users of PCRE2 who are not calling the +function directly would like to have a way of setting these flags, in the same way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with -constructions like (*NO_AUTOPOSSESS). 
To enable this, (*NOTEMPTY) and -(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be -transferred to the options for this function. The bits are guaranteed to be -adjacent, but do not have the same values. This bit of Boolean trickery assumes -that the match-time bits are not more significant than the flag bits. If by -accident this is not the case, a compile-time division by zero error will +constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and +(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be +transferred to the options for this function. The bits are guaranteed to be +adjacent, but do not have the same values. This bit of Boolean trickery assumes +that the match-time bits are not more significant than the flag bits. If by +accident this is not the case, a compile-time division by zero error will occur. */ -#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET) +#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET) #define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART) options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO)); #undef FF @@ -3168,7 +3168,7 @@ end_subject = subject + length; req_cu_ptr = start_match - 1; anchored = (options & (PCRE2_ANCHORED|PCRE2_DFA_RESTART)) != 0 || (re->overall_options & PCRE2_ANCHORED) != 0; - + /* The "must be at the start of a line" flags are used in a loop when finding where to start. */ @@ -3307,7 +3307,7 @@ for (;;) /* There are some optimizations that avoid running the match if a known starting point is not found, or if a known later code unit is not present. However, there is an option (settable at compile time) that disables - these, for testing and for ensuring that all callouts do actually occur. + these, for testing and for ensuring that all callouts do actually occur. The optimizations must also be avoided when restarting a DFA match. 
*/ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && @@ -3493,7 +3493,7 @@ for (;;) /* Anything other than "no match" means we are done, always; otherwise, carry on only if not anchored. */ - + if (rc != PCRE2_ERROR_NOMATCH || anchored) { if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0) @@ -3504,7 +3504,7 @@ for (;;) match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject); match_data->rightchar = mb->last_used_ptr - subject; match_data->startchar = (PCRE2_SIZE)(start_match - subject); - match_data->rc = rc; + match_data->rc = rc; return rc; } diff --git a/src/pcre2_error.c b/src/pcre2_error.c index 20e9a79..f2f900f 100644 --- a/src/pcre2_error.c +++ b/src/pcre2_error.c @@ -48,7 +48,7 @@ POSSIBILITY OF SUCH DAMAGE. #define STRING(a) # a #define XSTRING(s) STRING(s) -/* The texts of compile-time error messages. Compile-time error numbers start +/* The texts of compile-time error messages. Compile-time error numbers start at COMPILE_ERROR_BASE (100). Do not ever re-use any error number, because they are documented. Always add a @@ -101,7 +101,7 @@ static const char compile_error_texts[] = "(?R or (?[+-]digits must be followed by )\0" /* 30 */ "unknown POSIX class name\0" - "internal error in pcre2_study(): should not occur\0" + "internal error in pcre2_study(): should not occur\0" "this version of PCRE does not have UTF or Unicode property support\0" "parentheses are too deeply nested (stack check)\0" "character code point value in \\x{} or \\o{} is too large\0" @@ -158,94 +158,94 @@ static const char compile_error_texts[] = "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" "character code point value in \\u.... sequence is too large\0" "digits missing in \\x{} or \\o{}\0" - "syntax error in (?(VERSION condition\0" + "syntax error in (?(VERSION condition\0" ; /* Match-time and UTF error texts are in the same format. 
*/ static const char match_error_texts[] = "no error\0" - "no match\0" + "no match\0" "partial match\0" "UTF-8 error: 1 byte missing at end\0" "UTF-8 error: 2 bytes missing at end\0" - /* 5 */ + /* 5 */ "UTF-8 error: 3 bytes missing at end\0" "UTF-8 error: 4 bytes missing at end\0" "UTF-8 error: 5 bytes missing at end\0" - "UTF-8 error: byte 2 top bits not 0x80\0" - "UTF-8 error: byte 3 top bits not 0x80\0" - /* 10 */ - "UTF-8 error: byte 4 top bits not 0x80\0" - "UTF-8 error: byte 5 top bits not 0x80\0" + "UTF-8 error: byte 2 top bits not 0x80\0" + "UTF-8 error: byte 3 top bits not 0x80\0" + /* 10 */ + "UTF-8 error: byte 4 top bits not 0x80\0" + "UTF-8 error: byte 5 top bits not 0x80\0" "UTF-8 error: byte 6 top bits not 0x80\0" - "UTF-8 error: 5-byte character is not allowed (RFC 3629)\0" + "UTF-8 error: 5-byte character is not allowed (RFC 3629)\0" "UTF-8 error: 6-byte character is not allowed (RFC 3629)\0" - /* 15 */ + /* 15 */ "UTF-8 error: code points greater than 0x10ffff are not defined\0" - "UTF-8 error: code points 0xd800-0xdfff are not defined\0" - "UTF-8 error: overlong 2-byte sequence\0" - "UTF-8 error: overlong 3-byte sequence\0" + "UTF-8 error: code points 0xd800-0xdfff are not defined\0" + "UTF-8 error: overlong 2-byte sequence\0" + "UTF-8 error: overlong 3-byte sequence\0" "UTF-8 error: overlong 4-byte sequence\0" - /* 20 */ + /* 20 */ "UTF-8 error: overlong 5-byte sequence\0" "UTF-8 error: overlong 6-byte sequence\0" "UTF-8 error: isolated 0x80 byte\0" - "UTF-8 error: illegal byte (0xfe or 0xff)\0" - "UTF-16 error: missing low surrogate at end\0" - /* 25 */ - "UTF-16 error: invalid low surrogate\0" - "UTF-16 error: isolated low surrogate\0" + "UTF-8 error: illegal byte (0xfe or 0xff)\0" + "UTF-16 error: missing low surrogate at end\0" + /* 25 */ + "UTF-16 error: invalid low surrogate\0" + "UTF-16 error: isolated low surrogate\0" "UTF-32 error: code points 0xd800-0xdfff are not defined\0" - "UTF-32 error: code points greater than 0x10ffff are not 
defined\0" + "UTF-32 error: code points greater than 0x10ffff are not defined\0" "bad data value\0" - /* 30 */ + /* 30 */ "bad length\0" "magic number missing\0" "pattern compiled in wrong mode: 8/16/32-bit error\0" "bad offset value\0" "bad option value\0" - /* 35 */ + /* 35 */ "bad offset into UTF string\0" - "callout error code\0" /* Never returned by PCRE2 itself */ + "callout error code\0" /* Never returned by PCRE2 itself */ "invalid data in workspace for DFA restart\0" "too much recursion for DFA matching\0" "backreference condition or recursion test not supported for DFA matching\0" - /* 40 */ + /* 40 */ "item unsupported for DFA matching\0" "workspace size exceeded in DFA matching\0" "internal error - pattern overwritten?\0" "bad JIT option\0" "JIT stack limit reached\0" - /* 45 */ + /* 45 */ "match limit exceeded\0" "no more memory\0" - "unknown or unset substring\0" + "unknown or unset substring\0" "NULL argument passed\0" "nested recursion at the same subject position\0" - /* 50 */ + /* 50 */ "recursion limit exceeded\0" - "requested value is not set\0" - ; + "requested value is not set\0" + ; /************************************************* * Return error message * *************************************************/ -/* This function copies an error message into a buffer whose units are of an -appropriate width. Error numbers are positive for compile-time errors, and -negative for match-time errors (except for UTF errors), but the numbers are all +/* This function copies an error message into a buffer whose units are of an +appropriate width. Error numbers are positive for compile-time errors, and +negative for match-time errors (except for UTF errors), but the numbers are all distinct. 
Arguments: enumber error number buffer where to put the message (zero terminated) size size of the buffer - + Returns: length of message if all is well negative on error -*/ +*/ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, size_t size) @@ -260,23 +260,23 @@ if (size == 0) return PCRE2_ERROR_NOMEMORY; if (enumber > COMPILE_ERROR_BASE) /* Compile error */ { message = compile_error_texts; - n = enumber - COMPILE_ERROR_BASE; - } + n = enumber - COMPILE_ERROR_BASE; + } else /* Match or UTF error */ { message = match_error_texts; - n = -enumber; - } - + n = -enumber; + } + for (; n > 0; n--) { while (*message++ != CHAR_NULL) {}; - if (*message == CHAR_NULL) - { + if (*message == CHAR_NULL) + { sprintf(xbuff, "Internal error: no text for error %d", enumber); - break; + break; } - } + } for (i = 0; *message != 0; i++) { @@ -287,9 +287,9 @@ for (i = 0; *message != 0; i++) } buffer[i] = *message++; } - + buffer[i] = 0; -return i; +return i; } /* End of pcre2_error.c */ diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index 6867a54..8738688 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -1553,11 +1553,11 @@ enum { /* This is used to skip a subpattern with a {0} quantifier */ OP_SKIPZERO, /* 162 */ - + /* This is used to identify a DEFINE group during compilation so that it can - be checked for having only one branch. It is changed to OP_FALSE before + be checked for having only one branch. It is changed to OP_FALSE before compilation finishes. */ - + OP_DEFINE, /* 163 */ /* This is not an opcode, but is used to check that tables indexed by opcode @@ -1565,7 +1565,7 @@ enum { some in the past. */ OP_TABLE_LENGTH - + }; /* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro @@ -1708,7 +1708,7 @@ in UTF-8 mode. The code that uses this table must know about such things. 
*/ 1, 3, /* THEN, THEN_ARG */ \ 1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \ 1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \ - 1 /* DEFINE */ + 1 /* DEFINE */ /* A magic value for OP_RREF to indicate the "any recursion" condition. */ @@ -1830,9 +1830,9 @@ extern const uint8_t PRIV(ucd_stage1)[]; extern const uint16_t PRIV(ucd_stage2)[]; extern const uint32_t PRIV(ucp_gbtable)[]; extern const uint32_t PRIV(ucp_gentype)[]; -#ifdef SUPPORT_JIT +#ifdef SUPPORT_JIT extern const int PRIV(ucp_typerange)[]; -#endif +#endif extern const char *PRIV(unicode_version); extern const ucp_type_table PRIV(utt)[]; extern const char PRIV(utt_names)[]; diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h index 42d3571..5e1abc9 100644 --- a/src/pcre2_intmodedep.h +++ b/src/pcre2_intmodedep.h @@ -39,16 +39,16 @@ POSSIBILITY OF SUCH DAMAGE. */ -/* This module contains mode-dependent macro and structure definitions. The +/* This module contains mode-dependent macro and structure definitions. The file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined. These mode-dependent items are kept in a separate file so that they can also be -#included multiple times for different code unit widths by pcre2test in order -to have access to the hidden structures at all supported widths. +#included multiple times for different code unit widths by pcre2test in order +to have access to the hidden structures at all supported widths. Some of the mode-dependent macros are required at different widths for different parts of the pcre2test code (in particular, the included pcre_printint.c file). We undefine them here so that they can be re-defined for -multiple inclusions. Not all of these are used in pcre2test, but it's easier +multiple inclusions. Not all of these are used in pcre2test, but it's easier just to undefine them all. */ #undef ACROSSCHAR @@ -93,7 +93,7 @@ request for an even bigger limit. 
For this reason, and also to make the code easier to maintain, the storing and loading of offsets from the compiled code unit string is now handled by the macros that are defined here. -The macros are controlled by the value of LINK_SIZE. This defaults to 2, but +The macros are controlled by the value of LINK_SIZE. This defaults to 2, but values of 2 or 4 are also supported. */ /* ------------------- 8-bit support ------------------ */ @@ -173,14 +173,14 @@ values of 2 or 4 are also supported. */ #else #error Unsupported compiling mode -#endif +#endif /* --------------- Other mode-specific macros ----------------- */ /* PCRE uses some other (at least) 16-bit quantities that do not change when the size of offsets changes. There are used for repeat counts and for other -things such as capturing parenthesis numbers in back references. +things such as capturing parenthesis numbers in back references. Define the number of code units required to hold a 16-bit count/offset, and macros to load and store such a value. For reasons that I do not understand, @@ -196,7 +196,7 @@ arithmetic results in a signed value. Hence the cast. */ #else /* Code units are 16 or 32 bits */ #define IMM2_SIZE 1 #define GET2(a,n) a[n] -#define PUT2(a,n,d) a[n] = d +#define PUT2(a,n,d) a[n] = d #endif /* Other macros that are different for 8-bit mode. The MAX_255 macro checks @@ -346,7 +346,7 @@ because almost all calls are already within a block of UTF-8 only code. */ /* Same as above, but it allows a fully customizable form. */ #define ACROSSCHAR(condition, eptr, action) \ while((condition) && ((eptr) & 0xc0) == 0x80) action - + /* Deposit a character into memory, returning the number of code units. */ #define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \ @@ -545,10 +545,10 @@ These are all no-ops since all UTF-32 characters fit into one pcre_uchar. 
*/ /* ----------------------- HIDDEN STRUCTURES ----------------------------- */ -/* NOTE: All these structures *must* start with a pcre2_memctl structure. The +/* NOTE: All these structures *must* start with a pcre2_memctl structure. The code that uses them is simpler because it assumes this. */ -/* The real general context structure. At present it holds only data for custom +/* The real general context structure. At present it holds only data for custom memory control. */ typedef struct pcre2_real_general_context { @@ -572,9 +572,9 @@ typedef struct pcre2_real_match_context { pcre2_memctl memctl; #ifdef HEAP_MATCH_RECURSE pcre2_memctl stack_memctl; -#endif +#endif int (*callout)(pcre2_callout_block *); - void *callout_data; + void *callout_data; uint32_t match_limit; uint32_t recursion_limit; } pcre2_real_match_context; @@ -584,9 +584,9 @@ typedef struct pcre2_real_match_context { typedef struct pcre2_real_code { pcre2_memctl memctl; /* Memory control fields */ const uint8_t *tables; /* The character tables */ - void *executable_jit; /* Pointer to JIT code */ + void *executable_jit; /* Pointer to JIT code */ uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */ - size_t blocksize; /* Total (bytes) that was malloc-ed */ + size_t blocksize; /* Total (bytes) that was malloc-ed */ uint32_t magic_number; /* Paranoid and endianness check */ uint32_t compile_options; /* Options passed to pcre2_compile() */ uint32_t overall_options; /* Options after processing the pattern */ @@ -596,10 +596,10 @@ typedef struct pcre2_real_code { uint32_t first_codeunit; /* Starting code unit */ uint32_t last_codeunit; /* This codeunit must be seen */ uint16_t bsr_convention; /* What \R matches */ - uint16_t newline_convention; /* What is a newline? */ + uint16_t newline_convention; /* What is a newline? 
*/ uint16_t max_lookbehind; /* Longest lookbehind (characters) */ - uint16_t minlength; /* Minimum length of match */ - uint16_t top_bracket; /* Highest numbered group */ + uint16_t minlength; /* Minimum length of match */ + uint16_t top_bracket; /* Highest numbered group */ uint16_t top_backref; /* Highest numbered back reference */ uint16_t name_entry_size; /* Size (code units) of table entries */ uint16_t name_count; /* Number of name entries in the table */ @@ -614,10 +614,10 @@ typedef struct pcre2_real_match_data { int rc; /* The return code from the match */ PCRE2_SIZE leftchar; /* Offset to leftmost code unit */ PCRE2_SIZE rightchar; /* Offset to rightmost code unit */ - PCRE2_SIZE startchar; /* Offset to starting code unit */ - PCRE2_SPTR mark; /* Pointer to last mark */ + PCRE2_SIZE startchar; /* Offset to starting code unit */ + PCRE2_SPTR mark; /* Pointer to last mark */ uint16_t oveccount; /* Number of pairs */ - PCRE2_SIZE ovector[1]; /* The first field */ + PCRE2_SIZE ovector[1]; /* The first field */ } pcre2_real_match_data; @@ -700,7 +700,7 @@ the system stack. */ typedef struct ovecsave_frame { struct ovecsave_frame *next; /* Next frame on free chain */ PCRE2_SIZE saved_ovec[1]; /* First vector element */ -} ovecsave_frame; +} ovecsave_frame; /* Structure for items in a linked list that represents an explicit recursive call within the pattern; used by pcre_match(). 
*/ @@ -738,7 +738,7 @@ typedef struct match_block { pcre2_memctl memctl; /* For general use */ #ifdef HEAP_MATCH_RECURSE pcre2_memctl stack_memctl; /* For "stack" frames */ -#endif +#endif uint32_t match_call_count; /* As it says */ uint32_t match_limit; /* As it says */ uint32_t match_limit_recursion; /* As it says */ @@ -763,7 +763,7 @@ typedef struct match_block { PCRE2_SPTR start_match_ptr; /* Start of matched string */ PCRE2_SPTR end_match_ptr; /* Subject position at end match */ PCRE2_SPTR start_used_ptr; /* Earliest consulted character */ - PCRE2_SPTR last_used_ptr; /* Latest consulted character */ + PCRE2_SPTR last_used_ptr; /* Latest consulted character */ PCRE2_SPTR mark; /* Mark pointer to pass back on success */ PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */ PCRE2_SPTR once_target; /* Where to back up to for atomic groups */ @@ -778,7 +778,7 @@ typedef struct match_block { PCRE2_UCHAR nl[4]; /* Newline string when fixed */ eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */ recursion_info *recursive; /* Linked list of recursion data */ - ovecsave_frame *ovecsave_chain; /* Linked list of free ovecsave blocks */ + ovecsave_frame *ovecsave_chain; /* Linked list of free ovecsave blocks */ void *callout_data; /* To pass back to callouts */ int (*callout)(pcre2_callout_block *); /* Callout function or NULL */ #ifdef HEAP_MATCH_RECURSE @@ -795,7 +795,7 @@ typedef struct dfa_match_block { PCRE2_SPTR start_subject ; /* Start of the subject string */ PCRE2_SPTR end_subject; /* End of subject string */ PCRE2_SPTR start_used_ptr; /* Earliest consulted character */ - PCRE2_SPTR last_used_ptr; /* Latest consulted character */ + PCRE2_SPTR last_used_ptr; /* Latest consulted character */ const uint8_t *tables; /* Character tables */ PCRE2_SIZE start_offset; /* The start offset value */ uint32_t moptions; /* Match options */ diff --git a/src/pcre2_jit_match.c b/src/pcre2_jit_match.c index bc9e9ae..9d18cbf 100644 --- 
a/src/pcre2_jit_match.c +++ b/src/pcre2_jit_match.c @@ -72,9 +72,9 @@ Arguments: length length of subject string (may contain binary zeros) start_offset where to start in the subject string options option bits - match_data points to a match_data block - mcontext points to a match context - jit_stack points to a JIT stack + match_data points to a match_data block + mcontext points to a match context + jit_stack points to a JIT stack Returns: > 0 => success; value is the number of ovector pairs filled = 0 => success, but ovector is not big enough diff --git a/src/pcre2_maketables.c b/src/pcre2_maketables.c index 5df7ca4..ca68bca 100644 --- a/src/pcre2_maketables.c +++ b/src/pcre2_maketables.c @@ -60,9 +60,9 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */ /* This function builds a set of character tables for use by PCRE2 and returns a pointer to them. They are build using the ctype functions, and consequently their contents will depend upon the current locale setting. When compiled as -part of the library, the store is obtained via a general context malloc, if -supplied, but when DFTABLES is defined (when compiling the dftables auxiliary -program) malloc() is used, and the function has a different name so as not to +part of the library, the store is obtained via a general context malloc, if +supplied, but when DFTABLES is defined (when compiling the dftables auxiliary +program) malloc() is used, and the function has a different name so as not to clash with the prototype in pcre2.h. Arguments: none when DFTABLES is defined diff --git a/src/pcre2_match.c b/src/pcre2_match.c index e6a631e..94309ee 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -56,7 +56,7 @@ POSSIBILITY OF SUCH DAMAGE. 
(PCRE2_ANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \ PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \ PCRE2_PARTIAL_SOFT) - + #define PUBLIC_JIT_MATCH_OPTIONS \ (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\ PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD) @@ -125,24 +125,24 @@ ovector length is always a multiple of 3. */ /* This function is called only when it is known that the offset lies within the offsets that have so far been used in the match. Note that in caseless UTF-8 mode, the number of subject bytes matched may be different to the number -of reference bytes. (In theory this could also happen in UTF-16 mode, but it +of reference bytes. (In theory this could also happen in UTF-16 mode, but it seems unlikely.) Arguments: offset index into the offset vector - offset_top top of the used offset vector + offset_top top of the used offset vector eptr pointer into the subject mb points to match block caseless TRUE if caseless - lengthptr pointer for returning the length matched + lengthptr pointer for returning the length matched Returns: = 0 sucessful match; number of code units matched is set < 0 no match - > 0 partial match + > 0 partial match */ static int -match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr, +match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr, match_block *mb, BOOL caseless, PCRE2_SIZE *lengthptr) { #if defined SUPPORT_UNICODE @@ -153,7 +153,7 @@ register PCRE2_SPTR p; PCRE2_SIZE length; PCRE2_SPTR eptr_start = eptr; -/* Deal with an unset group. The default is no match, but there is an option to +/* Deal with an unset group. The default is no match, but there is an option to match an empty string. 
*/ if (offset >= offset_top || mb->ovector[offset] == PCRE2_UNSET) @@ -164,7 +164,7 @@ if (offset >= offset_top || mb->ovector[offset] == PCRE2_UNSET) return 0; /* Match */ } else return -1; /* No match */ - } + } /* Separate the caseless and UTF cases for speed. */ @@ -217,7 +217,7 @@ if (caseless) if (eptr >= mb->end_subject) return 1; /* Partial match */ cc = UCHAR21TEST(eptr); cp = UCHAR21TEST(p); - if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc)) + if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc)) return -1; /* No match */ p++; eptr++; @@ -345,7 +345,7 @@ argument of match(), which never changes. */ } -/* Structure for remembering the local variables in a private frame. Arrange it +/* Structure for remembering the local variables in a private frame. Arrange it so as to minimize the number of holes. */ typedef struct heapframe { @@ -364,7 +364,7 @@ typedef struct heapframe { PCRE2_SPTR Xpp; PCRE2_SPTR Xprev; PCRE2_SPTR Xsaved_eptr; - + eptrblock *Xeptrb; PCRE2_SIZE Xlength; @@ -377,7 +377,7 @@ typedef struct heapframe { uint32_t Xrdepth; uint32_t Xop; uint32_t Xsave_capture_last; - + #ifdef SUPPORT_UNICODE uint32_t Xprop_value; int Xprop_type; @@ -401,7 +401,7 @@ typedef struct heapframe { #ifdef SUPPORT_UNICODE PCRE2_UCHAR Xocchars[6]; -#endif +#endif } heapframe; #endif @@ -414,9 +414,9 @@ typedef struct heapframe { /* When HEAP_MATCH_RECURSE is not defined, the match() function implements backtrack points by calling itself recursively in all but one case. The one special case is when processing OP_RECURSE, which specifies recursion in the -pattern. The entire ovector must be saved and restored while processing -OP_RECURSE. If the ovector is small enough, instead of calling match() -directly, op_recurse_ovecsave() is called. This function uses the system stack +pattern. The entire ovector must be saved and restored while processing +OP_RECURSE. 
If the ovector is small enough, instead of calling match() +directly, op_recurse_ovecsave() is called. This function uses the system stack to save the ovector while calling match() to process the pattern recursion. */ #ifndef HEAP_MATCH_RECURSE @@ -425,7 +425,7 @@ to save the ovector while calling match() to process the pattern recursion. */ op_recurse_ovecsave(). */ static int -match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart, +match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart, PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth); @@ -433,7 +433,7 @@ match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart, * Process OP_RECURSE, stacking ovector * *************************************************/ -/* When this function is called, mb->recursive has already been updated to +/* When this function is called, mb->recursive has already been updated to point to a new recursion data block, and all its fields other than ovec_save have been set. @@ -447,9 +447,9 @@ Arguments: eptrb pointer to chain of blocks containing eptr at start of brackets - for testing for empty matches rdepth the recursion depth - + Returns: a match() return code -*/ +*/ static int op_recurse_ovecsave(REGISTER PCRE2_SPTR eptr, PCRE2_SPTR callpat, @@ -472,7 +472,7 @@ data and the last captured value. 
*/ do { if (cbegroup) mb->match_function_type = MATCH_CBEGROUP; - rrc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top, + rrc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top, mb, eptrb, rdepth + 1); memcpy(mb->ovector, new_recursive->ovec_save, mb->offset_end * sizeof(PCRE2_SIZE)); @@ -560,7 +560,7 @@ Returns: MATCH_MATCH if matched ) these values are >= 0 */ static int -match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart, +match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart, PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth) { /* These variables do not need to be preserved over recursion in this function, @@ -1382,10 +1382,10 @@ for (;;) case OP_FALSE: break; - + case OP_TRUE: condition = TRUE; - break; + break; /* The condition is an assertion. Call match() to evaluate it - setting mb->match_function_type to MATCH_CONDASSERT causes it to stop at the end @@ -1475,7 +1475,7 @@ for (;;) update the last used pointer. */ case OP_ASSERT_ACCEPT: - if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr; + if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr; case OP_ACCEPT: case OP_END: @@ -1735,7 +1735,7 @@ for (;;) case OP_RECURSE: { - ovecsave_frame *fr; + ovecsave_frame *fr; recursion_info *ri; uint32_t recno; @@ -1762,15 +1762,15 @@ for (;;) ecode += 1 + LINK_SIZE; - /* When we are using the system stack for match() recursion we can call a - function that uses the system stack for preserving the ovector while + /* When we are using the system stack for match() recursion we can call a + function that uses the system stack for preserving the ovector while processing the pattern recursion, but only if the ovector is small enough. 
*/ - + #ifndef HEAP_MATCH_RECURSE if (mb->offset_end <= OP_RECURSE_STACK_SAVE_MAX) { - rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb, + rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb, eptrb, rdepth); mb->recursive = new_recursive.prevrec; if (rrc != MATCH_MATCH && rrc != MATCH_ACCEPT) RRETURN(rrc); @@ -1785,10 +1785,10 @@ for (;;) } #endif /* If the ovector is too big, or if we are using the heap for match() - recursion, we have to use the heap for saving the ovector. Used ovecsave - frames are kept on a chain and re-used. This makes a small improvement in + recursion, we have to use the heap for saving the ovector. Used ovecsave + frames are kept on a chain and re-used. This makes a small improvement in execution time on Linux. */ - + if (mb->ovecsave_chain != NULL) { new_recursive.ovec_save = mb->ovecsave_chain->saved_ovec; @@ -1800,17 +1800,17 @@ for (;;) mb->offset_end * sizeof(PCRE2_SIZE), mb->memctl.memory_data)); if (fr == NULL) RRETURN(PCRE2_ERROR_NOMEMORY); new_recursive.ovec_save = fr->saved_ovec; - } - + } + memcpy(new_recursive.ovec_save, mb->ovector, mb->offset_end * sizeof(PCRE2_SIZE)); - + /* Do the recursion. After processing each alternative, restore the ovector data and the last captured value. This code has the same overall logic as the code in the op_recurse_ovecsave() function, but is adapted to use RMATCH/RRETURN and to release the heap block containing the saved ovector. */ - + cbegroup = (*callpat >= OP_SBRA); do { @@ -1821,51 +1821,51 @@ for (;;) mb->offset_end * sizeof(PCRE2_SIZE)); mb->capture_last = new_recursive.saved_capture_last; mb->recursive = new_recursive.prevrec; - + if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) { fr = (ovecsave_frame *) ((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *)); fr->next = mb->ovecsave_chain; - mb->ovecsave_chain = fr; - + mb->ovecsave_chain = fr; + /* Set where we got to in the subject, and reset the start, in case it was changed by \K. 
This *is* propagated back out of a recursion, for Perl compatibility. */ - + eptr = mb->end_match_ptr; mstart = mb->start_match_ptr; goto RECURSION_MATCHED; /* Exit loop; end processing */ } - + /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a recursion; they cause a NOMATCH for the entire recursion. These codes are defined in a range that can be tested for. */ - + if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX) - { + { rrc = MATCH_NOMATCH; - goto RECURSION_RETURN; + goto RECURSION_RETURN; } - + /* Any return code other than NOMATCH is an error. */ - + if (rrc != MATCH_NOMATCH) goto RECURSION_RETURN; mb->recursive = &new_recursive; callpat += GET(callpat, 1); } while (*callpat == OP_ALT); - + RECURSION_RETURN: mb->recursive = new_recursive.prevrec; fr = (ovecsave_frame *) ((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *)); fr->next = mb->ovecsave_chain; - mb->ovecsave_chain = fr; + mb->ovecsave_chain = fr; RRETURN(rrc); } - - RECURSION_MATCHED: + + RECURSION_MATCHED: break; /* An alternation is the end of a branch; scan along to find the end of the @@ -1942,7 +1942,7 @@ for (;;) mb->end_match_ptr = eptr; /* For ONCE_NC */ mb->end_offset_top = offset_top; mb->start_match_ptr = mstart; - if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr; + if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr; RRETURN(MATCH_MATCH); /* Sets mb->mark */ } @@ -1966,7 +1966,7 @@ for (;;) { mb->end_match_ptr = eptr; mb->start_match_ptr = mstart; - if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr; + if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr; RRETURN(MATCH_MATCH); } @@ -2010,7 +2010,7 @@ for (;;) mb->start_match_ptr = mstart; /* In case \K reset it */ mb->end_match_ptr = eptr; mb->end_offset_top = offset_top; - if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr; + if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr; RRETURN(MATCH_KETRPOS); } @@ -2230,8 +2230,8 @@ for (;;) else { PCRE2_SPTR nextptr = eptr + 1; - 
FORWARDCHARTEST(nextptr, mb->end_subject); - if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr; + FORWARDCHARTEST(nextptr, mb->end_subject); + if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr; GETCHAR(c, eptr); if ((mb->poptions & PCRE2_UCP) != 0) { @@ -2282,7 +2282,7 @@ for (;;) } else { - if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1; + if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1; #ifdef SUPPORT_UNICODE if ((mb->poptions & PCRE2_UCP) != 0) { @@ -2297,7 +2297,7 @@ for (;;) #endif cur_is_word = MAX_255(*eptr) && ((mb->ctypes[*eptr] & ctype_word) != 0); - } + } } /* Now see if the situation is what we want */ @@ -2689,7 +2689,7 @@ for (;;) /* Match a back reference, possibly repeatedly. Look past the end of the item to see if there is repeat information following. - + The OP_REF and OP_REFI opcodes are used for a reference to a numbered group or to a non-duplicated named group. For a duplicated named group, OP_DNREF and OP_DNREFI are used. In this case we must scan the list of groups to @@ -2705,7 +2705,7 @@ for (;;) /* Initializing 'offset' avoids a compiler warning in the REF_REPEAT code. */ - + offset = 0; while (count-- > 0) { @@ -2721,7 +2721,7 @@ for (;;) caseless = op == OP_REFI; offset = GET2(ecode, 1) << 1; /* Doubled ref number */ ecode += 1 + IMM2_SIZE; - + /* Set up for repetition, or handle the non-repeated case */ REF_REPEAT: @@ -2750,7 +2750,7 @@ for (;;) break; default: /* No repeat follows */ - { + { int rc = match_ref(offset, offset_top, eptr, mb, caseless, &length); if (rc != 0) { @@ -2758,7 +2758,7 @@ for (;;) CHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - } + } eptr += length; continue; /* With the main loop */ } @@ -2769,16 +2769,16 @@ for (;;) also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes unset group be have as a zero-length group. For any other unset cases, carrying on will result in NOMATCH. 
*/ - + if (offset < offset_top && mb->ovector[offset] != PCRE2_UNSET) - { + { if (mb->ovector[offset] == mb->ovector[offset + 1]) continue; } else /* Group is not set */ { if (min == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0) - continue; - } + continue; + } /* First, ensure the minimum number of matches are present. We get back the length of the reference string explicitly rather than passing the @@ -2787,7 +2787,7 @@ for (;;) for (i = 1; i <= min; i++) { PCRE2_SIZE slength; - int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength); + int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength); if (rc != 0) { if (rc > 0) eptr = mb->end_subject; /* Partial match */ @@ -2808,13 +2808,13 @@ for (;;) { for (fi = min;; fi++) { - int rc; + int rc; PCRE2_SIZE slength; RMATCH(eptr, ecode, offset_top, mb, eptrb, RM14); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength); - if (rc != 0) + if (rc != 0) { if (rc > 0) eptr = mb->end_subject; /* Partial match */ CHECK_PARTIAL(); @@ -2825,12 +2825,12 @@ for (;;) /* Control never gets here */ } - /* If maximizing, find the longest string and work backwards, as long as + /* If maximizing, find the longest string and work backwards, as long as the matched lengths for each iteration are the same. */ else { - BOOL samelengths = TRUE; + BOOL samelengths = TRUE; pp = eptr; length = mb->ovector[offset+1] - mb->ovector[offset]; @@ -2839,7 +2839,7 @@ for (;;) PCRE2_SIZE slength; int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength); - if (rc != 0) + if (rc != 0) { /* Can't use CHECK_PARTIAL because we don't want to update eptr in the soft partial matching case. */ @@ -2857,14 +2857,14 @@ for (;;) eptr += slength; } - /* If the length matched for each repetition is the same as the length of - the captured group, we can easily work backwards. This is the normal - case. 
However, in caseless UTF-8 mode there are pairs of case-equivalent + /* If the length matched for each repetition is the same as the length of + the captured group, we can easily work backwards. This is the normal + case. However, in caseless UTF-8 mode there are pairs of case-equivalent characters whose lengths (in terms of code units) differ. However, this is very rare, so we handle it by re-matching fewer and fewer times. */ - + if (samelengths) - { + { while (eptr >= pp) { RMATCH(eptr, ecode, offset_top, mb, eptrb, RM15); @@ -2872,20 +2872,20 @@ for (;;) eptr -= length; } } - - /* The rare case of non-matching lengths. Re-scan the repetition for each + + /* The rare case of non-matching lengths. Re-scan the repetition for each iteration. We know that match_ref() will succeed every time. */ - + else { - max = i; + max = i; for (;;) { RMATCH(eptr, ecode, offset_top, mb, eptrb, RM68); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (eptr == pp) break; /* Failed after minimal repetition */ eptr = pp; - max--; + max--; for (i = min; i < max; i++) { PCRE2_SIZE slength; @@ -2893,8 +2893,8 @@ for (;;) eptr += slength; } } - } - + } + RRETURN(MATCH_NOMATCH); } /* Control never gets here */ @@ -6417,20 +6417,20 @@ with different endianness. */ if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8) return PCRE2_ERROR_BADMODE; - -/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the -options variable for this function. Users of PCRE2 who are not calling the -function directly would like to have a way of setting these flags, in the same + +/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the +options variable for this function. Users of PCRE2 who are not calling the +function directly would like to have a way of setting these flags, in the same way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with -constructions like (*NO_AUTOPOSSESS). 
To enable this, (*NOTEMPTY) and -(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be -transferred to the options for this function. The bits are guaranteed to be -adjacent, but do not have the same values. This bit of Boolean trickery assumes -that the match-time bits are not more significant than the flag bits. If by -accident this is not the case, a compile-time division by zero error will +constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and +(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be +transferred to the options for this function. The bits are guaranteed to be +adjacent, but do not have the same values. This bit of Boolean trickery assumes +that the match-time bits are not more significant than the flag bits. If by +accident this is not the case, a compile-time division by zero error will occur. */ -#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET) +#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET) #define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART) options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO)); #undef FF @@ -6541,7 +6541,7 @@ mb->match_limit = (mcontext->match_limit < re->limit_match)? mcontext->match_limit : re->limit_match; mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)? mcontext->recursion_limit : re->limit_recursion; - + /* Pointers to the individual character tables */ mb->lcc = re->tables + lcc_offset; @@ -6580,7 +6580,7 @@ switch(re->newline_convention) default: return PCRE2_ERROR_INTERNAL; } - + /* If the expression has got more back references than the offsets supplied can hold, we get a temporary chunk of memory to use during the matching. Otherwise, we can use the vector supplied. 
The size of the ovector is three times the @@ -6854,7 +6854,7 @@ for(;;) mb->start_match_ptr = start_match; mb->start_used_ptr = start_match; - mb->last_used_ptr = start_match; + mb->last_used_ptr = start_match; mb->match_call_count = 0; mb->match_function_type = 0; mb->end_offset_top = 0; @@ -6990,7 +6990,7 @@ while (mb->ovecsave_chain != NULL) ovecsave_frame *this = mb->ovecsave_chain; mb->ovecsave_chain = this->next; mb->memctl.free(this, mb->memctl.memory_data); - } + } /* Fill in fields that are always returned in the match data. */ @@ -7057,9 +7057,9 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT) match_data->ovector[0] = mb->start_match_ptr - mb->start_subject; match_data->ovector[1] = mb->end_match_ptr - mb->start_subject; } - + /* Set the remaining returned values */ - + match_data->startchar = start_match - subject; match_data->leftchar = mb->start_used_ptr - subject; match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)? @@ -7068,7 +7068,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT) } /* Control gets here if there has been a partial match, an error, or if the -overall match attempt has failed at all permitted starting positions. Any mark +overall match attempt has failed at all permitted starting positions. Any mark data is in the nomatch_mark field. 
*/ match_data->mark = mb->nomatch_mark; diff --git a/src/pcre2_match_data.c b/src/pcre2_match_data.c index f793f39..4ed78fb 100644 --- a/src/pcre2_match_data.c +++ b/src/pcre2_match_data.c @@ -72,10 +72,10 @@ return yield; *************************************************/ PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION -pcre2_match_data_create_from_pattern(pcre2_code *code, +pcre2_match_data_create_from_pattern(pcre2_code *code, pcre2_general_context *gcontext) { -return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1, +return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1, gcontext); } @@ -88,7 +88,7 @@ return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1, PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION pcre2_match_data_free(pcre2_match_data *match_data) { -if (match_data != NULL) +if (match_data != NULL) match_data->memctl.free(match_data, match_data->memctl.memory_data); } diff --git a/src/pcre2_newline.c b/src/pcre2_newline.c index c3d6298..7f482f2 100644 --- a/src/pcre2_newline.c +++ b/src/pcre2_newline.c @@ -60,9 +60,9 @@ http://unicode.org/unicode/reports/tr18/. */ * Check for newline at given position * *************************************************/ -/* This function is called only via the IS_NEWLINE macro, which does so only +/* This function is called only via the IS_NEWLINE macro, which does so only when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed -newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit +newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit pointed to by ptr is less than the end of the string. 
Arguments: @@ -76,7 +76,7 @@ Returns: TRUE or FALSE */ BOOL -PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr, +PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr, uint32_t *lenptr, BOOL utf) { uint32_t c; @@ -90,15 +90,15 @@ c = *ptr; if (type == NLTYPE_ANYCRLF) switch(c) { - case CHAR_LF: - *lenptr = 1; + case CHAR_LF: + *lenptr = 1; return TRUE; - - case CHAR_CR: + + case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; return TRUE; - - default: + + default: return FALSE; } @@ -111,8 +111,8 @@ else switch(c) #endif case CHAR_LF: case CHAR_VT: - case CHAR_FF: - *lenptr = 1; + case CHAR_FF: + *lenptr = 1; return TRUE; case CHAR_CR: @@ -121,25 +121,25 @@ else switch(c) #ifndef EBCDIC #if PCRE2_CODE_UNIT_WIDTH == 8 - case CHAR_NEL: - *lenptr = utf? 2 : 1; + case CHAR_NEL: + *lenptr = utf? 2 : 1; return TRUE; - + case 0x2028: /* LS */ case 0x2029: /* PS */ - *lenptr = 3; - return TRUE; - + *lenptr = 3; + return TRUE; + #else /* 16-bit or 32-bit code units */ case CHAR_NEL: case 0x2028: /* LS */ - case 0x2029: /* PS */ - *lenptr = 1; + case 0x2029: /* PS */ + *lenptr = 1; return TRUE; #endif #endif /* Not EBCDIC */ - default: + default: return FALSE; } } @@ -166,7 +166,7 @@ Returns: TRUE or FALSE */ BOOL -PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr, +PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr, uint32_t *lenptr, BOOL utf) { uint32_t c; @@ -190,11 +190,11 @@ if (type == NLTYPE_ANYCRLF) switch(c) *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; return TRUE; - case CHAR_CR: - *lenptr = 1; + case CHAR_CR: + *lenptr = 1; return TRUE; - - default: + + default: return FALSE; } @@ -211,31 +211,31 @@ else switch(c) #endif case CHAR_VT: case CHAR_FF: - case CHAR_CR: - *lenptr = 1; + case CHAR_CR: + *lenptr = 1; return TRUE; #ifndef EBCDIC #if PCRE2_CODE_UNIT_WIDTH == 8 - case CHAR_NEL: - *lenptr = utf? 2 : 1; + case CHAR_NEL: + *lenptr = utf? 
2 : 1; return TRUE; - + case 0x2028: /* LS */ case 0x2029: /* PS */ - *lenptr = 3; + *lenptr = 3; return TRUE; - + #else /* 16-bit or 32-bit code units */ case CHAR_NEL: case 0x2028: /* LS */ case 0x2029: /* PS */ - *lenptr = 1; - return TRUE; + *lenptr = 1; + return TRUE; #endif #endif /* Not EBCDIC */ - default: + default: return FALSE; } } diff --git a/src/pcre2_pattern_info.c b/src/pcre2_pattern_info.c index 1eb9a83..f9c9604 100644 --- a/src/pcre2_pattern_info.c +++ b/src/pcre2_pattern_info.c @@ -89,17 +89,17 @@ if (where == NULL) /* Requests field length */ case PCRE2_INFO_NAMECOUNT: case PCRE2_INFO_NEWLINE: case PCRE2_INFO_RECURSIONLIMIT: - return sizeof(uint32_t); + return sizeof(uint32_t); case PCRE2_INFO_FIRSTBITMAP: return sizeof(const uint8_t *); case PCRE2_INFO_JITSIZE: case PCRE2_INFO_SIZE: - return sizeof(size_t); + return sizeof(size_t); case PCRE2_INFO_NAMETABLE: - return sizeof(PCRE2_SPTR); + return sizeof(PCRE2_SPTR); } } diff --git a/src/pcre2_printint.c b/src/pcre2_printint.c index 0ad7dbf..4c2dd32 100644 --- a/src/pcre2_printint.c +++ b/src/pcre2_printint.c @@ -41,8 +41,8 @@ POSSIBILITY OF SUCH DAMAGE. /* This module contains a PCRE private debugging function for printing out the internal form of a compiled regular expression, along with some supporting -local functions. This source file is #included in pcre2test.c at each supported -code unit width, with PCRE2_SUFFIX set appropriately, just like the functions +local functions. This source file is #included in pcre2test.c at each supported +code unit width, with PCRE2_SUFFIX set appropriately, just like the functions that comprise the library. 
*/ @@ -82,9 +82,9 @@ Arguments: f file to write to ptr pointer to first code unit of the character utf TRUE if string is UTF (will be FALSE if UTF is not supported) - + Returns: number of additional code units used -*/ +*/ static unsigned int print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf) @@ -105,7 +105,7 @@ if (utf) one_code_unit = (c & 0xfffff800u) != 0xd800u; #endif /* CODE_UNIT_WIDTH */ } -#endif /* SUPPORT_UNICODE */ +#endif /* SUPPORT_UNICODE */ /* Handle a valid one-code-unit character at any width. */ @@ -115,10 +115,10 @@ if (one_code_unit) else if (c < 0x80) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%02x}", c); return 0; - } + } -/* Code for invalid UTF code units and multi-unit UTF characters is different -for each width. If UTF is not supported, control should never get here, but we +/* Code for invalid UTF code units and multi-unit UTF characters is different +for each width. If UTF is not supported, control should never get here, but we need a return statement to keep the compiler happy. */ #ifndef SUPPORT_UNICODE @@ -134,10 +134,10 @@ if ((c & 0xc0) != 0xc0) { fprintf(f, "\\X{%x}", c); /* Invalid starting byte */ return 0; - } + } else { - int i; + int i; int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */ int s = 6*a; c = (c & utf8_table3[a]) << s; @@ -153,7 +153,7 @@ else } fprintf(f, "\\x{%x}", c); return a; -} +} #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ /* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one. @@ -173,7 +173,7 @@ return 1; /* For UTF-32 we get here only for a malformed code unit, which should only occur if the sanity check has been turned off. Print it with \X instead of \x as an indication. */ - + #if PCRE2_CODE_UNIT_WIDTH == 32 fprintf(f, "\\X{%x}", c); return 0; @@ -187,15 +187,15 @@ return 0; * Print string as a list of code units * *************************************************/ -/* This takes no account of UTF as it always prints each individual code unit. 
+/* This takes no account of UTF as it always prints each individual code unit. The string is zero-terminated. Arguments: f file to write to ptr point to the string - + Returns: nothing -*/ +*/ static void print_custring(FILE *f, PCRE2_SPTR ptr) @@ -213,9 +213,9 @@ while (*ptr != '\0') * Find Unicode property name * *************************************************/ -/* When there is no UTF/UCP support, the table of names does not exist. This -function should not be called in such configurations, because a pattern that -tries to use Unicode properties won't compile. Rather than put lots of #ifdefs +/* When there is no UTF/UCP support, the table of names does not exist. This +function should not be called in such configurations, because a pattern that +tries to use Unicode properties won't compile. Rather than put lots of #ifdefs into the main code, however, we just put one into this function. */ static const char * @@ -244,15 +244,15 @@ return "??"; /* "Normal" properties can be printed from tables. The PT_CLIST property is a pseudo-property that contains a pointer to a list of case-equivalent -characters. +characters. Arguments: f file to write to code pointer in the compiled code before text to print before after text to print after - -Returns: nothing + +Returns: nothing */ static void @@ -281,14 +281,14 @@ else /* The print_lengths flag controls whether offsets and lengths of items are printed. Lenths can be turned off from pcre2test so that automatic tests on -bytecode can be written that do not depend on the value of LINK_SIZE. +bytecode can be written that do not depend on the value of LINK_SIZE. 
Arguments: re a compiled pattern f the file to write to - print_lenghts show various lengths - -Returns: nothing + print_lenghts show various lengths + +Returns: nothing */ static void @@ -460,7 +460,7 @@ for(;;) case OP_TYPEMINQUERY: case OP_TYPEPOSQUERY: fprintf(f, " %s ", flag); - + if (*code >= OP_TYPESTAR) { if (code[1] == OP_PROP || code[1] == OP_NOTPROP) diff --git a/src/pcre2_string_utils.c b/src/pcre2_string_utils.c index 1091d60..59487b1 100644 --- a/src/pcre2_string_utils.c +++ b/src/pcre2_string_utils.c @@ -39,7 +39,7 @@ POSSIBILITY OF SUCH DAMAGE. */ /* This module contains internal functions for comparing and finding the length -of strings. These are used instead of strcmp() etc because the standard +of strings. These are used instead of strcmp() etc because the standard functions work only on 8-bit data. */ @@ -54,7 +54,7 @@ functions work only on 8-bit data. */ * Compare two zero-terminated PCRE2 strings * *************************************************/ -/* +/* Arguments: str1 first string str2 second string @@ -80,7 +80,7 @@ return 0; * Compare zero-terminated PCRE2 & 8-bit strings * *************************************************/ -/* As the 8-bit string is almost always a literal, its type is specified as +/* As the 8-bit string is almost always a literal, its type is specified as const char *. Arguments: @@ -108,7 +108,7 @@ return 0; * Compare two PCRE2 strings, given a length * *************************************************/ -/* +/* Arguments: str1 first string str2 second string @@ -135,7 +135,7 @@ return 0; * Compare PCRE2 string to 8-bit string by length * *************************************************/ -/* As the 8-bit string is almost always a literal, its type is specified as +/* As the 8-bit string is almost always a literal, its type is specified as const char *. 
Arguments: @@ -164,7 +164,7 @@ return 0; * Find the length of a PCRE2 string * *************************************************/ -/* +/* Argument: the string Returns: the length */ @@ -185,9 +185,9 @@ return c; /* Arguments: str1 buffer to receive the string str2 8-bit string to be copied - + Returns: the number of code units used (excluding trailing zero) -*/ +*/ int PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2) diff --git a/src/pcre2_study.c b/src/pcre2_study.c index 2e673c3..23a6175 100644 --- a/src/pcre2_study.c +++ b/src/pcre2_study.c @@ -74,7 +74,7 @@ Arguments: code pointer to start of group (the bracket) startcode pointer to start of the whole pattern's code recurse_depth RECURSE depth - utf UTF flag + utf UTF flag Returns: the minimum length -1 if \C in UTF-8 mode or (*ACCEPT) was encountered @@ -388,10 +388,10 @@ for (;;) if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0) { int count = GET2(cc, 1+IMM2_SIZE); - PCRE2_UCHAR *slot = + PCRE2_UCHAR *slot = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) + - GET2(cc, 1) * re->name_entry_size; - + GET2(cc, 1) * re->name_entry_size; + d = INT_MAX; while (count-- > 0) { @@ -579,7 +579,7 @@ for (;;) *************************************************/ /* Given a character, set its first code unit's bit in the table, and also the -corresponding bit for the other version of a letter if we are caseless. +corresponding bit for the other version of a letter if we are caseless. Arguments: re points to the regex block @@ -590,20 +590,20 @@ Arguments: Returns: pointer after the character */ -static PCRE2_SPTR +static PCRE2_SPTR set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf) { uint32_t c = *p++; /* First code unit */ (void)utf; /* Stop compiler warning when UTF not supported */ -/* In 16-bit and 32-bit modes, code units greater than 0xff set the bit for +/* In 16-bit and 32-bit modes, code units greater than 0xff set the bit for 0xff. 
*/ #if PCRE2_CODE_UNIT_WIDTH != 8 -if (c > 0xff) SET_BIT(0xff); else +if (c > 0xff) SET_BIT(0xff); else #endif -SET_BIT(c); +SET_BIT(c); /* In UTF-8 or UTF-16 mode, pick up the remaining code units in order to find the end of the character, even when caseless. */ @@ -617,7 +617,7 @@ if (utf) if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, p); #endif } -#endif /* SUPPORT_UNICODE */ +#endif /* SUPPORT_UNICODE */ /* If caseless, handle the other case of the character. */ @@ -669,7 +669,7 @@ static void set_type_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit) { register uint32_t c; -for (c = 0; c < table_limit; c++) +for (c = 0; c < table_limit; c++) re->start_bitmap[c] |= re->tables[c+cbits_offset+cbit_type]; #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 if (table_limit == 32) return; @@ -710,7 +710,7 @@ static void set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit) { register uint32_t c; -for (c = 0; c < table_limit; c++) +for (c = 0; c < table_limit; c++) re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff; @@ -724,10 +724,10 @@ if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff; *************************************************/ /* This function scans a compiled unanchored expression recursively and -attempts to build a bitmap of the set of possible starting code units whose -values are less than 256. In 16-bit and 32-bit mode, values above 255 all cause +attempts to build a bitmap of the set of possible starting code units whose +values are less than 256. In 16-bit and 32-bit mode, values above 255 all cause the 255 bit to be set. When calling set[_not]_type_bits() in UTF-8 (sic) mode -we pass a value of 16 rather than 32 as the final argument. (See comments in +we pass a value of 16 rather than 32 as the final argument. 
(See comments in those functions for the reason.) The SSB_CONTINUE return is useful for parenthesized groups in patterns such as @@ -769,8 +769,8 @@ do while (try_next) /* Loop for items in this branch */ { int rc; - uint8_t *classmap = NULL; - + uint8_t *classmap = NULL; + switch(*tcode) { /* If we reach something we don't understand, it means a new opcode has @@ -854,31 +854,31 @@ do case OP_THEN: case OP_THEN_ARG: return SSB_FAIL; - + /* A "real" property test implies no starting bits, but the fake property PT_CLIST identifies a list of characters. These lists are short, as they are used for characters with more than one "other case", so there is no point in recognizing them for OP_NOTPROP. */ - + case OP_PROP: if (tcode[1] != PT_CLIST) return SSB_FAIL; - { + { const uint32_t *p = PRIV(ucd_caseless_sets) + tcode[2]; while ((c = *p++) < NOTACHAR) { -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 if (utf) { PCRE2_UCHAR buff[6]; (void)PRIV(ord2utf)(c, buff); c = buff[0]; - } -#endif + } +#endif if (c > 0xff) SET_BIT(0xff); else SET_BIT(c); } - } + } try_next = FALSE; - break; + break; /* We can ignore word boundary tests. */ @@ -1032,14 +1032,14 @@ do SET_BIT(CHAR_HT); SET_BIT(CHAR_SPACE); - /* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set + /* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set the bits for 0xA0 and for code units >= 255, independently of UTF. */ #if PCRE2_CODE_UNIT_WIDTH != 8 SET_BIT(0xA0); SET_BIT(0xFF); #else - /* For the 8-bit library in UTF-8 mode, set the bits for the first code + /* For the 8-bit library in UTF-8 mode, set the bits for the first code units of horizontal space characters. */ #ifdef SUPPORT_UNICODE @@ -1052,7 +1052,7 @@ do } else #endif - /* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless + /* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless the code is EBCDIC. 
*/ { #ifndef EBCDIC @@ -1060,7 +1060,7 @@ do #endif /* Not EBCDIC */ } #endif /* 8-bit support */ - + try_next = FALSE; break; @@ -1071,16 +1071,16 @@ do SET_BIT(CHAR_FF); SET_BIT(CHAR_CR); - /* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set + /* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set the bits for NEL and for code units >= 255, independently of UTF. */ #if PCRE2_CODE_UNIT_WIDTH != 8 SET_BIT(CHAR_NEL); SET_BIT(0xFF); #else - /* For the 8-bit library in UTF-8 mode, set the bits for the first code + /* For the 8-bit library in UTF-8 mode, set the bits for the first code units of vertical space characters. */ - + #ifdef SUPPORT_UNICODE if (utf) { @@ -1093,8 +1093,8 @@ do { SET_BIT(CHAR_NEL); } -#endif /* 8-bit support */ - +#endif /* 8-bit support */ + try_next = FALSE; break; @@ -1166,7 +1166,7 @@ do case OP_ANY: case OP_ALLANY: return SSB_FAIL; - + case OP_HSPACE: SET_BIT(CHAR_HT); SET_BIT(CHAR_SPACE); @@ -1178,7 +1178,7 @@ do SET_BIT(0xA0); SET_BIT(0xFF); #else - /* For the 8-bit library in UTF-8 mode, set the bits for the first code + /* For the 8-bit library in UTF-8 mode, set the bits for the first code units of horizontal space characters. */ #ifdef SUPPORT_UNICODE @@ -1191,7 +1191,7 @@ do } else #endif - /* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless + /* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless the code is EBCDIC. */ { #ifndef EBCDIC @@ -1208,16 +1208,16 @@ do SET_BIT(CHAR_FF); SET_BIT(CHAR_CR); - /* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set + /* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set the bits for NEL and for code units >= 255, independently of UTF. 
*/ #if PCRE2_CODE_UNIT_WIDTH != 8 SET_BIT(CHAR_NEL); SET_BIT(0xFF); #else - /* For the 8-bit library in UTF-8 mode, set the bits for the first code + /* For the 8-bit library in UTF-8 mode, set the bits for the first code units of vertical space characters. */ - + #ifdef SUPPORT_UNICODE if (utf) { @@ -1230,7 +1230,7 @@ do { SET_BIT(CHAR_NEL); } -#endif /* 8-bit support */ +#endif /* 8-bit support */ break; case OP_NOT_DIGIT: @@ -1260,8 +1260,8 @@ do tcode += 2; break; - - /* Extended class: if there are any property checks, or if this is a + + /* Extended class: if there are any property checks, or if this is a negative XCLASS without a map, give up. If there are no property checks, there must be wide characters on the XCLASS list, because otherwise an XCLASS would not have been created. This means that code points >= 255 @@ -1270,19 +1270,19 @@ do #ifdef SUPPORT_WIDE_CHARS case OP_XCLASS: if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0 || - (tcode[1 + LINK_SIZE] & (XCL_MAP|XCL_NOT)) == XCL_NOT) + (tcode[1 + LINK_SIZE] & (XCL_MAP|XCL_NOT)) == XCL_NOT) return SSB_FAIL; - + /* We have a positive XCLASS or a negative one without a map. Set up the map pointer if there is one, and fall through. */ - + classmap = ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0)? NULL : (uint8_t *)(tcode + 1 + LINK_SIZE + 1); #endif /* Enter here for a negative non-XCLASS. In the 8-bit library, if we are in UTF mode, any byte with a value >= 0xc4 is a potentially valid starter - because it starts a character with a value > 255. In 8-bit non-UTF mode, + because it starts a character with a value > 255. In 8-bit non-UTF mode, there is no difference between CLASS and NCLASS. In all other wide character modes, set the 0xFF bit to indicate code units >= 255. */ @@ -1298,26 +1298,26 @@ do #endif /* Fall through */ - /* Enter here for a positive non-XCLASS. If we have fallen through from - an XCLASS, classmap will already be set; just advance the code pointer. + /* Enter here for a positive non-XCLASS. 
If we have fallen through from + an XCLASS, classmap will already be set; just advance the code pointer. Otherwise, set up classmap for a a non-XCLASS and advance past it. */ - + case OP_CLASS: if (*tcode == OP_XCLASS) tcode += GET(tcode, 1); else - { + { classmap = (uint8_t *)(++tcode); tcode += 32 / sizeof(PCRE2_UCHAR); } - + /* When wide characters are supported, classmap may be NULL. In UTF-8 (sic) mode, the bits in a class bit map correspond to character values, not to byte values. However, the bit map we are constructing is for byte - values. So we have to do a conversion for characters whose code point is + values. So we have to do a conversion for characters whose code point is greater than 127. In fact, there are only two possible starting bytes for characters in the range 128 - 255. */ - + if (classmap != NULL) - { + { #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 if (utf) { @@ -1334,11 +1334,11 @@ do } else #endif - /* In all modes except UTF-8, the two bit maps are compatible. */ - + /* In all modes except UTF-8, the two bit maps are compatible. */ + { for (c = 0; c < 32; c++) re->start_bitmap[c] |= classmap[c]; - } + } } /* Act on what follows the class. For a zero minimum repeat, continue; @@ -1384,13 +1384,13 @@ return yield; *************************************************/ /* This function is handed a compiled expression that it must study to produce -information that will speed up the matching. +information that will speed up the matching. 
Argument: points to the compiled expression Returns: 0 normally; non-zero should never normally occur 1 unknown opcode in set_start_bits 2 missing capturing bracket - 3 unknown opcode in find_minlength + 3 unknown opcode in find_minlength */ int @@ -1402,7 +1402,7 @@ BOOL utf = (re->overall_options & PCRE2_UTF) != 0; /* Find start of compiled code */ -code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) + +code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) + re->name_entry_size * re->name_count; /* For an anchored pattern, or an unanchored pattern that has a first code @@ -1422,17 +1422,17 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 && switch(min = find_minlength(re, code, code, 0, utf)) { case -1: /* \C in UTF mode or (*ACCEPT) was encountered */ - break; - + break; + case -2: return 2; /* missing capturing bracket */ - + case -3: return 3; /* unrecognized opcode */ - + default: re->minlength = min; - break; + break; } return 0; diff --git a/src/pcre2_substring.c b/src/pcre2_substring.c index 9e737e6..0b42d30 100644 --- a/src/pcre2_substring.c +++ b/src/pcre2_substring.c @@ -81,7 +81,7 @@ for (entry = first; entry <= last; entry += entrysize) { uint16_t n = GET2(entry, 0); if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET) - return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr); + return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr); } return PCRE2_ERROR_NOSUBSTRING; } @@ -108,7 +108,7 @@ Returns: if successful: 0 */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_substring_copy_bynumber(pcre2_match_data *match_data, +pcre2_substring_copy_bynumber(pcre2_match_data *match_data, unsigned int stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr) { PCRE2_SIZE left, right; @@ -119,7 +119,7 @@ if (stringnumber >= match_data->oveccount || (left = match_data->ovector[stringnumber*2]) == PCRE2_UNSET) return PCRE2_ERROR_NOSUBSTRING; right = match_data->ovector[stringnumber*2+1]; -if 
(right - left + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY; +if (right - left + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY; while (left < right) buffer[p++] = subject[left++]; buffer[p] = 0; *sizeptr = p; @@ -140,7 +140,7 @@ Arguments: match_data pointer to match_data stringname the name of the required substring stringptr where to put the pointer to the new memory - sizeptr where to put the length of the substring + sizeptr where to put the length of the substring Returns: if successful: zero if not successful, a negative value: @@ -162,7 +162,7 @@ for (entry = first; entry <= last; entry += entrysize) { uint16_t n = GET2(entry, 0); if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET) - return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr); + return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr); } return PCRE2_ERROR_NOSUBSTRING; } @@ -180,7 +180,7 @@ Arguments: match_data points to match data stringnumber the number of the required substring stringptr where to put a pointer to the new memory - sizeptr where to put the size of the substring + sizeptr where to put the size of the substring Returns: if successful: zero if not successful a negative error code: @@ -189,7 +189,7 @@ Returns: if successful: zero */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_substring_get_bynumber(pcre2_match_data *match_data, +pcre2_substring_get_bynumber(pcre2_match_data *match_data, unsigned int stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr) { PCRE2_SIZE left, right; @@ -204,8 +204,8 @@ if (stringnumber >= match_data->oveccount || return PCRE2_ERROR_NOSUBSTRING; right = match_data->ovector[stringnumber*2+1]; -block = PRIV(memctl_malloc)(sizeof(pcre2_memctl) + - (right-left+1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data); +block = PRIV(memctl_malloc)(sizeof(pcre2_memctl) + + (right-left+1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data); if (block == NULL) return PCRE2_ERROR_NOMEMORY; yield = (PCRE2_UCHAR 
*)((char *)block + sizeof(pcre2_memctl)); @@ -222,7 +222,7 @@ return 0; * Free memory obtained by get_substring * *************************************************/ -/* +/* Argument: the result of a previous pcre2_substring_get_byxxx() Returns: nothing */ @@ -246,7 +246,7 @@ permits duplicate names, the first substring that is set is chosen. Arguments: match_data pointer to match data stringname the name of the required substring - sizeptr where to put the length + sizeptr where to put the length Returns: 0 if successful, else a negative error number */ @@ -265,7 +265,7 @@ for (entry = first; entry <= last; entry += entrysize) { uint16_t n = GET2(entry, 0); if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET) - return pcre2_substring_length_bynumber(match_data, n, sizeptr); + return pcre2_substring_length_bynumber(match_data, n, sizeptr); } return PCRE2_ERROR_NOSUBSTRING; } @@ -281,7 +281,7 @@ return PCRE2_ERROR_NOSUBSTRING; Arguments: match_data pointer to match data stringnumber the number of the required substring - sizeptr where to put the length + sizeptr where to put the length Returns: 0 if successful, else a negative error number */ @@ -296,7 +296,7 @@ if (stringnumber >= match_data->oveccount || return PCRE2_ERROR_NOSUBSTRING; *sizeptr = match_data->ovector[stringnumber*2 + 1] - match_data->ovector[stringnumber*2]; -return 0; +return 0; } @@ -307,23 +307,23 @@ return 0; /* This function gets one chunk of memory and builds a list of pointers and all the captured substrings in it. A NULL pointer is put on the end of the list. -The substrings are zero-terminated, but also, if the final argument is -non-NULL, a list of lengths is also returned. This allows binary data to be +The substrings are zero-terminated, but also, if the final argument is +non-NULL, a list of lengths is also returned. This allows binary data to be handled. 
Arguments: match_data points to the match data listptr set to point to the list of pointers - lengthsptr set to point to the list of lengths (may be NULL) + lengthsptr set to point to the list of lengths (may be NULL) Returns: if successful: 0 if not successful, a negative error code: PCRE2_ERROR_NOMEMORY: failed to get memory, - or a match failure code + or a match failure code */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr, +pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr) { int i, count, count2; @@ -343,22 +343,22 @@ if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */ for (i = 0; i < count2; i += 2) size += sizeof(PCRE2_UCHAR *) + CU2BYTES(ovector[i+1] - ovector[i] + 1); -memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data); +memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data); if (memp == NULL) return PCRE2_ERROR_NOMEMORY; *listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl)); lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1)); - + if (lengthsptr == NULL) { - sp = (PCRE2_UCHAR *)lensp; + sp = (PCRE2_UCHAR *)lensp; lensp = NULL; } else - { - *lengthsptr = lensp; - sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count); - } + { + *lengthsptr = lensp; + sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count); + } for (i = 0; i < count2; i += 2) { @@ -398,9 +398,9 @@ memctl->free(memctl, memctl->memory_data); * Find (multiple) entries for named string * *************************************************/ -/* This function scans the nametable for a given name, using binary chop. It -returns either two pointers to the entries in the table, or, if no pointers are -given, the number of a group with the given name. If duplicate names are +/* This function scans the nametable for a given name, using binary chop. 
It +returns either two pointers to the entries in the table, or, if no pointers are +given, the number of a group with the given name. If duplicate names are permitted, this may not be unique. Arguments: @@ -428,11 +428,11 @@ while (top > bot) uint16_t mid = (top + bot) / 2; PCRE2_SPTR entry = nametable + entrysize*mid; int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE); - if (c == 0) + if (c == 0) { PCRE2_SPTR first; PCRE2_SPTR last; - PCRE2_SPTR lastentry; + PCRE2_SPTR lastentry; if (firstptr == NULL) return GET2(entry, 0); lastentry = nametable + entrysize * (code->name_count - 1); first = last = entry; @@ -447,7 +447,7 @@ while (top > bot) last += entrysize; } *firstptr = first; - *lastptr = last; + *lastptr = last; return entrysize; } if (c > 0) bot = mid + 1; else top = mid; @@ -462,7 +462,7 @@ return PCRE2_ERROR_NOSUBSTRING; *************************************************/ /* This function is a convenience wrapper for pcre2_substring_nametable_scan() -when it is known that names are unique. If there are duplicate names, it is not +when it is known that names are unique. If there are duplicate names, it is not defined which number is returned. 
Arguments: @@ -474,7 +474,7 @@ Returns: the number of the named parenthesis, or a negative number */ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION -pcre2_substring_number_from_name(const pcre2_code *code, +pcre2_substring_number_from_name(const pcre2_code *code, PCRE2_SPTR stringname) { return pcre2_substring_nametable_scan(code, stringname, NULL, NULL); diff --git a/src/pcre2_ucp.h b/src/pcre2_ucp.h index 9eeefa6..e7db0c0 100644 --- a/src/pcre2_ucp.h +++ b/src/pcre2_ucp.h @@ -232,7 +232,7 @@ enum { ucp_Takri, /* New for Unicode 7.0.0: */ ucp_Bassa_Vah, - ucp_Caucasian_Albanian, + ucp_Caucasian_Albanian, ucp_Duployan, ucp_Elbasan, ucp_Grantha, diff --git a/src/pcre2_valid_utf.c b/src/pcre2_valid_utf.c index 267dc66..d0378c9 100644 --- a/src/pcre2_valid_utf.c +++ b/src/pcre2_valid_utf.c @@ -154,11 +154,11 @@ for (p = string; length-- > 0; p++) *erroroffset = (int)(p - string); switch(ab - length) { - case 1: return PCRE2_ERROR_UTF8_ERR1; - case 2: return PCRE2_ERROR_UTF8_ERR2; - case 3: return PCRE2_ERROR_UTF8_ERR3; - case 4: return PCRE2_ERROR_UTF8_ERR4; - case 5: return PCRE2_ERROR_UTF8_ERR5; + case 1: return PCRE2_ERROR_UTF8_ERR1; + case 2: return PCRE2_ERROR_UTF8_ERR2; + case 3: return PCRE2_ERROR_UTF8_ERR3; + case 4: return PCRE2_ERROR_UTF8_ERR4; + case 5: return PCRE2_ERROR_UTF8_ERR5; } } length -= ab; /* Length remaining */ @@ -314,7 +314,7 @@ return 0; /* ----------------- Check a UTF-16 string ----------------- */ -#elif PCRE2_CODE_UNIT_WIDTH == 16 +#elif PCRE2_CODE_UNIT_WIDTH == 16 /* There's not so much work, nor so many errors, for UTF-16. PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at the end of the string diff --git a/src/pcre2_xclass.c b/src/pcre2_xclass.c index 2deb428..2ea89c4 100644 --- a/src/pcre2_xclass.c +++ b/src/pcre2_xclass.c @@ -60,7 +60,7 @@ might contain codepoints above 255 and/or Unicode properties. 
Arguments: c the character data points to the flag code unit of the XCLASS data - utf TRUE if in UTF mode + utf TRUE if in UTF mode Returns: TRUE if character matches, else FALSE */ @@ -261,7 +261,7 @@ while ((t = *data++) != XCL_END) data += 2; } #else - (void)utf; /* Avoid compiler warning */ + (void)utf; /* Avoid compiler warning */ #endif /* SUPPORT_UNICODE */ } diff --git a/src/pcre2demo.c b/src/pcre2demo.c index 8ad6e6f..ec51cf1 100644 --- a/src/pcre2demo.c +++ b/src/pcre2demo.c @@ -8,7 +8,7 @@ pcre2sample documentation for a short discussion ("man pcre2sample" if you have the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is incompatible with the original PCRE API. -There are actually three libraries, each supporting a different code unit +There are actually three libraries, each supporting a different code unit width. This demonstration program uses the 8-bit library. In Unix-like environments, if PCRE2 is installed in your standard system @@ -39,8 +39,8 @@ the following line. */ /* #define PCRE2_STATIC */ -/* This macro must be defined before including pcre2.h. For a program that uses -only one code unit width, it makes it possible to use generic function names +/* This macro must be defined before including pcre2.h. For a program that uses +only one code unit width, it makes it possible to use generic function names such as pcre2_compile(). 
*/ #define PCRE2_CODE_UNIT_WIDTH 8 @@ -124,7 +124,7 @@ subject_length = strlen((char *)subject); re = pcre2_compile( pattern, /* the pattern */ - PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ + PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ 0, /* default options */ &errornumber, /* for error number */ &erroroffset, /* for error offset */ @@ -134,9 +134,9 @@ re = pcre2_compile( if (re == NULL) { - PCRE2_UCHAR buffer[256]; + PCRE2_UCHAR buffer[256]; pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); - printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset, + printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset, buffer); return 1; } @@ -180,7 +180,7 @@ if (rc < 0) return 1; } -/* Match succeded. Get a pointer to the output vector, where string offsets are +/* Match succeded. Get a pointer to the output vector, where string offsets are stored. */ ovector = pcre2_get_ovector_pointer(match_data); @@ -193,7 +193,7 @@ printf("\nMatch succeeded at offset %d\n", (int)ovector[0]); * captured. * *************************************************************************/ -/* The output vector wasn't big enough. This should not happen, because we used +/* The output vector wasn't big enough. This should not happen, because we used pcre2_match_data_create_from_pattern() above. */ if (rc == 0) @@ -244,7 +244,7 @@ if (namecount <= 0) printf("No named substrings\n"); else &name_entry_size); /* where to put the answer */ /* Now we can scan the table and, for each entry, print the number, the name, - and the substring itself. In the 8-bit library the number is held in two + and the substring itself. In the 8-bit library the number is held in two bytes, most significant first. 
*/ tabptr = name_table; @@ -289,7 +289,7 @@ if (namecount <= 0) printf("No named substrings\n"); else if (!find_all) /* Check for -g */ { - pcre2_match_data_free(match_data); /* Release the memory that was used */ + pcre2_match_data_free(match_data); /* Release the memory that was used */ pcre2_code_free(re); /* for the match data and the pattern. */ return 0; /* Exit the program. */ } @@ -307,7 +307,7 @@ sequence. */ (void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline); crlf_is_newline = newline == PCRE2_NEWLINE_ANY || newline == PCRE2_NEWLINE_CRLF || - newline == PCRE2_NEWLINE_ANYCRLF; + newline == PCRE2_NEWLINE_ANYCRLF; /* Loop for second and subsequent matches */ diff --git a/src/pcre2grep.c b/src/pcre2grep.c index c8c9ba1..ee374de 100644 --- a/src/pcre2grep.c +++ b/src/pcre2grep.c @@ -450,7 +450,7 @@ pcre2grep_exit(int rc) if (resource_error) { fprintf(stderr, "pcre2grep: Error %d, %d or %d means that a resource limit " - "was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT, + "was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT, PCRE2_ERROR_RECURSIONLIMIT); fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n"); } @@ -485,7 +485,7 @@ if (strlen(s) > MAXPATLEN) { fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n", MAXPATLEN); - free(p); + free(p); return NULL; } p->next = NULL; @@ -2381,7 +2381,7 @@ switch(letter) unsigned char buffer[128]; (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer); fprintf(stdout, "pcre2grep version %s\n", buffer); - } + } pcre2grep_exit(0); break; @@ -2472,7 +2472,7 @@ if ((popts & PO_FIXED_STRINGS) != 0) } sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]); -p->compiled = pcre2_compile(buffer, -1, options, &errcode, &erroffset, +p->compiled = pcre2_compile(buffer, -1, options, &errcode, &erroffset, compile_context); if (p->compiled != NULL) return TRUE; @@ -2555,11 +2555,11 @@ while (fgets(buffer, PATBUFSIZE, f) != NULL) 
afterwards, as a precaution against any later code trying to use it. */ *patlastptr = add_pattern(buffer, *patlastptr); - if (*patlastptr == NULL) + if (*patlastptr == NULL) { if (f != stdin) fclose(f); return FALSE; - } + } if (*patptr == NULL) *patptr = *patlastptr; /* This loop is needed because compiling a "pattern" when -F is set may add @@ -2571,10 +2571,10 @@ while (fgets(buffer, PATBUFSIZE, f) != NULL) { if (!compile_pattern(*patlastptr, pcre2_options, popts, TRUE, filename, linenumber)) - { + { if (f != stdin) fclose(f); return FALSE; - } + } (*patlastptr)->string = NULL; /* Insurance */ if ((*patlastptr)->next == NULL) break; *patlastptr = (*patlastptr)->next; @@ -2622,7 +2622,7 @@ for (i = 1; i < argc; i++) char *option_data = (char *)""; /* default to keep compiler happy */ BOOL longop; BOOL longopwasequals = FALSE; - + if (argv[i][0] != '-') break; /* If we hit an argument that is just "-", it may be a reference to STDIN, @@ -2925,7 +2925,7 @@ for (i = 1; i < argc; i++) else *((int *)op->dataptr) = n; } } - + /* Options have been decoded. If -C was used, its value is used as a default for -A and -B. */ @@ -2946,15 +2946,15 @@ if ((only_matching != NULL && (file_offsets || line_offsets)) || "and/or --line-offsets\n"); pcre2grep_exit(usage(2)); } - + /* Put limits into the match data block. */ -if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit); -if (recursion_limit > 0) pcre2_set_recursion_limit(match_context, recursion_limit); +if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit); +if (recursion_limit > 0) pcre2_set_recursion_limit(match_context, recursion_limit); if (only_matching != NULL || file_offsets || line_offsets) show_only_matching = TRUE; - + /* If a locale has not been provided as an option, see if the LC_CTYPE or LC_ALL environment variable is set, and if so, use it. 
*/ @@ -2980,7 +2980,7 @@ if (locale != NULL) locale, locale_from); goto EXIT2; } - pcre2_set_character_tables(compile_context, pcre2_maketables(NULL)); + pcre2_set_character_tables(compile_context, pcre2_maketables(NULL)); } /* Sort out colouring */ @@ -3007,27 +3007,27 @@ if (colour_option != NULL && strcmp(colour_option, "never") != 0) if (newline_arg != NULL) { - for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *)); + for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *)); endlinetype++) { if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break; } if (endlinetype < (int)(sizeof(newlines)/sizeof(char *))) pcre2_set_newline(compile_context, endlinetype); - else + else { - fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n", + fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n", newline_arg); goto EXIT2; - } + } } - -/* Find default newline convention */ - + +/* Find default newline convention */ + else { (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype); - } + } /* Interpret the text values for -d and -D */ diff --git a/src/pcre2posix.c b/src/pcre2posix.c index 42e7967..e57b882 100644 --- a/src/pcre2posix.c +++ b/src/pcre2posix.c @@ -68,7 +68,7 @@ already set. */ #include "pcre2_internal.h" #include "pcre2posix.h" -/* Table to translate PCRE2 compile time error codes into POSIX error codes. +/* Table to translate PCRE2 compile time error codes into POSIX error codes. Only a few PCRE2 errors with a value greater than 23 turn into special POSIX codes: most go to REG_BADPAT. The second table lists, in pairs, those that don't. */ @@ -89,7 +89,7 @@ static const int eint1[] = { REG_ASSERT, /* internal error: unexpected repeat */ REG_BADPAT, /* unrecognized character after (? 
or (?- */ REG_BADPAT, /* POSIX named classes are supported only within a class */ - REG_BADPAT, /* POSIX collating elements are not supported */ + REG_BADPAT, /* POSIX collating elements are not supported */ REG_EPAREN, /* missing ) */ /* 15 */ REG_ESUBREG, /* reference to non-existent subpattern */ @@ -103,7 +103,7 @@ static const int eint1[] = { REG_EPAREN, /* unmatched closing parenthesis */ REG_ASSERT /* internal error: code overflow */ }; - + static const int eint2[] = { 30, REG_ECTYPE, /* unknown POSIX class name */ 32, REG_INVARG, /* this version of PCRE does not have UTF or UCP support */ @@ -216,14 +216,14 @@ if ((cflags & REG_UTF) != 0) options |= PCRE2_UTF; if ((cflags & REG_UCP) != 0) options |= PCRE2_UCP; if ((cflags & REG_UNGREEDY) != 0) options |= PCRE2_UNGREEDY; -preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, -1, options, +preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, -1, options, &errorcode, &erroffset, NULL); preg->re_erroffset = erroffset; if (preg->re_pcre2_code == NULL) { - unsigned int i; - if (errorcode < 0) return REG_BADPAT; /* UTF error */ + unsigned int i; + if (errorcode < 0) return REG_BADPAT; /* UTF error */ errorcode -= COMPILE_ERROR_BASE; if (errorcode < (int)(sizeof(eint1)/sizeof(const int))) return eint1[errorcode]; @@ -232,7 +232,7 @@ if (preg->re_pcre2_code == NULL) return REG_BADPAT; } -(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code, +(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code, PCRE2_INFO_CAPTURECOUNT, &re_nsub); preg->re_nsub = (size_t)re_nsub; if ((options & PCRE2_NO_AUTO_CAPTURE) != 0) re_nsub = -1; @@ -288,7 +288,7 @@ else eo = (int)strlen(string); } -rc = pcre2_match((const pcre2_code *)preg->re_pcre2_code, +rc = pcre2_match((const pcre2_code *)preg->re_pcre2_code, (PCRE2_SPTR)string + so, (eo - so), 0, options, md, NULL); /* Successful match */ diff --git a/src/pcre2posix.h b/src/pcre2posix.h index f9f68be..6f19b51 100644 --- a/src/pcre2posix.h +++ 
b/src/pcre2posix.h @@ -95,7 +95,7 @@ enum { typedef struct { void *re_pcre2_code; - void *re_match_data; + void *re_match_data; size_t re_nsub; size_t re_erroffset; } regex_t; diff --git a/src/pcre2test.c b/src/pcre2test.c index a53976f..25e96ed 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -4797,9 +4797,9 @@ for (gmatched = 0;; gmatched++) PCRE2_SIZE length; uint32_t copybuffer[256]; int namelen = strlen((const char *)nptr); -#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32 +#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32 PCRE2_SIZE cnl = namelen; -#endif +#endif if (namelen == 0) break; #ifdef SUPPORT_PCRE2_8 @@ -4864,9 +4864,9 @@ for (gmatched = 0;; gmatched++) void *gotbuffer; int rc; int namelen = strlen((const char *)nptr); -#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32 +#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32 PCRE2_SIZE cnl = namelen; -#endif +#endif if (namelen == 0) break; #ifdef SUPPORT_PCRE2_8 @@ -5389,25 +5389,25 @@ if (PO(options) != DO(options) || PO(control) != DO(control)) return 1; } -/* Get the PCRE2 and Unicode version number and JIT target information, at the -same time checking that a request for the length gives the same answer. Also +/* Get the PCRE2 and Unicode version number and JIT target information, at the +same time checking that a request for the length gives the same answer. Also check lengths for non-string items. 
*/ -if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) != +if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) != PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) || - + PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) != PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) || - + PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) != PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) || - + PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(int) || - PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(long int)) + PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(long int)) { fprintf(stderr, "** Error in pcre2_config(): bad length\n"); return 1; - } + } /* Get buffers from malloc() so that valgrind will check their misuse when debugging. They grow automatically when very long lines are read. The 16-

pcre2_assign_jit_stack   Assign stack for JIT matching
pcre2_maketables   Build character tables in current locale
pcre2_pattern_to_host_byte_order   Convert compiled pattern to host byte order if necessary