Change lengths to PCRE2_SIZE and revise OP_RECURSE processing.

This commit is contained in:
Philip.Hazel 2014-08-16 09:46:58 +00:00
parent d108fc47ad
commit 7efba85b56
23 changed files with 499 additions and 369 deletions

View File

@ -429,7 +429,7 @@ endif # WITH_PCRE8
if WITH_JIT if WITH_JIT
TESTS += pcre2_jit_test TESTS += pcre2_jit_test
noinst_PROGRAMS += pcre2_jit_test noinst_PROGRAMS += pcre2_jit_test
pcre2_jit_test_SOURCES = pcre2_jit_test.c pcre2_jit_test_SOURCES = src/pcre2_jit_test.c
pcre2_jit_test_CFLAGS = $(AM_CFLAGS) pcre2_jit_test_CFLAGS = $(AM_CFLAGS)
pcre2_jit_test_LDADD = pcre2_jit_test_LDADD =
if WITH_PCRE8 if WITH_PCRE8

View File

@ -523,10 +523,10 @@ if test "$enable_utf" = "yes"; then
fi fi
if test "$enable_stack_for_recursion" = "no"; then if test "$enable_stack_for_recursion" = "no"; then
AC_DEFINE([NO_RECURSE], [], [ AC_DEFINE([HEAP_MATCH_RECURSE], [], [
PCRE2 uses recursive function calls to handle backtracking while PCRE2 uses recursive function calls to handle backtracking while
matching. This can sometimes be a problem on systems that have matching. This can sometimes be a problem on systems that have
stacks of limited size. Define NO_RECURSE to any value to get a stacks of limited size. Define HEAP_MATCH_RECURSE to any value to get a
version that doesn't use recursion in the match() function; instead version that doesn't use recursion in the match() function; instead
it creates its own stack by steam using memory from the heap. For more it creates its own stack by steam using memory from the heap. For more
detail, see the comments and other stuff just above the match() function.]) detail, see the comments and other stuff just above the match() function.])
@ -608,7 +608,7 @@ AC_DEFINE_UNQUOTED([MATCH_LIMIT_RECURSION], [$with_match_limit_recursion], [
increase the recursion depth. In some environments it is desirable increase the recursion depth. In some environments it is desirable
to limit the depth of recursive calls of match() more strictly, in to limit the depth of recursive calls of match() more strictly, in
order to restrict the maximum amount of stack (or heap, if order to restrict the maximum amount of stack (or heap, if
NO_RECURSE is defined) that is used. The value of HEAP_MATCH_RECURSE is defined) that is used. The value of
MATCH_LIMIT_RECURSION applies only to recursive calls of match(). To MATCH_LIMIT_RECURSION applies only to recursive calls of match(). To
have any useful effect, it must be less than the value of have any useful effect, it must be less than the value of
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT.

View File

@ -1,13 +1,13 @@
/* src/config.h. Generated from config.h.in by configure. */ /* src/config.h. Generated from config.h.in by configure. */
/* src/config.h.in. Generated from configure.ac by autoheader. */ /* src/config.h.in. Generated from configure.ac by autoheader. */
/* PCRE is written in Standard C, but there are a few non-standard things it /* PCRE2 is written in Standard C, but there are a few non-standard things it
can cope with, allowing it to run on SunOS4 and other "close to standard" can cope with, allowing it to run on SunOS4 and other "close to standard"
systems. systems.
In environments that support the GNU autotools, config.h.in is converted into In environments that support the GNU autotools, config.h.in is converted into
config.h by the "configure" script. In environments that use CMake, config.h by the "configure" script. In environments that use CMake,
config-cmake.in is converted into config.h. If you are going to build PCRE "by config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
hand" without using "configure" or CMake, you should copy the distributed hand" without using "configure" or CMake, you should copy the distributed
config.h.generic to config.h, and edit the macro definitions to be the way you config.h.generic to config.h, and edit the macro definitions to be the way you
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands, need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
@ -24,31 +24,28 @@ macros are listed as a commented #undef in config.h.generic. Macros such as
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
surrounded by #ifndef/#endif lines so that the value can be overridden by -D. surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
sure both macros are undefined; an emulation function will then be used. */ sure both macros are undefined; an emulation function will then be used. */
/* By default, the \R escape sequence matches any Unicode line ending /* By default, the \R escape sequence matches any Unicode line ending
character or sequence of characters. If BSR_ANYCRLF is defined (to any character or sequence of characters. If BSR_ANYCRLF is defined (to any
value), this is changed so that backslash-R matches only CR, LF, or CRLF. value), this is changed so that backslash-R matches only CR, LF, or CRLF.
The build-time default can be overridden by the user of PCRE at runtime. */ The build-time default can be overridden by the user of PCRE2 at runtime.
*/
/* #undef BSR_ANYCRLF */ /* #undef BSR_ANYCRLF */
/* If you are compiling for a system that uses EBCDIC instead of ASCII /* If you are compiling for a system that uses EBCDIC instead of ASCII
character codes, define this macro to any value. You must also edit the character codes, define this macro to any value. When EBCDIC is set, PCRE2
NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15). assumes that all input strings are in EBCDIC. If you do not define this
On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
automatically adjusted. When EBCDIC is set, PCRE assumes that all input is not possible to build a version of PCRE2 that supports both EBCDIC and
strings are in EBCDIC. If you do not define this macro, PCRE will assume UTF-8/16/32. */
input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build
a version of PCRE that supports both EBCDIC and UTF-8/16/32. */
/* #undef EBCDIC */ /* #undef EBCDIC */
/* In an EBCDIC environment, define this macro to any value to arrange for the /* In an EBCDIC environment, define this macro to any value to arrange for the
NL character to be 0x25 instead of the default 0x15. NL plays the role that NL character to be 0x25 instead of the default 0x15. NL plays the role that
LF does in an ASCII/Unicode environment. The value must also be set in the LF does in an ASCII/Unicode environment. */
NEWLINE macro below. On systems that can use "configure" or CMake to set
EBCDIC_NL25, the adjustment of NEWLINE is automatic. */
/* #undef EBCDIC_NL25 */ /* #undef EBCDIC_NL25 */
/* Define to 1 if you have the `bcopy' function. */ /* Define to 1 if you have the `bcopy' function. */
@ -126,11 +123,19 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to 1 if you have the <zlib.h> header file. */ /* Define to 1 if you have the <zlib.h> header file. */
/* #undef HAVE_ZLIB_H */ /* #undef HAVE_ZLIB_H */
/* PCRE2 uses recursive function calls to handle backtracking while matching.
This can sometimes be a problem on systems that have stacks of limited
size. Define HEAP_MATCH_RECURSE to any value to get a version that doesn't
use recursion in the match() function; instead it creates its own stack by
steam using memory from the heap. For more detail, see the comments and
other stuff just above the match() function. */
/* #undef HEAP_MATCH_RECURSE */
/* The value of LINK_SIZE determines the number of bytes used to store links /* The value of LINK_SIZE determines the number of bytes used to store links
as offsets within the compiled regex. The default is 2, which allows for as offsets within the compiled regex. The default is 2, which allows for
compiled patterns up to 64K long. This covers the vast majority of cases. compiled patterns up to 64K long. This covers the vast majority of cases.
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This
for longer patterns in extreme cases. */ allows for longer patterns in extreme cases. */
#ifndef LINK_SIZE #ifndef LINK_SIZE
#define LINK_SIZE 2 #define LINK_SIZE 2
#endif #endif
@ -144,7 +149,7 @@ sure both macros are undefined; an emulation function will then be used. */
/* The value of MATCH_LIMIT determines the default number of times the /* The value of MATCH_LIMIT determines the default number of times the
internal match() function can be called during a single execution of internal match() function can be called during a single execution of
pcre_exec(). There is a runtime interface for setting a different limit. pcre2_match(). There is a runtime interface for setting a different limit.
The limit exists in order to catch runaway regular expressions that take The limit exists in order to catch runaway regular expressions that take
for ever to determine that they do not match. The default is set very large for ever to determine that they do not match. The default is set very large
so that it does not accidentally catch legitimate cases. */ so that it does not accidentally catch legitimate cases. */
@ -155,11 +160,11 @@ sure both macros are undefined; an emulation function will then be used. */
/* The above limit applies to all calls of match(), whether or not they /* The above limit applies to all calls of match(), whether or not they
increase the recursion depth. In some environments it is desirable to limit increase the recursion depth. In some environments it is desirable to limit
the depth of recursive calls of match() more strictly, in order to restrict the depth of recursive calls of match() more strictly, in order to restrict
the maximum amount of stack (or heap, if NO_RECURSE is defined) that is the maximum amount of stack (or heap, if HEAP_MATCH_RECURSE is defined)
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of that is used. The value of MATCH_LIMIT_RECURSION applies only to recursive
match(). To have any useful effect, it must be less than the value of calls of match(). To have any useful effect, it must be less than the value
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is of MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There
a runtime method for setting a different limit. */ is a runtime method for setting a different limit. */
#ifndef MATCH_LIMIT_RECURSION #ifndef MATCH_LIMIT_RECURSION
#define MATCH_LIMIT_RECURSION MATCH_LIMIT #define MATCH_LIMIT_RECURSION MATCH_LIMIT
#endif #endif
@ -178,27 +183,14 @@ sure both macros are undefined; an emulation function will then be used. */
#define MAX_NAME_SIZE 32 #define MAX_NAME_SIZE 32
#endif #endif
/* The value of NEWLINE determines the default newline character sequence. /* The value of NEWLINE_DEFAULT determines the default newline character
PCRE client programs can override this by selecting other values at run sequence. PCRE2 client programs can override this by selecting other values
time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338 at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
(CRLF); in EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or (ANYCRLF). */
3349 or 3365 (CRLF) because there are two alternative codepoints (0x15 and #ifndef NEWLINE_DEFAULT
0x25) that are used as the NL line terminator that is equivalent to ASCII #define NEWLINE_DEFAULT 2
LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY),
or -2 (ANYCRLF). */
#ifndef NEWLINE
#define NEWLINE 10
#endif #endif
/* PCRE uses recursive function calls to handle backtracking while matching.
This can sometimes be a problem on systems that have stacks of limited
size. Define NO_RECURSE to any value to get a version that doesn't use
recursion in the match() function; instead it creates its own stack by
steam using pcre_recurse_malloc() to obtain memory from the heap. For more
detail, see the comments and other stuff just above the match() function.
*/
/* #undef NO_RECURSE */
/* Name of package */ /* Name of package */
#define PACKAGE "pcre2" #define PACKAGE "pcre2"
@ -209,7 +201,7 @@ sure both macros are undefined; an emulation function will then be used. */
#define PACKAGE_NAME "PCRE2" #define PACKAGE_NAME "PCRE2"
/* Define to the full name and version of this package. */ /* Define to the full name and version of this package. */
#define PACKAGE_STRING "PCRE2 9.00-DEV" #define PACKAGE_STRING "PCRE2 10.00-DEV"
/* Define to the one symbol short name of this package. */ /* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pcre2" #define PACKAGE_TARNAME "pcre2"
@ -218,7 +210,7 @@ sure both macros are undefined; an emulation function will then be used. */
#define PACKAGE_URL "" #define PACKAGE_URL ""
/* Define to the version of this package. */ /* Define to the version of this package. */
#define PACKAGE_VERSION "9.00-DEV" #define PACKAGE_VERSION "10.00-DEV"
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
parentheses (of any kind) in a pattern. This limits the amount of system parentheses (of any kind) in a pattern. This limits the amount of system
@ -227,15 +219,13 @@ sure both macros are undefined; an emulation function will then be used. */
#define PARENS_NEST_LIMIT 250 #define PARENS_NEST_LIMIT 250
#endif #endif
/* #undef PCRE2_EXP_DEFN */ /* The value of PCRE2GREP_BUFSIZE determines the size of buffer used by
pcre2grep to hold parts of the file it is searching. This is also the
/* The value of PCREGREP_BUFSIZE determines the size of buffer used by minimum value. The actual amount of memory used by pcre2grep is three times
pcregrep to hold parts of the file it is searching. This is also the
minimum value. The actual amount of memory used by pcregrep is three times
this number, because it allows for the buffering of "before" and "after" this number, because it allows for the buffering of "before" and "after"
lines. */ lines. */
#ifndef PCREGREP_BUFSIZE #ifndef PCRE2GREP_BUFSIZE
#define PCREGREP_BUFSIZE 20480 #define PCRE2GREP_BUFSIZE 20480
#endif #endif
/* If you are compiling for a system other than a Unix-like system or /* If you are compiling for a system other than a Unix-like system or
@ -247,21 +237,10 @@ sure both macros are undefined; an emulation function will then be used. */
This macro appears at the start of every exported function that is part This macro appears at the start of every exported function that is part
of the external API. It does not appear on functions that are "external" of the external API. It does not appear on functions that are "external"
in the C sense, but which are internal to the library. */ in the C sense, but which are internal to the library. */
/* #undef PCRE_EXP_DEFN */ /* #undef PCRE2_EXP_DEFN */
/* Define to any value if linking statically (TODO: make nice with Libtool) */ /* Define to any value if linking statically (TODO: make nice with Libtool) */
/* #undef PCRE_STATIC */ /* #undef PCRE2_STATIC */
/* When calling PCRE via the POSIX interface, additional working storage is
required for holding the pointers to capturing substrings because PCRE
requires three integers per substring, whereas the POSIX interface provides
only two. If the number of expected substrings is small, the wrapper
function uses space on the stack, because this is faster than using
malloc() for each call. The threshold above which the stack is no longer
used is defined by POSIX_MALLOC_THRESHOLD. */
#ifndef POSIX_MALLOC_THRESHOLD
#define POSIX_MALLOC_THRESHOLD 10
#endif
/* Define to necessary symbol if this constant uses a non-standard name on /* Define to necessary symbol if this constant uses a non-standard name on
your system. */ your system. */
@ -273,35 +252,35 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to any value to enable support for Just-In-Time compiling. */ /* Define to any value to enable support for Just-In-Time compiling. */
/* #undef SUPPORT_JIT */ /* #undef SUPPORT_JIT */
/* Define to any value to allow pcregrep to be linked with libbz2, so that it /* Define to any value to allow pcre2grep to be linked with libbz2, so that it
is able to handle .bz2 files. */ is able to handle .bz2 files. */
/* #undef SUPPORT_LIBBZ2 */ /* #undef SUPPORT_LIBBZ2 */
/* Define to any value to allow pcretest to be linked with libedit. */ /* Define to any value to allow pcre2test to be linked with libedit. */
/* #undef SUPPORT_LIBEDIT */ /* #undef SUPPORT_LIBEDIT */
/* Define to any value to allow pcretest to be linked with libreadline. */ /* Define to any value to allow pcre2test to be linked with libreadline. */
/* #undef SUPPORT_LIBREADLINE */ /* #undef SUPPORT_LIBREADLINE */
/* Define to any value to allow pcregrep to be linked with libz, so that it is /* Define to any value to allow pcre2grep to be linked with libz, so that it
able to handle .gz files. */ is able to handle .gz files. */
/* #undef SUPPORT_LIBZ */ /* #undef SUPPORT_LIBZ */
/* Define to any value to enable the 16 bit PCRE library. */ /* Define to any value to enable the 16 bit PCRE2 library. */
/* #undef SUPPORT_PCRE16 */ /* #undef SUPPORT_PCRE16 */
/* Define to any value to enable the 32 bit PCRE library. */ /* Define to any value to enable JIT support in pcre2grep. */
/* #undef SUPPORT_PCRE2GREP_JIT */
/* Define to any value to enable the 32 bit PCRE2 library. */
/* #undef SUPPORT_PCRE32 */ /* #undef SUPPORT_PCRE32 */
/* Define to any value to enable the 8 bit PCRE library. */ /* Define to any value to enable the 8 bit PCRE2 library. */
/* #undef SUPPORT_PCRE8 */ /* #undef SUPPORT_PCRE8 */
/* Define to any value to enable JIT support in pcregrep. */
/* #undef SUPPORT_PCREGREP_JIT */
/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding. /* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
This will work even in an EBCDIC environment, but it is incompatible with This will work even in an EBCDIC environment, but it is incompatible with
the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or* the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
ASCII/UTF-8/16/32, but not both at once. */ ASCII/UTF-8/16/32, but not both at once. */
/* #undef SUPPORT_UTF */ /* #undef SUPPORT_UTF */
@ -309,7 +288,7 @@ sure both macros are undefined; an emulation function will then be used. */
/* #undef SUPPORT_VALGRIND */ /* #undef SUPPORT_VALGRIND */
/* Version number of package */ /* Version number of package */
#define VERSION "9.00-DEV" #define VERSION "10.00-DEV"
/* Define to empty if `const' does not conform to ANSI C. */ /* Define to empty if `const' does not conform to ANSI C. */
/* #undef const */ /* #undef const */

View File

@ -271,12 +271,14 @@ typedef const PCRE2_UCHAR8 *PCRE2_SPTR8;
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16; typedef const PCRE2_UCHAR16 *PCRE2_SPTR16;
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32; typedef const PCRE2_UCHAR32 *PCRE2_SPTR32;
/* Offsets in the pattern (for errors) and in the subject (after a match) are /* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
unsigned 32-bit numbers. We also define a value to indicate "unset" in the including pattern offsets for errors and subject offsets after a match. We
offset vector (ovector). */ define special values to indicate zero-terminated strings and unset offsets in
the offset vector (ovector). */
#define PCRE2_OFFSET PCRE2_UCHAR32 #define PCRE2_SIZE size_t
#define PCRE2_UNSET (~(PCRE2_OFFSET)0) #define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
#define PCRE2_UNSET (~(PCRE2_SIZE)0)
/* Generic types for opaque structures and JIT callback functions. These /* Generic types for opaque structures and JIT callback functions. These
declarations are defined in a macro that is expanded for each width later. */ declarations are defined in a macro that is expanded for each width later. */
@ -314,17 +316,17 @@ typedef struct pcre2_callout_block { \
int version; /* Identifies version of block */ \ int version; /* Identifies version of block */ \
/* ------------------------ Version 0 ------------------------------- */ \ /* ------------------------ Version 0 ------------------------------- */ \
uint32_t callout_number; /* Number compiled into pattern */ \ uint32_t callout_number; /* Number compiled into pattern */ \
PCRE2_OFFSET *offset_vector; /* The offset vector */ \ PCRE2_SIZE *offset_vector; /* The offset vector */ \
PCRE2_SPTR subject; /* The subject being matched */ \ PCRE2_SPTR subject; /* The subject being matched */ \
size_t subject_length; /* The length of the subject */ \ size_t subject_length; /* The length of the subject */ \
PCRE2_OFFSET start_match; /* Offset to start of this match attempt */ \ PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \
PCRE2_OFFSET current_position; /* Where we currently are in the subject */ \ PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
uint32_t capture_top; /* Max current capture */ \ uint32_t capture_top; /* Max current capture */ \
uint32_t capture_last; /* Most recently closed capture */ \ uint32_t capture_last; /* Most recently closed capture */ \
void *callout_data; /* Data passed in with the call */ \ void *callout_data; /* Data passed in with the call */ \
/* ------------------- Added for Version 1 -------------------------- */ \ /* ------------------- Added for Version 1 -------------------------- */ \
PCRE2_OFFSET pattern_position; /* Offset to next item in the pattern */ \ PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
PCRE2_OFFSET next_item_length; /* Length of next item in the pattern */ \ PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
/* ------------------- Added for Version 2 -------------------------- */ \ /* ------------------- Added for Version 2 -------------------------- */ \
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \ PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
/* ------------------------------------------------------------------ */ \ /* ------------------------------------------------------------------ */ \
@ -392,8 +394,8 @@ PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
#define PCRE2_COMPILE_FUNCTIONS \ #define PCRE2_COMPILE_FUNCTIONS \
PCRE2_EXP_DECL \ PCRE2_EXP_DECL \
pcre2_code *pcre2_compile(PCRE2_SPTR, int, uint32_t, \ pcre2_code *pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, \
int *, PCRE2_OFFSET *, pcre2_compile_context *); \ int *, PCRE2_SIZE *, pcre2_compile_context *); \
PCRE2_EXP_DECL void pcre2_code_free(pcre2_code *); PCRE2_EXP_DECL void pcre2_code_free(pcre2_code *);
@ -413,20 +415,20 @@ PCRE2_EXP_DECL \
PCRE2_EXP_DECL \ PCRE2_EXP_DECL \
pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *, \ pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *, \
pcre2_general_context *); \ pcre2_general_context *); \
PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, \ PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, \
PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \ PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
pcre2_match_data *, pcre2_match_context *, int *, \ pcre2_match_data *, pcre2_match_context *, int *, \
size_t); \ size_t); \
PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \ PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \
PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \ PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
pcre2_match_data *, pcre2_match_context *); \ pcre2_match_data *, pcre2_match_context *); \
PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \ PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_OFFSET pcre2_get_leftchar(pcre2_match_data *); \ PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_leftchar(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \ PCRE2_EXP_DECL PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \
PCRE2_EXP_DECL uint32_t pcre2_get_ovector_count(pcre2_match_data *); \ PCRE2_EXP_DECL uint32_t pcre2_get_ovector_count(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_OFFSET *pcre2_get_ovector_pointer(pcre2_match_data *); \ PCRE2_EXP_DECL PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_OFFSET pcre2_get_rightchar(pcre2_match_data *); \ PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_rightchar(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_OFFSET pcre2_get_startchar(pcre2_match_data *); PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *);
/* Convenience functions for handling matched substrings. */ /* Convenience functions for handling matched substrings. */
@ -457,9 +459,9 @@ PCRE2_EXP_DECL int pcre2_substring_list_get(pcre2_match_data *, \
/* Functions for JIT processing */ /* Functions for JIT processing */
#define PCRE2_JIT_FUNCTIONS \ #define PCRE2_JIT_FUNCTIONS \
PCRE2_EXP_DECL void pcre2_jit_compile(pcre2_code *, uint32_t); \ PCRE2_EXP_DECL int pcre2_jit_compile(pcre2_code *, uint32_t); \
PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \ PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \
PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \ PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
pcre2_match_data *, pcre2_match_context *, \ pcre2_match_data *, pcre2_match_context *, \
pcre2_jit_stack *); \ pcre2_jit_stack *); \
PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *);\ PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *);\

View File

@ -7256,8 +7256,8 @@ Returns: pointer to compiled data block, or NULL on error,
*/ */
PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION
pcre2_compile(PCRE2_SPTR pattern, int patlen, uint32_t options, pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options,
int *errorptr, PCRE2_OFFSET *erroroffset, pcre2_compile_context *ccontext) int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext)
{ {
BOOL utf; /* Set TRUE for UTF mode */ BOOL utf; /* Set TRUE for UTF mode */
pcre2_real_code *re = NULL; /* What we will return */ pcre2_real_code *re = NULL; /* What we will return */
@ -7324,10 +7324,12 @@ if (ccontext == NULL)
ccontext = &default_context; ccontext = &default_context;
} }
/* A negative pattern length means "zero-terminated". Otherwise, we make /* A zero-terminated pattern is indicated by the special length value
a copy of the pattern and add a zero. */ PCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero,
to ensure that it is always possible to look one code unit beyond the end of
the pattern's characters. */
if (patlen < 0) patlen = PRIV(strlen)(pattern); else if (patlen == PCRE2_ZERO_TERMINATED) patlen = PRIV(strlen)(pattern); else
{ {
if (patlen < COPIED_PATTERN_SIZE) if (patlen < COPIED_PATTERN_SIZE)
copied_pattern = stack_copied_pattern; copied_pattern = stack_copied_pattern;
@ -7473,7 +7475,7 @@ if (utf)
goto HAD_ERROR; goto HAD_ERROR;
} }
if ((options & PCRE2_NO_UTF_CHECK) == 0 && if ((options & PCRE2_NO_UTF_CHECK) == 0 &&
(errorcode = PRIV(valid_utf)(pattern, -1, erroroffset)) != 0) (errorcode = PRIV(valid_utf)(pattern, patlen, erroroffset)) != 0)
goto HAD_ERROR; goto HAD_ERROR;
} }

View File

@ -136,7 +136,7 @@ switch (what)
break; break;
case PCRE2_CONFIG_STACKRECURSE: case PCRE2_CONFIG_STACKRECURSE:
#ifdef NO_RECURSE #ifdef HEAP_MATCH_RECURSE
*((int *)where) = 0; *((int *)where) = 0;
#else #else
*((int *)where) = 1; *((int *)where) = 1;

View File

@ -164,7 +164,7 @@ if (defmemctl)
mcontext->memctl.free = default_free; mcontext->memctl.free = default_free;
mcontext->memctl.memory_data = NULL; mcontext->memctl.memory_data = NULL;
} }
#ifdef NO_RECURSE #ifdef HEAP_MATCH_RECURSE
mcontext->stack_memctl = mcontext->memctl; mcontext->stack_memctl = mcontext->memctl;
#endif #endif
mcontext->callout = NULL; mcontext->callout = NULL;
@ -389,7 +389,7 @@ pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *), void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
void *mydata) void *mydata)
{ {
#ifdef NO_RECURSE #ifdef HEAP_MATCH_RECURSE
mcontext->stack_memctl.malloc = mymalloc; mcontext->stack_memctl.malloc = mymalloc;
mcontext->stack_memctl.free = myfree; mcontext->stack_memctl.free = myfree;
mcontext->stack_memctl.memory_data = mydata; mcontext->stack_memctl.memory_data = mydata;

View File

@ -365,8 +365,8 @@ internal_dfa_match(
dfa_match_block *mb, dfa_match_block *mb,
PCRE2_SPTR this_start_code, PCRE2_SPTR this_start_code,
PCRE2_SPTR current_subject, PCRE2_SPTR current_subject,
PCRE2_OFFSET start_offset, PCRE2_SIZE start_offset,
PCRE2_OFFSET *offsets, PCRE2_SIZE *offsets,
uint32_t offsetcount, uint32_t offsetcount,
int *workspace, int *workspace,
int wscount, int wscount,
@ -730,7 +730,7 @@ for (;;)
else if (match_count > 0 && ++match_count * 2 > (int)offsetcount) else if (match_count > 0 && ++match_count * 2 > (int)offsetcount)
match_count = 0; match_count = 0;
count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2; count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2;
if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int)); if (count > 0) memmove(offsets + 2, offsets, count * sizeof(PCRE2_SIZE));
if (offsetcount >= 2) if (offsetcount >= 2)
{ {
offsets[0] = (int)(current_subject - start_subject); offsets[0] = (int)(current_subject - start_subject);
@ -2560,7 +2560,7 @@ for (;;)
case OP_ASSERTBACK_NOT: case OP_ASSERTBACK_NOT:
{ {
PCRE2_SPTR endasscode = code + GET(code, 1); PCRE2_SPTR endasscode = code + GET(code, 1);
PCRE2_OFFSET local_offsets[2]; PCRE2_SIZE local_offsets[2];
int rc; int rc;
int local_workspace[1000]; int local_workspace[1000];
@ -2572,7 +2572,7 @@ for (;;)
ptr, /* where we currently are */ ptr, /* where we currently are */
(int)(ptr - start_subject), /* start offset */ (int)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */ local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_OFFSET), /* size of same */ sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
local_workspace, /* workspace vector */ local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */ sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */ rlevel); /* function recursion level */
@ -2587,7 +2587,7 @@ for (;;)
case OP_COND: case OP_COND:
case OP_SCOND: case OP_SCOND:
{ {
PCRE2_OFFSET local_offsets[1000]; PCRE2_SIZE local_offsets[1000];
int local_workspace[1000]; int local_workspace[1000];
int codelink = GET(code, 1); int codelink = GET(code, 1);
int condcode; int condcode;
@ -2606,9 +2606,9 @@ for (;;)
cb.callout_number = code[LINK_SIZE+2]; cb.callout_number = code[LINK_SIZE+2];
cb.offset_vector = offsets; cb.offset_vector = offsets;
cb.subject = start_subject; cb.subject = start_subject;
cb.subject_length = (int)(end_subject - start_subject); cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject);
cb.start_match = (int)(current_subject - start_subject); cb.start_match = (PCRE2_SIZE)(current_subject - start_subject);
cb.current_position = (int)(ptr - start_subject); cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
cb.pattern_position = GET(code, LINK_SIZE + 3); cb.pattern_position = GET(code, LINK_SIZE + 3);
cb.next_item_length = GET(code, 3 + 2*LINK_SIZE); cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
cb.capture_top = 1; cb.capture_top = 1;
@ -2664,7 +2664,7 @@ for (;;)
ptr, /* where we currently are */ ptr, /* where we currently are */
(int)(ptr - start_subject), /* start offset */ (int)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */ local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_OFFSET), /* size of same */ sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
local_workspace, /* workspace vector */ local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */ sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */ rlevel); /* function recursion level */
@ -2683,7 +2683,7 @@ for (;;)
case OP_RECURSE: case OP_RECURSE:
{ {
dfa_recursion_info *ri; dfa_recursion_info *ri;
PCRE2_OFFSET local_offsets[1000]; PCRE2_SIZE local_offsets[1000];
int local_workspace[1000]; int local_workspace[1000];
PCRE2_SPTR callpat = start_code + GET(code, 1); PCRE2_SPTR callpat = start_code + GET(code, 1);
uint32_t recno = (callpat == mb->start_code)? 0 : uint32_t recno = (callpat == mb->start_code)? 0 :
@ -2712,7 +2712,7 @@ for (;;)
ptr, /* where we currently are */ ptr, /* where we currently are */
(int)(ptr - start_subject), /* start offset */ (int)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */ local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_OFFSET), /* size of same */ sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
local_workspace, /* workspace vector */ local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */ sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */ rlevel); /* function recursion level */
@ -2777,7 +2777,7 @@ for (;;)
for (matched_count = 0;; matched_count++) for (matched_count = 0;; matched_count++)
{ {
PCRE2_OFFSET local_offsets[2]; PCRE2_SIZE local_offsets[2];
int local_workspace[1000]; int local_workspace[1000];
int rc = internal_dfa_match( int rc = internal_dfa_match(
@ -2786,7 +2786,7 @@ for (;;)
local_ptr, /* where we currently are */ local_ptr, /* where we currently are */
(int)(ptr - start_subject), /* start offset */ (int)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */ local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_OFFSET), /* size of same */ sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
local_workspace, /* workspace vector */ local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */ sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */ rlevel); /* function recursion level */
@ -2849,7 +2849,7 @@ for (;;)
case OP_ONCE: case OP_ONCE:
case OP_ONCE_NC: case OP_ONCE_NC:
{ {
PCRE2_OFFSET local_offsets[2]; PCRE2_SIZE local_offsets[2];
int local_workspace[1000]; int local_workspace[1000];
int rc = internal_dfa_match( int rc = internal_dfa_match(
@ -2858,7 +2858,7 @@ for (;;)
ptr, /* where we currently are */ ptr, /* where we currently are */
(int)(ptr - start_subject), /* start offset */ (int)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */ local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_OFFSET), /* size of same */ sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
local_workspace, /* workspace vector */ local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */ sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */ rlevel); /* function recursion level */
@ -2948,9 +2948,9 @@ for (;;)
cb.callout_number = code[1]; cb.callout_number = code[1];
cb.offset_vector = offsets; cb.offset_vector = offsets;
cb.subject = start_subject; cb.subject = start_subject;
cb.subject_length = (int)(end_subject - start_subject); cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject);
cb.start_match = (int)(current_subject - start_subject); cb.start_match = (PCRE2_SIZE)(current_subject - start_subject);
cb.current_position = (int)(ptr - start_subject); cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
cb.pattern_position = GET(code, 2); cb.pattern_position = GET(code, 2);
cb.next_item_length = GET(code, 2 + LINK_SIZE); cb.next_item_length = GET(code, 2 + LINK_SIZE);
cb.capture_top = 1; cb.capture_top = 1;
@ -3049,8 +3049,8 @@ Returns: > 0 => number of match offset pairs placed in offsets
*/ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, int length, pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
PCRE2_OFFSET start_offset, uint32_t options, pcre2_match_data *match_data, PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
pcre2_match_context *mcontext, int *workspace, size_t wscount) pcre2_match_context *mcontext, int *workspace, size_t wscount)
{ {
const pcre2_real_code *re = (const pcre2_real_code *)code; const pcre2_real_code *re = (const pcre2_real_code *)code;
@ -3078,9 +3078,10 @@ is used below, and it expects NLBLOCK to be defined as a pointer. */
dfa_match_block actual_match_block; dfa_match_block actual_match_block;
dfa_match_block *mb = &actual_match_block; dfa_match_block *mb = &actual_match_block;
/* A negative length implies a zero-terminated subject string. */ /* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
subject string. */
if (length < 0) length = PRIV(strlen)(subject); if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
/* Plausibility checks */ /* Plausibility checks */
@ -3088,7 +3089,7 @@ if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL) if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
return PCRE2_ERROR_NULL; return PCRE2_ERROR_NULL;
if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE; if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
if ((int)start_offset > length) return PCRE2_ERROR_BADOFFSET; if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
/* Check that the first field in the block is the magic number. If it is not, /* Check that the first field in the block is the magic number. If it is not,
return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to
@ -3214,7 +3215,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
return match_data->rc; return match_data->rc;
} }
#if PCRE2_CODE_UNIT_WIDTH != 32 #if PCRE2_CODE_UNIT_WIDTH != 32
if (start_offset > 0 && (int)start_offset < length && if (start_offset > 0 && start_offset < length &&
NOT_FIRSTCHAR(subject[start_offset])) NOT_FIRSTCHAR(subject[start_offset]))
return PCRE2_ERROR_BADUTFOFFSET; return PCRE2_ERROR_BADUTFOFFSET;
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
@ -3466,12 +3467,12 @@ for (;;)
{ {
if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0) if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0)
{ {
match_data->ovector[0] = (PCRE2_OFFSET)(start_match - subject); match_data->ovector[0] = (PCRE2_SIZE)(start_match - subject);
match_data->ovector[1] = (PCRE2_OFFSET)(end_subject - subject); match_data->ovector[1] = (PCRE2_SIZE)(end_subject - subject);
} }
match_data->leftchar = (PCRE2_OFFSET)(mb->start_used_ptr - subject); match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject);
match_data->rightchar = 0; /* FIXME */ match_data->rightchar = 0; /* FIXME */
match_data->startchar = (PCRE2_OFFSET)(start_match - subject); match_data->startchar = (PCRE2_SIZE)(start_match - subject);
match_data->rc = rc; match_data->rc = rc;
return rc; return rc;
} }

View File

@ -1859,7 +1859,7 @@ extern void _pcre2_compile_context_init(pcre2_compile_context *, BOOL);
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int); extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *, extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
BOOL); BOOL);
extern int _pcre2_jit_get_size(void *); extern size_t _pcre2_jit_get_size(void *);
extern void _pcre2_match_context_init(pcre2_match_context *, BOOL); extern void _pcre2_match_context_init(pcre2_match_context *, BOOL);
extern void *_pcre2_memctl_malloc(size_t, pcre2_memctl *); extern void *_pcre2_memctl_malloc(size_t, pcre2_memctl *);
extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *); extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *);
@ -1869,7 +1869,7 @@ extern int _pcre2_strlen(PCRE2_SPTR);
extern int _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t); extern int _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t);
extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t); extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t);
extern int _pcre2_study(pcre2_real_code *); extern int _pcre2_study(pcre2_real_code *);
extern int _pcre2_valid_utf(PCRE2_SPTR, int, PCRE2_OFFSET *); extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *, extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
BOOL); BOOL);
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL); extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);

View File

@ -565,7 +565,7 @@ typedef struct pcre2_real_compile_context {
typedef struct pcre2_real_match_context { typedef struct pcre2_real_match_context {
pcre2_memctl memctl; pcre2_memctl memctl;
#ifdef NO_RECURSE #ifdef HEAP_MATCH_RECURSE
pcre2_memctl stack_memctl; pcre2_memctl stack_memctl;
#endif #endif
int (*callout)(pcre2_callout_block *); int (*callout)(pcre2_callout_block *);
@ -609,12 +609,12 @@ typedef struct pcre2_real_match_data {
const pcre2_real_code *code; /* The pattern used for the match */ const pcre2_real_code *code; /* The pattern used for the match */
PCRE2_SPTR subject; /* The subject that was matched */ PCRE2_SPTR subject; /* The subject that was matched */
int rc; /* The return code from the match */ int rc; /* The return code from the match */
PCRE2_OFFSET leftchar; /* Offset to leftmost code unit */ PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
PCRE2_OFFSET rightchar; /* Offset to rightmost code unit */ PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
PCRE2_OFFSET startchar; /* Offset to starting code unit */ PCRE2_SIZE startchar; /* Offset to starting code unit */
PCRE2_SPTR mark; /* Pointer to last mark */ PCRE2_SPTR mark; /* Pointer to last mark */
uint16_t oveccount; /* Number of pairs */ uint16_t oveccount; /* Number of pairs */
PCRE2_OFFSET ovector[1]; /* The first field */ PCRE2_SIZE ovector[1]; /* The first field */
} pcre2_real_match_data; } pcre2_real_match_data;
@ -688,7 +688,7 @@ call within the pattern; used by pcre_match(). */
typedef struct recursion_info { typedef struct recursion_info {
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */ struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
unsigned int group_num; /* Number of group that was called */ unsigned int group_num; /* Number of group that was called */
PCRE2_OFFSET *offset_save; /* Pointer to start of saved offsets */ PCRE2_SIZE *ovec_save; /* Pointer to start of saved ovector */
uint32_t saved_max; /* Number of saved offsets */ uint32_t saved_max; /* Number of saved offsets */
uint32_t saved_capture_last; /* Last capture number */ uint32_t saved_capture_last; /* Last capture number */
PCRE2_SPTR subject_position; /* Position at start of recursion */ PCRE2_SPTR subject_position; /* Position at start of recursion */
@ -717,7 +717,7 @@ doing traditional NFA matching (pcre2_match() and friends). */
typedef struct match_block { typedef struct match_block {
pcre2_memctl memctl; /* For general use */ pcre2_memctl memctl; /* For general use */
#ifdef NO_RECURSE #ifdef HEAP_MATCH_RECURSE
pcre2_memctl stack_memctl; /* For "stack" frames */ pcre2_memctl stack_memctl; /* For "stack" frames */
#endif #endif
uint32_t match_call_count; /* As it says */ uint32_t match_call_count; /* As it says */
@ -728,11 +728,11 @@ typedef struct match_block {
const uint8_t *lcc; /* Points to lower casing table */ const uint8_t *lcc; /* Points to lower casing table */
const uint8_t *fcc; /* Points to case-flipping table */ const uint8_t *fcc; /* Points to case-flipping table */
const uint8_t *ctypes; /* Points to table of type maps */ const uint8_t *ctypes; /* Points to table of type maps */
PCRE2_OFFSET *ovector; /* Pointer to the offset vector */ PCRE2_SIZE *ovector; /* Pointer to the offset vector */
PCRE2_OFFSET offset_end; /* One past the end */ PCRE2_SIZE offset_end; /* One past the end */
PCRE2_OFFSET offset_max; /* The maximum usable for return data */ PCRE2_SIZE offset_max; /* The maximum usable for return data */
PCRE2_OFFSET start_offset; /* The start offset value */ PCRE2_SIZE start_offset; /* The start offset value */
PCRE2_OFFSET end_offset_top; /* Highwater mark at end of match */ PCRE2_SIZE end_offset_top; /* Highwater mark at end of match */
uint16_t partial; /* PARTIAL options */ uint16_t partial; /* PARTIAL options */
uint16_t bsr_convention; /* \R interpretation */ uint16_t bsr_convention; /* \R interpretation */
uint16_t name_count; /* Number of names in name table */ uint16_t name_count; /* Number of names in name table */
@ -760,7 +760,7 @@ typedef struct match_block {
recursion_info *recursive; /* Linked list of recursion data */ recursion_info *recursive; /* Linked list of recursion data */
void *callout_data; /* To pass back to callouts */ void *callout_data; /* To pass back to callouts */
int (*callout)(pcre2_callout_block *); /* Callout function or NULL */ int (*callout)(pcre2_callout_block *); /* Callout function or NULL */
#ifdef NO_RECURSE #ifdef HEAP_MATCH_RECURSE
void *match_frames_base; /* For remembering malloc'd frames */ void *match_frames_base; /* For remembering malloc'd frames */
#endif #endif
} match_block; } match_block;
@ -775,7 +775,7 @@ typedef struct dfa_match_block {
PCRE2_SPTR end_subject; /* End of subject string */ PCRE2_SPTR end_subject; /* End of subject string */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */ PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
const uint8_t *tables; /* Character tables */ const uint8_t *tables; /* Character tables */
PCRE2_OFFSET start_offset; /* The start offset value */ PCRE2_SIZE start_offset; /* The start offset value */
uint32_t moptions; /* Match options */ uint32_t moptions; /* Match options */
uint32_t poptions; /* Pattern options */ uint32_t poptions; /* Pattern options */
uint32_t nltype; /* Newline type */ uint32_t nltype; /* Newline type */

View File

@ -62,16 +62,21 @@ Returns: nothing
/* FIXME: this is currently a placeholder function */ /* FIXME: this is currently a placeholder function */
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code *code, uint32_t options) pcre2_jit_compile(pcre2_code *code, uint32_t options)
{ {
#ifndef SUPPORT_JIT #ifndef SUPPORT_JIT
(void)code; (void)code;
(void)options; (void)options;
return PCRE2_ERROR_JIT_BADOPTION;
#else /* SUPPORT_JIT */ #else /* SUPPORT_JIT */
/* Dummy code */
code=code; options = options; /* Dummy.... */ code=code;
options = options;
return PCRE2_ERROR_JIT_BADOPTION;
#endif /* SUPPORT_JIT */ #endif /* SUPPORT_JIT */
} }

View File

@ -71,11 +71,12 @@ Returns: > 0 => success; value is the number of ovector pairs filled
/* FIXME: this is currently a placeholder function */ /* FIXME: this is currently a placeholder function */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, int length, pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
PCRE2_OFFSET start_offset, uint32_t options, pcre2_match_data *match_data, PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
pcre2_match_context *mcontext, pcre2_jit_stack *jit_stack) pcre2_match_context *mcontext, pcre2_jit_stack *jit_stack)
{ {
#ifndef SUPPORT_JIT #ifndef SUPPORT_JIT
(void)code; (void)code;
(void)subject; (void)subject;
(void)length; (void)length;
@ -85,12 +86,16 @@ pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, int length,
(void)mcontext; (void)mcontext;
(void)jit_stack; (void)jit_stack;
return PCRE2_ERROR_JIT_BADOPTION; return PCRE2_ERROR_JIT_BADOPTION;
#else /* SUPPORT_JIT */ #else /* SUPPORT_JIT */
/* Dummy code */ /* Dummy code */
code=code;subject=subject;length=length; code=code;
start_offset=start_offset; options=options; match_data=match_data; subject=subject;
length=length;
start_offset=start_offset;
options=options;
match_data=match_data;
mcontext=mcontext; mcontext=mcontext;
jit_stack=jit_stack; jit_stack=jit_stack;
return PCRE2_ERROR_JIT_BADOPTION; return PCRE2_ERROR_JIT_BADOPTION;

View File

@ -58,10 +58,14 @@ PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_jit_free_unused_memory(pcre2_general_context *gcontext) pcre2_jit_free_unused_memory(pcre2_general_context *gcontext)
{ {
#ifndef SUPPORT_JIT #ifndef SUPPORT_JIT
(void)gcontext; /* Suppress warning */ (void)gcontext; /* Suppress warning */
#else /* SUPPORT_JIT */ #else /* SUPPORT_JIT */
gcontext=gcontext; /* Dummy */ /* Dummy code */
gcontext=gcontext;
#endif /* SUPPORT_JIT */ #endif /* SUPPORT_JIT */
} }
@ -76,14 +80,20 @@ pcre2_jit_stack_alloc(pcre2_general_context *gcontext, size_t startsize,
size_t maxsize) size_t maxsize)
{ {
#ifndef SUPPORT_JIT #ifndef SUPPORT_JIT
(void)gcontext; (void)gcontext;
(void)startsize; (void)startsize;
(void)maxsize; (void)maxsize;
return NULL; return NULL;
#else /* SUPPORT_JIT */ #else /* SUPPORT_JIT */
gcontext=gcontext;startsize=startsize;maxsize=maxsize; /* Dummy code */
gcontext=gcontext;
startsize=startsize;
maxsize=maxsize;
return NULL; return NULL;
#endif #endif
} }
@ -97,12 +107,18 @@ pcre2_jit_stack_assign(const pcre2_code *code, pcre2_jit_callback callback,
void *callback_data) void *callback_data)
{ {
#ifndef SUPPORT_JIT #ifndef SUPPORT_JIT
(void)code; (void)code;
(void)callback; (void)callback;
(void)callback_data; (void)callback_data;
#else /* SUPPORT_JIT */ #else /* SUPPORT_JIT */
code=code;callback=callback;callback_data=callback_data; /* Dummy code */
code=code;
callback=callback;
callback_data=callback_data;
#endif /* SUPPORT_JIT */ #endif /* SUPPORT_JIT */
} }
@ -115,10 +131,14 @@ PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_jit_stack_free(pcre2_jit_stack *jit_stack) pcre2_jit_stack_free(pcre2_jit_stack *jit_stack)
{ {
#ifndef SUPPORT_JIT #ifndef SUPPORT_JIT
(void)jit_stack; (void)jit_stack;
#else /* SUPPORT_JIT */ #else /* SUPPORT_JIT */
/* Dummy code */
jit_stack=jit_stack; jit_stack=jit_stack;
#endif /* SUPPORT_JIT */ #endif /* SUPPORT_JIT */
} }
@ -127,16 +147,20 @@ jit_stack=jit_stack;
* Get size of JIT code * * Get size of JIT code *
*************************************************/ *************************************************/
int size_t
PRIV(jit_get_size)(void *executable_jit) PRIV(jit_get_size)(void *executable_jit)
{ {
#ifndef SUPPORT_JIT #ifndef SUPPORT_JIT
(void)executable_jit; (void)executable_jit;
return 0; return 0;
#else /* SUPPORT_JIT */ #else /* SUPPORT_JIT */
/* Dummy code */
executable_jit = executable_jit; executable_jit = executable_jit;
return 0; /* FIXME */ return 0;
#endif #endif
} }

View File

@ -104,17 +104,18 @@ for any one of them can use a range. */
#define MATCH_BACKTRACK_MAX MATCH_THEN #define MATCH_BACKTRACK_MAX MATCH_THEN
#define MATCH_BACKTRACK_MIN MATCH_COMMIT #define MATCH_BACKTRACK_MIN MATCH_COMMIT
/* Maximum number of ints of offset to save on the stack for recursive calls.
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
because the offset vector is always a multiple of 3 long. */
#define REC_STACK_SAVE_MAX 30
/* Min and max values for the common repeats; for the maxima, 0 => infinity */ /* Min and max values for the common repeats; for the maxima, 0 => infinity */
static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, }; static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, }; static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
/* Maximum number of ovector elements that can be saved on the system stack
when processing OP_RECURSE in non-HEAP_MATCH_RECURSE mode. If the ovector is
bigger, malloc() is used. This value should be a multiple of 3, because the
ovector length is always a multiple of 3. */
#define OP_RECURSE_STACK_SAVE_MAX 45
/************************************************* /*************************************************
@ -129,11 +130,11 @@ subject bytes matched may be different to the number of reference bytes.
Arguments: Arguments:
offset index into the offset vector offset index into the offset vector
eptr pointer into the subject eptr pointer into the subject
length length of reference to be matched (number of bytes) length length of reference to be matched (number of code units)
mb points to match block mb points to match block
caseless TRUE if caseless caseless TRUE if caseless
Returns: >= 0 the number of subject bytes matched Returns: >= 0 the number of subject code units matched
-1 no match -1 no match
-2 partial match; always given if at end subject -2 partial match; always given if at end subject
*/ */
@ -230,7 +231,7 @@ return (int)(eptr - eptr_start);
RECURSION IN THE match() FUNCTION RECURSION IN THE match() FUNCTION
The match() function is highly recursive, though not every recursive call The match() function is highly recursive, though not every recursive call
increases the recursive depth. Nevertheless, some regular expressions can cause increases the recursion depth. Nevertheless, some regular expressions can cause
it to recurse to a great depth. I was writing for Unix, so I just let it call it to recurse to a great depth. I was writing for Unix, so I just let it call
itself recursively. This uses the stack for saving everything that has to be itself recursively. This uses the stack for saving everything that has to be
saved for a recursive call. On Unix, the stack can be large, and this works saved for a recursive call. On Unix, the stack can be large, and this works
@ -241,9 +242,9 @@ programs that use a lot of stack. (This despite the fact that every last chip
has oodles of memory these days, and techniques for extending the stack have has oodles of memory these days, and techniques for extending the stack have
been known for decades.) So.... been known for decades.) So....
There is a fudge, triggered by defining NO_RECURSE, which avoids recursive There is a fudge, triggered by defining HEAP_MATCH_RECURSE, which avoids
calls by keeping local variables that need to be preserved in blocks of memory recursive calls by keeping local variables that need to be preserved in blocks
obtained from malloc() instead of on the stack. Macros are used to of memory on the heap instead of on the stack. Macros are used to
achieve this so that the actual code doesn't look very different to what it achieve this so that the actual code doesn't look very different to what it
always used to. always used to.
@ -274,11 +275,10 @@ enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60, RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
RM61, RM62, RM63, RM64, RM65, RM66, RM67 }; RM61, RM62, RM63, RM64, RM65, RM66, RM67 };
/* These versions of the macros use the stack, as normal. There are debugging /* These versions of the macros use the stack, as normal. Note that the "rw"
versions and production versions. Note that the "rw" argument of RMATCH isn't argument of RMATCH isn't actually used in this definition. */
actually used in this definition. */
#ifndef NO_RECURSE #ifndef HEAP_MATCH_RECURSE
#define REGISTER register #define REGISTER register
#define RMATCH(ra,rb,rc,rd,re,rw) \ #define RMATCH(ra,rb,rc,rd,re,rw) \
rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1) rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
@ -350,10 +350,9 @@ typedef struct heapframe {
eptrblock *Xeptrb; eptrblock *Xeptrb;
PCRE2_OFFSET Xoffset; PCRE2_SIZE Xoffset;
PCRE2_OFFSET Xoffset_top; PCRE2_SIZE Xoffset_top;
PCRE2_OFFSET Xstacksave[REC_STACK_SAVE_MAX]; PCRE2_SIZE Xsave_offset1, Xsave_offset2, Xsave_offset3;
PCRE2_OFFSET Xsave_offset1, Xsave_offset2, Xsave_offset3;
uint32_t Xfc; uint32_t Xfc;
uint32_t Xnumber; uint32_t Xnumber;
@ -395,6 +394,99 @@ typedef struct heapframe {
***************************************************************************/ ***************************************************************************/
/* When HEAP_MATCH_RECURSE is not defined, the match() function implements
backtrack points by calling itself recursively in all but one case. The one
special case is when processing OP_RECURSE, which specifies recursion in the
pattern. The entire ovector must be saved and restored while processing
OP_RECURSE. If the ovector is small enough, instead of calling match()
directly, op_recurse_ovecsave() is called. This function uses the system stack
to save the ovector while calling match() to process the pattern recursion. */
#ifndef HEAP_MATCH_RECURSE
/* We need a prototype for match() because it is mutually recursive with
op_recurse_ovecsave(). */
static int
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth);
/*************************************************
*      Process OP_RECURSE, stacking ovector      *
*************************************************/

/* Handle a pattern recursion (OP_RECURSE) while holding a copy of the ovector
in a local array on the system stack. On entry, mb->recursive must already
point to the new recursion data block, with every field except ovec_save filled
in; ovec_save is set here to point at the local copy.

NOTE(review): the local array holds OP_RECURSE_STACK_SAVE_MAX elements and
saved_max elements are copied into it without a check in this function; the
caller is presumably responsible for only calling this when saved_max fits -
confirm at the call site.

Arguments:
  eptr        pointer to current character in subject
  callpat     the recursion point in the pattern
  mstart      pointer to the current match start position (can be modified
                by encountering \K)
  offset_top  current top pointer
  mb          pointer to "static" info block for the match
  eptrb       pointer to chain of blocks containing eptr at start of
                brackets - for testing for empty matches
  rdepth      the recursion depth

Returns:      a match() return code
*/

static int
op_recurse_ovecsave(REGISTER PCRE2_SPTR eptr, PCRE2_SPTR callpat,
  PCRE2_SPTR mstart, PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb,
  uint32_t rdepth)
{
PCRE2_SIZE stacked_ovector[OP_RECURSE_STACK_SAVE_MAX];
BOOL is_cbegroup = *callpat >= OP_SBRA;
recursion_info *this_rec = mb->recursive;
int rc;

/* Take the snapshot of the ovector and publish it in the recursion record. */

this_rec->ovec_save = stacked_ovector;
memcpy(stacked_ovector, mb->ovector,
  this_rec->saved_max * sizeof(PCRE2_SIZE));

/* Try each alternative of the called group in turn. Whatever the outcome of
an alternative, the ovector and the last captured value are put back to their
saved state before the result is examined. */

for (;;)
  {
  if (is_cbegroup) mb->match_function_type = MATCH_CBEGROUP;

  rc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top,
    mb, eptrb, rdepth + 1);

  memcpy(mb->ovector, this_rec->ovec_save,
    this_rec->saved_max * sizeof(PCRE2_SIZE));
  mb->capture_last = this_rec->saved_capture_last;

  /* Success is passed straight back to the caller. */

  if (rc == MATCH_MATCH || rc == MATCH_ACCEPT) return rc;

  /* THEN, SKIP, PRUNE and COMMIT are not allowed to escape from a recursion;
  their codes lie in a testable range and are turned into a plain NOMATCH for
  the recursion as a whole. */

  if (rc >= MATCH_BACKTRACK_MIN && rc <= MATCH_BACKTRACK_MAX)
    return MATCH_NOMATCH;

  /* Anything else that is not NOMATCH is an error and is returned as is. */

  if (rc != MATCH_NOMATCH) return rc;

  /* This alternative failed. Nested recursions may have altered
  mb->recursive, so reinstate it, then step to the next alternative, leaving
  the loop when there are no more. */

  mb->recursive = this_rec;
  callpat += GET(callpat, 1);
  if (*callpat != OP_ALT) break;
  }

/* Every alternative has been tried without success. */

return MATCH_NOMATCH;
}
#endif /* HEAP_MATCH_RECURSE not defined */
/************************************************* /*************************************************
* Match from current position * * Match from current position *
@ -451,9 +543,8 @@ Returns: MATCH_MATCH if matched ) these values are >= 0
*/ */
static int static int
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
PCRE2_SPTR mstart, PCRE2_OFFSET offset_top, match_block *mb, eptrblock *eptrb, PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth)
uint32_t rdepth)
{ {
/* These variables do not need to be preserved over recursion in this function, /* These variables do not need to be preserved over recursion in this function,
so they can be ordinary variables in all cases. Mark some of them with so they can be ordinary variables in all cases. Mark some of them with
@ -475,7 +566,7 @@ whenever RMATCH() does a "recursion". See the macro definitions above. Putting
the top-level on the stack rather than malloc-ing them all gives a performance the top-level on the stack rather than malloc-ing them all gives a performance
boost in many cases where there is not much "recursion". */ boost in many cases where there is not much "recursion". */
#ifdef NO_RECURSE #ifdef HEAP_MATCH_RECURSE
heapframe *frame = (heapframe *)mb->match_frames_base; heapframe *frame = (heapframe *)mb->match_frames_base;
/* Copy in the original argument variables */ /* Copy in the original argument variables */
@ -535,7 +626,6 @@ HEAP_RECURSE:
#define save_offset1 frame->Xsave_offset1 #define save_offset1 frame->Xsave_offset1
#define save_offset2 frame->Xsave_offset2 #define save_offset2 frame->Xsave_offset2
#define save_offset3 frame->Xsave_offset3 #define save_offset3 frame->Xsave_offset3
#define stacksave frame->Xstacksave
#define condition frame->Xcondition #define condition frame->Xcondition
#define cur_is_word frame->Xcur_is_word #define cur_is_word frame->Xcur_is_word
@ -543,11 +633,11 @@ HEAP_RECURSE:
#define newptrb frame->Xnewptrb #define newptrb frame->Xnewptrb
/* When recursion is being used, local variables are allocated on the stack and /* When normal stack-based recursion is being used for match(), local variables
get preserved during recursion in the normal way. In this environment, fi and are allocated on the stack and get preserved during recursion in the usual way.
i, and fc and c, can be the same variables. */ In this environment, fi and i, and fc and c, can be the same variables. */
#else /* NO_RECURSE not defined */ #else /* HEAP_MATCH_RECURSE not defined */
#define fi i #define fi i
#define fc c #define fc c
@ -569,9 +659,8 @@ PCRE2_SPTR pp;
PCRE2_SPTR prev; PCRE2_SPTR prev;
PCRE2_SPTR saved_eptr; PCRE2_SPTR saved_eptr;
PCRE2_OFFSET offset; PCRE2_SIZE offset;
PCRE2_OFFSET stacksave[REC_STACK_SAVE_MAX]; PCRE2_SIZE save_offset1, save_offset2, save_offset3;
PCRE2_OFFSET save_offset1, save_offset2, save_offset3;
uint32_t number; uint32_t number;
uint32_t op; uint32_t op;
@ -597,7 +686,7 @@ BOOL prev_is_word;
eptrblock newptrb; eptrblock newptrb;
recursion_info new_recursive; recursion_info new_recursive;
#endif /* NO_RECURSE not defined */ #endif /* HEAP_MATCH_RECURSE not defined */
/* To save space on the stack and in the heap frame, I have doubled up on some /* To save space on the stack and in the heap frame, I have doubled up on some
of the local variables that are used only in localised parts of the code, but of the local variables that are used only in localised parts of the code, but
@ -622,19 +711,19 @@ prop_fail_result = 0;
/* This label is used for tail recursion, which is used in a few cases even /* This label is used for tail recursion, which is used in a few cases even
when NO_RECURSE is not defined, in order to reduce the amount of stack that is when HEAP_MATCH_RECURSE is not defined, in order to reduce the amount of stack
used. Thanks to Ian Taylor for noticing this possibility and sending the that is used. Thanks to Ian Taylor for noticing this possibility and sending
original patch. */ the original patch. */
TAIL_RECURSE: TAIL_RECURSE:
/* OK, now we can get on with the real code of the function. Recursive calls /* OK, now we can get on with the real code of the function. Recursive calls
are specified by the macro RMATCH and RRETURN is used to return. When are specified by the macro RMATCH and RRETURN is used to return. When
NO_RECURSE is *not* defined, these just turn into a recursive call to match() HEAP_MATCH_RECURSE is *not* defined, these just turn into a recursive call to
and a "return", respectively. However, RMATCH isn't like a function call match() and a "return", respectively. However, RMATCH isn't like a function
because it's quite a complicated macro. It has to be used in one particular call because it's quite a complicated macro. It has to be used in one
way. This shouldn't, however, impact performance when true recursion is being particular way. This shouldn't, however, impact performance when true recursion
used. */ is being used. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UTF
utf = (mb->poptions & PCRE2_UTF) != 0; utf = (mb->poptions & PCRE2_UTF) != 0;
@ -668,7 +757,7 @@ if (mb->match_function_type == MATCH_CBEGROUP)
mb->match_function_type = 0; mb->match_function_type = 0;
} }
/* Now start processing the opcodes. */ /* Now, at last, we can start processing the opcodes. */
for (;;) for (;;)
{ {
@ -1205,9 +1294,9 @@ for (;;)
cb.callout_number = ecode[1]; cb.callout_number = ecode[1];
cb.offset_vector = mb->ovector; cb.offset_vector = mb->ovector;
cb.subject = mb->start_subject; cb.subject = mb->start_subject;
cb.subject_length = (int)(mb->end_subject - mb->start_subject); cb.subject_length = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
cb.start_match = (int)(mstart - mb->start_subject); cb.start_match = (PCRE2_SIZE)(mstart - mb->start_subject);
cb.current_position = (int)(eptr - mb->start_subject); cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
cb.pattern_position = GET(ecode, 2); cb.pattern_position = GET(ecode, 2);
cb.next_item_length = GET(ecode, 2 + LINK_SIZE); cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
cb.capture_top = offset_top/2; cb.capture_top = offset_top/2;
@ -1588,9 +1677,9 @@ for (;;)
cb.callout_number = ecode[1]; cb.callout_number = ecode[1];
cb.offset_vector = mb->ovector; cb.offset_vector = mb->ovector;
cb.subject = mb->start_subject; cb.subject = mb->start_subject;
cb.subject_length = (int)(mb->end_subject - mb->start_subject); cb.subject_length = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
cb.start_match = (int)(mstart - mb->start_subject); cb.start_match = (PCRE2_SIZE)(mstart - mb->start_subject);
cb.current_position = (int)(eptr - mb->start_subject); cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
cb.pattern_position = GET(ecode, 2); cb.pattern_position = GET(ecode, 2);
cb.next_item_length = GET(ecode, 2 + LINK_SIZE); cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
cb.capture_top = offset_top/2; cb.capture_top = offset_top/2;
@ -1613,7 +1702,7 @@ for (;;)
all the potential data. There may be up to 65535 such values, which is too all the potential data. There may be up to 65535 such values, which is too
large to put on the stack, but using malloc for small numbers seems large to put on the stack, but using malloc for small numbers seems
expensive. As a compromise, the stack is used when there are no more than expensive. As a compromise, the stack is used when there are no more than
REC_STACK_SAVE_MAX values to store; otherwise malloc is used. OP_RECURSE_STACK_SAVE_MAX values to store; otherwise malloc is used.
There are also other values that have to be saved. We use a chained There are also other values that have to be saved. We use a chained
sequence of blocks that actually live on the stack. Thanks to Robin Houston sequence of blocks that actually live on the stack. Thanks to Robin Houston
@ -1626,12 +1715,11 @@ for (;;)
uint32_t recno; uint32_t recno;
callpat = mb->start_code + GET(ecode, 1); callpat = mb->start_code + GET(ecode, 1);
recno = (callpat == mb->start_code)? 0 : recno = (callpat == mb->start_code)? 0 : GET2(callpat, 1 + LINK_SIZE);
GET2(callpat, 1 + LINK_SIZE);
/* Check for repeating a recursion without advancing the subject pointer. /* Check for repeating a pattern recursion without advancing the subject
This should catch convoluted mutual recursions. (Some simple cases are pointer. This should catch convoluted mutual recursions. (Some simple
caught at compile time.) */ cases are caught at compile time.) */
for (ri = mb->recursive; ri != NULL; ri = ri->prevrec) for (ri = mb->recursive; ri != NULL; ri = ri->prevrec)
if (recno == ri->group_num && eptr == ri->subject_position) if (recno == ri->group_num && eptr == ri->subject_position)
@ -1641,6 +1729,7 @@ for (;;)
new_recursive.group_num = recno; new_recursive.group_num = recno;
new_recursive.saved_capture_last = mb->capture_last; new_recursive.saved_capture_last = mb->capture_last;
new_recursive.saved_max = mb->offset_end;
new_recursive.subject_position = eptr; new_recursive.subject_position = eptr;
new_recursive.prevrec = mb->recursive; new_recursive.prevrec = mb->recursive;
mb->recursive = &new_recursive; mb->recursive = &new_recursive;
@ -1649,25 +1738,43 @@ for (;;)
ecode += 1 + LINK_SIZE; ecode += 1 + LINK_SIZE;
/* Now save the offset data */ /* When we are using the system stack for match() recursion we can call a
function that uses the system stack for preserving the ovector while
processing the pattern recursion, but only if the ovector is small
enough. */
new_recursive.saved_max = mb->offset_end; #ifndef HEAP_MATCH_RECURSE
if (new_recursive.saved_max <= REC_STACK_SAVE_MAX) if (new_recursive.saved_max <= OP_RECURSE_STACK_SAVE_MAX)
new_recursive.offset_save = stacksave;
else
{ {
new_recursive.offset_save = (PCRE2_OFFSET *) rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb,
(mb->memctl.malloc(new_recursive.saved_max * sizeof(PCRE2_OFFSET), eptrb, rdepth);
mb->memctl.memory_data)); mb->recursive = new_recursive.prevrec;
if (new_recursive.offset_save == NULL) RRETURN(PCRE2_ERROR_NOMEMORY); if (rrc != MATCH_MATCH && rrc != MATCH_ACCEPT) RRETURN(rrc);
}
memcpy(new_recursive.offset_save, mb->ovector,
new_recursive.saved_max * sizeof(PCRE2_OFFSET));
/* OK, now we can do the recursion. After processing each alternative, /* Set where we got to in the subject, and reset the start, in case
restore the offset data and the last captured value. If there were nested it was changed by \K. This *is* propagated back out of a recursion,
recursions, mb->recursive might be changed, so reset it before looping. for Perl compatibility. */
*/
eptr = mb->end_match_ptr;
mstart = mb->start_match_ptr;
break; /* End of processing OP_RECURSE */
}
#endif
/* If the ovector is too big, or if we are using the heap for match()
recursion, we have to use the heap for saving the ovector. */
new_recursive.ovec_save = (PCRE2_SIZE *)
(mb->memctl.malloc(new_recursive.saved_max * sizeof(PCRE2_SIZE),
mb->memctl.memory_data));
if (new_recursive.ovec_save == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);
memcpy(new_recursive.ovec_save, mb->ovector,
new_recursive.saved_max * sizeof(PCRE2_SIZE));
/* Do the recursion. After processing each alternative, restore the
ovector data and the last captured value. This code has the same overall
logic as the code in the op_recurse_ovecsave() function, but is adapted
to use RMATCH/RRETURN and to release the heap block containing the saved
ovector. */
cbegroup = (*callpat >= OP_SBRA); cbegroup = (*callpat >= OP_SBRA);
do do
@ -1675,16 +1782,16 @@ for (;;)
if (cbegroup) mb->match_function_type = MATCH_CBEGROUP; if (cbegroup) mb->match_function_type = MATCH_CBEGROUP;
RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top, RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
mb, eptrb, RM6); mb, eptrb, RM6);
memcpy(mb->ovector, new_recursive.offset_save, memcpy(mb->ovector, new_recursive.ovec_save,
new_recursive.saved_max * sizeof(PCRE2_OFFSET)); new_recursive.saved_max * sizeof(PCRE2_SIZE));
mb->capture_last = new_recursive.saved_capture_last; mb->capture_last = new_recursive.saved_capture_last;
mb->recursive = new_recursive.prevrec; mb->recursive = new_recursive.prevrec;
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
{ {
if (new_recursive.offset_save != stacksave) mb->memctl.free(new_recursive.ovec_save, mb->memctl.memory_data);
mb->memctl.free(new_recursive.offset_save, mb->memctl.memory_data);
/* Set where we got to in the subject, and reset the start in case /* Set where we got to in the subject, and reset the start, in case
it was changed by \K. This *is* propagated back out of a recursion, it was changed by \K. This *is* propagated back out of a recursion,
for Perl compatibility. */ for Perl compatibility. */
@ -1698,26 +1805,23 @@ for (;;)
are defined in a range that can be tested for. */ are defined in a range that can be tested for. */
if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX) if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
RRETURN(MATCH_NOMATCH); {
rrc = MATCH_NOMATCH;
goto RECURSION_RETURN;
}
/* Any return code other than NOMATCH is an error. */ /* Any return code other than NOMATCH is an error. */
if (rrc != MATCH_NOMATCH) if (rrc != MATCH_NOMATCH) goto RECURSION_RETURN;
{
if (new_recursive.offset_save != stacksave)
mb->memctl.free(new_recursive.offset_save, mb->memctl.memory_data);
RRETURN(rrc);
}
mb->recursive = &new_recursive; mb->recursive = &new_recursive;
callpat += GET(callpat, 1); callpat += GET(callpat, 1);
} }
while (*callpat == OP_ALT); while (*callpat == OP_ALT);
RECURSION_RETURN:
mb->recursive = new_recursive.prevrec; mb->recursive = new_recursive.prevrec;
if (new_recursive.offset_save != stacksave) mb->memctl.free(new_recursive.ovec_save, mb->memctl.memory_data);
mb->memctl.free(new_recursive.offset_save, mb->memctl.memory_data); RRETURN(rrc);
RRETURN(MATCH_NOMATCH);
} }
RECURSION_MATCHED: RECURSION_MATCHED:
@ -1840,8 +1944,8 @@ for (;;)
if (offset > offset_top) if (offset > offset_top)
{ {
register PCRE2_OFFSET *iptr = mb->ovector + offset_top; register PCRE2_SIZE *iptr = mb->ovector + offset_top;
register PCRE2_OFFSET *iend = mb->ovector + offset; register PCRE2_SIZE *iend = mb->ovector + offset;
while (iptr < iend) *iptr++ = PCRE2_UNSET; while (iptr < iend) *iptr++ = PCRE2_UNSET;
} }
@ -6023,7 +6127,7 @@ for (;;)
match(), the RRETURN() macro jumps here. The number that is saved in match(), the RRETURN() macro jumps here. The number that is saved in
frame->Xwhere indicates which label we actually want to return to. */ frame->Xwhere indicates which label we actually want to return to. */
#ifdef NO_RECURSE #ifdef HEAP_MATCH_RECURSE
#define LBL(val) case val: goto L_RM##val; #define LBL(val) case val: goto L_RM##val;
HEAP_RETURN: HEAP_RETURN:
switch (frame->Xwhere) switch (frame->Xwhere)
@ -6048,7 +6152,7 @@ switch (frame->Xwhere)
return PCRE2_ERROR_INTERNAL; return PCRE2_ERROR_INTERNAL;
} }
#undef LBL #undef LBL
#endif /* NO_RECURSE */ #endif /* HEAP_MATCH_RECURSE */
} }
@ -6058,7 +6162,7 @@ switch (frame->Xwhere)
Undefine all the macros that were defined above to handle this. */ Undefine all the macros that were defined above to handle this. */
#ifdef NO_RECURSE #ifdef HEAP_MATCH_RECURSE
#undef eptr #undef eptr
#undef ecode #undef ecode
#undef mstart #undef mstart
@ -6091,10 +6195,9 @@ Undefine all the macros that were defined above to handle this. */
#undef save_offset1 #undef save_offset1
#undef save_offset2 #undef save_offset2
#undef save_offset3 #undef save_offset3
#undef stacksave
#undef newptrb #undef newptrb
#endif /* NO_RECURSE */ #endif /* HEAP_MATCH_RECURSE */
/* These two are defined as macros in both cases */ /* These two are defined as macros in both cases */
@ -6105,7 +6208,7 @@ Undefine all the macros that were defined above to handle this. */
***************************************************************************/ ***************************************************************************/
#ifdef NO_RECURSE #ifdef HEAP_MATCH_RECURSE
/************************************************* /*************************************************
* Release allocated heap frames * * Release allocated heap frames *
*************************************************/ *************************************************/
@ -6131,7 +6234,7 @@ while (nextframe != NULL)
mb->stack_memctl.free(oldframe, mb->stack_memctl.memory_data); mb->stack_memctl.free(oldframe, mb->stack_memctl.memory_data);
} }
} }
#endif /* NO_RECURSE */ #endif /* HEAP_MATCH_RECURSE */
@ -6160,8 +6263,8 @@ Returns: > 0 => success; value is the number of ovector pairs filled
*/ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, int length, pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
PCRE2_OFFSET start_offset, uint32_t options, pcre2_match_data *match_data, PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
pcre2_match_context *mcontext) pcre2_match_context *mcontext)
{ {
int rc; int rc;
@ -6198,23 +6301,24 @@ is used below, and it expects NLBLOCK to be defined as a pointer. */
match_block actual_match_block; match_block actual_match_block;
match_block *mb = &actual_match_block; match_block *mb = &actual_match_block;
#ifdef NO_RECURSE #ifdef HEAP_MATCH_RECURSE
heapframe frame_zero; heapframe frame_zero;
frame_zero.Xprevframe = NULL; /* Marks the top level */ frame_zero.Xprevframe = NULL; /* Marks the top level */
frame_zero.Xnextframe = NULL; /* None are allocated yet */ frame_zero.Xnextframe = NULL; /* None are allocated yet */
mb->match_frames_base = &frame_zero; mb->match_frames_base = &frame_zero;
#endif #endif
/* A negative length implies a zero-terminated subject string. */ /* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
subject string. */
if (length < 0) length = PRIV(strlen)(subject); if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
/* Plausibility checks */ /* Plausibility checks */
if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION; if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
if (code == NULL || subject == NULL || match_data == NULL) if (code == NULL || subject == NULL || match_data == NULL)
return PCRE2_ERROR_NULL; return PCRE2_ERROR_NULL;
if ((int)start_offset > length) return PCRE2_ERROR_BADOFFSET; if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
/* Check that the first field in the block is the magic number. If it is not, /* Check that the first field in the block is the magic number. If it is not,
return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to
@ -6261,7 +6365,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
return match_data->rc; return match_data->rc;
} }
#if PCRE2_CODE_UNIT_WIDTH != 32 #if PCRE2_CODE_UNIT_WIDTH != 32
if (start_offset > 0 && (int)start_offset < length && if (start_offset > 0 && start_offset < length &&
NOT_FIRSTCHAR(subject[start_offset])) NOT_FIRSTCHAR(subject[start_offset]))
return PCRE2_ERROR_BADUTFOFFSET; return PCRE2_ERROR_BADUTFOFFSET;
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
@ -6296,7 +6400,7 @@ if (mcontext == NULL)
{ {
mb->callout = NULL; mb->callout = NULL;
mb->memctl = re->memctl; mb->memctl = re->memctl;
#ifdef NO_RECURSE #ifdef HEAP_MATCH_RECURSE
mb->stack_memctl = re->memctl; mb->stack_memctl = re->memctl;
#endif #endif
} }
@ -6305,7 +6409,7 @@ else
mb->callout = mcontext->callout; mb->callout = mcontext->callout;
mb->callout_data = mcontext->callout_data; mb->callout_data = mcontext->callout_data;
mb->memctl = mcontext->memctl; mb->memctl = mcontext->memctl;
#ifdef NO_RECURSE #ifdef HEAP_MATCH_RECURSE
mb->stack_memctl = mcontext->stack_memctl; mb->stack_memctl = mcontext->stack_memctl;
#endif #endif
} }
@ -6394,7 +6498,7 @@ offsets, and the top third is working space. */
if (re->top_backref >= match_data->oveccount) if (re->top_backref >= match_data->oveccount)
{ {
ocount = re->top_backref * 3 + 3; ocount = re->top_backref * 3 + 3;
mb->ovector = (PCRE2_OFFSET *)(mb->memctl.malloc(ocount * sizeof(PCRE2_OFFSET), mb->ovector = (PCRE2_SIZE *)(mb->memctl.malloc(ocount * sizeof(PCRE2_SIZE),
mb->memctl.memory_data)); mb->memctl.memory_data));
if (mb->ovector == NULL) return PCRE2_ERROR_NOMEMORY; if (mb->ovector == NULL) return PCRE2_ERROR_NOMEMORY;
using_temporary_offsets = TRUE; using_temporary_offsets = TRUE;
@ -6417,8 +6521,8 @@ in case they inspect these fields. */
if (ocount > 0) if (ocount > 0)
{ {
register PCRE2_OFFSET *iptr = mb->ovector + ocount; register PCRE2_SIZE *iptr = mb->ovector + ocount;
register PCRE2_OFFSET *iend = iptr - re->top_bracket; register PCRE2_SIZE *iend = iptr - re->top_bracket;
if (iend < mb->ovector + 2) iend = mb->ovector + 2; if (iend < mb->ovector + 2) iend = mb->ovector + 2;
while (--iptr >= iend) *iptr = PCRE2_UNSET; while (--iptr >= iend) *iptr = PCRE2_UNSET;
mb->ovector[0] = mb->ovector[1] = PCRE2_UNSET; mb->ovector[0] = mb->ovector[1] = PCRE2_UNSET;
@ -6782,7 +6886,7 @@ for(;;)
ENDLOOP: ENDLOOP:
#ifdef NO_RECURSE #ifdef HEAP_MATCH_RECURSE
release_match_heapframes(&frame_zero, mb); release_match_heapframes(&frame_zero, mb);
#endif #endif
@ -6810,7 +6914,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
if (arg_offset_max >= 4) if (arg_offset_max >= 4)
{ {
memcpy(match_data->ovector + 2, mb->ovector + 2, memcpy(match_data->ovector + 2, mb->ovector + 2,
(arg_offset_max - 2) * sizeof(PCRE2_OFFSET)); (arg_offset_max - 2) * sizeof(PCRE2_SIZE));
} }
if (mb->end_offset_top > arg_offset_max) mb->capture_last |= OVFLBIT; if (mb->end_offset_top > arg_offset_max) mb->capture_last |= OVFLBIT;
mb->memctl.free(mb->ovector, mb->memctl.memory_data); mb->memctl.free(mb->ovector, mb->memctl.memory_data);
@ -6834,7 +6938,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
if (mb->end_offset_top/2 <= re->top_bracket) if (mb->end_offset_top/2 <= re->top_bracket)
{ {
register PCRE2_OFFSET *iptr, *iend; register PCRE2_SIZE *iptr, *iend;
int resetcount = re->top_bracket + 1; int resetcount = re->top_bracket + 1;
if (resetcount > match_data->oveccount) resetcount = match_data->oveccount; if (resetcount > match_data->oveccount) resetcount = match_data->oveccount;
iptr = match_data->ovector + mb->end_offset_top; iptr = match_data->ovector + mb->end_offset_top;

View File

@ -55,7 +55,7 @@ PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext) pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
{ {
pcre2_match_data *yield = PRIV(memctl_malloc)( pcre2_match_data *yield = PRIV(memctl_malloc)(
sizeof(pcre2_match_data) + 3*oveccount*sizeof(PCRE2_OFFSET), sizeof(pcre2_match_data) + 3*oveccount*sizeof(PCRE2_SIZE),
(pcre2_memctl *)gcontext); (pcre2_memctl *)gcontext);
yield->oveccount = oveccount; yield->oveccount = oveccount;
return yield; return yield;
@ -94,7 +94,7 @@ if (match_data != NULL)
* Get left-most code unit in match * * Get left-most code unit in match *
*************************************************/ *************************************************/
PCRE2_EXP_DEFN PCRE2_OFFSET PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
pcre2_get_leftchar(pcre2_match_data *match_data) pcre2_get_leftchar(pcre2_match_data *match_data)
{ {
return match_data->leftchar; return match_data->leftchar;
@ -118,7 +118,7 @@ return match_data->mark;
* Get pointer to ovector * * Get pointer to ovector *
*************************************************/ *************************************************/
PCRE2_EXP_DEFN PCRE2_OFFSET * PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN PCRE2_SIZE * PCRE2_CALL_CONVENTION
pcre2_get_ovector_pointer(pcre2_match_data *match_data) pcre2_get_ovector_pointer(pcre2_match_data *match_data)
{ {
return match_data->ovector; return match_data->ovector;
@ -142,7 +142,7 @@ return match_data->oveccount;
* Get right-most code unit in match * * Get right-most code unit in match *
*************************************************/ *************************************************/
PCRE2_EXP_DEFN PCRE2_OFFSET PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
pcre2_get_rightchar(pcre2_match_data *match_data) pcre2_get_rightchar(pcre2_match_data *match_data)
{ {
return match_data->rightchar; return match_data->rightchar;
@ -154,7 +154,7 @@ return match_data->rightchar;
* Get starting code unit in match * * Get starting code unit in match *
*************************************************/ *************************************************/
PCRE2_EXP_DEFN PCRE2_OFFSET PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
pcre2_get_startchar(pcre2_match_data *match_data) pcre2_get_startchar(pcre2_match_data *match_data)
{ {
return match_data->startchar; return match_data->startchar;

View File

@ -335,7 +335,7 @@ size_t *lensp;
pcre2_memctl *memp; pcre2_memctl *memp;
PCRE2_UCHAR **listp; PCRE2_UCHAR **listp;
PCRE2_UCHAR *sp; PCRE2_UCHAR *sp;
PCRE2_OFFSET *ovector; PCRE2_SIZE *ovector;
if ((count = match_data->rc) < 0) return count; if ((count = match_data->rc) < 0) return count;

View File

@ -58,7 +58,7 @@ strings. */
/* This function should never be called when UTF is not supported. */ /* This function should never be called when UTF is not supported. */
int int
PRIV(valid_utf)(PCRE2_SPTR string, int length, PCRE2_OFFSET *erroroffset) PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
{ {
(void)string; (void)string;
(void)length; (void)length;
@ -81,7 +81,7 @@ invalid string are then undefined.
Arguments: Arguments:
string points to the string string points to the string
length length of string, or -1 if the string is zero-terminated length length of string
errp pointer to an error position offset variable errp pointer to an error position offset variable
Returns: == 0 if the string is a valid UTF string Returns: == 0 if the string is a valid UTF string
@ -89,17 +89,11 @@ Returns: == 0 if the string is a valid UTF string
*/ */
int int
PRIV(valid_utf)(PCRE2_SPTR string, int length, PCRE2_OFFSET *erroroffset) PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
{ {
register PCRE2_SPTR p; register PCRE2_SPTR p;
register uint32_t c; register uint32_t c;
if (length < 0)
{
for (p = string; *p != 0; p++);
length = (int)(p - string);
}
/* ----------------- Check a UTF-8 string ----------------- */ /* ----------------- Check a UTF-8 string ----------------- */
#if PCRE2_CODE_UNIT_WIDTH == 8 #if PCRE2_CODE_UNIT_WIDTH == 8
@ -155,7 +149,7 @@ for (p = string; length-- > 0; p++)
} }
ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes (1-5) */ ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes (1-5) */
if (length < (int)ab) /* Missing bytes */ if (length < ab) /* Missing bytes */
{ {
*erroroffset = (int)(p - string); *erroroffset = (int)(p - string);
switch(ab - length) switch(ab - length)

View File

@ -77,8 +77,8 @@ int utf8;
uint32_t option_bits; uint32_t option_bits;
uint32_t newline; uint32_t newline;
PCRE2_OFFSET erroroffset; PCRE2_SIZE erroroffset;
PCRE2_OFFSET *ovector; PCRE2_SIZE *ovector;
size_t subject_length; size_t subject_length;
pcre2_match_data *match_data; pcre2_match_data *match_data;
@ -314,7 +314,7 @@ crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
for (;;) for (;;)
{ {
uint32_t options = 0; /* Normally no options */ uint32_t options = 0; /* Normally no options */
PCRE2_OFFSET start_offset = ovector[1]; /* Start at end of previous match */ PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */
/* If the previous match was for an empty string, we are finished if we are /* If the previous match was for an empty string, we are finished if we are
at the end of the subject. Otherwise, arrange to run another match at the at the end of the subject. Otherwise, arrange to run another match at the

View File

@ -179,7 +179,7 @@ static uint32_t recursion_limit = 0;
static pcre2_compile_context *compile_context; static pcre2_compile_context *compile_context;
static pcre2_match_context *match_context; static pcre2_match_context *match_context;
static pcre2_match_data *match_data; static pcre2_match_data *match_data;
static PCRE2_OFFSET *offsets; static PCRE2_SIZE *offsets;
static BOOL count_only = FALSE; static BOOL count_only = FALSE;
static BOOL do_colour = FALSE; static BOOL do_colour = FALSE;
@ -1736,14 +1736,14 @@ while (ptr < endptr)
if (line_offsets) if (line_offsets)
fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr), fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
offsets[1] - offsets[0]); (int)(offsets[1] - offsets[0]));
/* Handle --file-offsets */ /* Handle --file-offsets */
else if (file_offsets) else if (file_offsets)
fprintf(stdout, "%d,%d\n", fprintf(stdout, "%d,%d\n",
(int)(filepos + matchptr + offsets[0] - ptr), (int)(filepos + matchptr + offsets[0] - ptr),
offsets[1] - offsets[0]); (int)(offsets[1] - offsets[0]));
/* Handle --only-matching, which may occur many times */ /* Handle --only-matching, which may occur many times */
@ -2451,7 +2451,7 @@ compile_pattern(patstr *p, int options, int popts, int fromfile,
const char *fromtext, int count) const char *fromtext, int count)
{ {
unsigned char buffer[PATBUFSIZE]; unsigned char buffer[PATBUFSIZE];
PCRE2_OFFSET erroffset; PCRE2_SIZE erroffset;
char *ps = p->string; char *ps = p->string;
unsigned int patlen = strlen(ps); unsigned int patlen = strlen(ps);
int errcode; int errcode;
@ -2485,16 +2485,16 @@ pcre2_get_error_message(errcode, buffer, PATBUFSIZE);
if (fromfile) if (fromfile)
{ {
fprintf(stderr, "pcre2grep: Error in regex in line %d of %s " fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
"at offset %d: %s\n", count, fromtext, erroffset, buffer); "at offset %d: %s\n", count, fromtext, (int)erroffset, buffer);
} }
else else
{ {
if (count == 0) if (count == 0)
fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n", fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
fromtext, erroffset, buffer); fromtext, (int)erroffset, buffer);
else else
fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n", fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
ordin(count), fromtext, erroffset, buffer); ordin(count), fromtext, (int)erroffset, buffer);
} }
return FALSE; return FALSE;

View File

@ -203,7 +203,7 @@ Returns: 0 on success
PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
regcomp(regex_t *preg, const char *pattern, int cflags) regcomp(regex_t *preg, const char *pattern, int cflags)
{ {
PCRE2_OFFSET erroffset; PCRE2_SIZE erroffset;
int errorcode; int errorcode;
int options = 0; int options = 0;
int re_nsub = 0; int re_nsub = 0;

View File

@ -401,7 +401,7 @@ typedef struct modstruct {
uint16_t which; uint16_t which;
uint16_t type; uint16_t type;
uint32_t value; uint32_t value;
PCRE2_OFFSET offset; PCRE2_SIZE offset;
} modstruct; } modstruct;
static modstruct modlist[] = { static modstruct modlist[] = {
@ -1758,7 +1758,7 @@ free(block);
/* For recursion malloc/free, to test stacking calls */ /* For recursion malloc/free, to test stacking calls */
#ifdef NO_RECURSE #ifdef HEAP_MATCH_RECURSE
static void *my_stack_malloc(size_t size, void *data) static void *my_stack_malloc(size_t size, void *data)
{ {
void *block = malloc(size); void *block = malloc(size);
@ -1775,7 +1775,7 @@ if (show_memory)
fprintf(outfile, "stack_free %p\n", block); fprintf(outfile, "stack_free %p\n", block);
free(block); free(block);
} }
#endif /* NO_RECURSE */ #endif /* HEAP_MATCH_RECURSE */
/************************************************* /*************************************************
@ -2422,7 +2422,7 @@ static void *
check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c) check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
{ {
void *field = NULL; void *field = NULL;
PCRE2_OFFSET offset = m->offset; PCRE2_SIZE offset = m->offset;
if (restrict_for_perl_test) switch(m->which) if (restrict_for_perl_test) switch(m->which)
{ {
@ -2448,7 +2448,7 @@ switch (m->which)
/* Fall through for something that can also be in a match context. In this /* Fall through for something that can also be in a match context. In this
case the offset is taken from the other field. */ case the offset is taken from the other field. */
offset = (PCRE2_OFFSET)(m->value); offset = (PCRE2_SIZE)(m->value);
case MOD_CTM: /* Match context modifier */ case MOD_CTM: /* Match context modifier */
if (ctx == CTX_DEFDAT) field = PTR(default_dat_context); if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
@ -3310,7 +3310,7 @@ uint8_t *p = buffer;
const uint8_t *use_tables; const uint8_t *use_tables;
unsigned int delimiter = *p++; unsigned int delimiter = *p++;
int patlen, errorcode; int patlen, errorcode;
PCRE2_OFFSET erroroffset; PCRE2_SIZE erroroffset;
/* Initialize the context and pattern/data controls for this test from the /* Initialize the context and pattern/data controls for this test from the
defaults. */ defaults. */
@ -4403,7 +4403,7 @@ for (gmatched = 0;; gmatched++)
{ {
int i; int i;
uint8_t *nptr; uint8_t *nptr;
PCRE2_OFFSET *ovector; PCRE2_SIZE *ovector;
/* This is a check against a lunatic return value. */ /* This is a check against a lunatic return value. */
@ -4439,8 +4439,8 @@ for (gmatched = 0;; gmatched++)
ovector = FLD(match_data, ovector); ovector = FLD(match_data, ovector);
for (i = 0; i < 2*capcount; i += 2) for (i = 0; i < 2*capcount; i += 2)
{ {
PCRE2_OFFSET start = ovector[i]; PCRE2_SIZE start = ovector[i];
PCRE2_OFFSET end = ovector[i+1]; PCRE2_SIZE end = ovector[i+1];
if (start > end) if (start > end)
{ {
@ -4643,7 +4643,7 @@ for (gmatched = 0;; gmatched++)
else if (capcount == PCRE2_ERROR_PARTIAL) else if (capcount == PCRE2_ERROR_PARTIAL)
{ {
PCRE2_OFFSET leftchar = FLD(match_data, leftchar); PCRE2_SIZE leftchar = FLD(match_data, leftchar);
fprintf(outfile, "Partial match"); fprintf(outfile, "Partial match");
if (leftchar != FLD(match_data, startchar)) if (leftchar != FLD(match_data, startchar))
fprintf(outfile, " at offset %d", (int)FLD(match_data, startchar)); fprintf(outfile, " at offset %d", (int)FLD(match_data, startchar));
@ -4685,8 +4685,8 @@ for (gmatched = 0;; gmatched++)
else if (g_notempty != 0) /* There was a previous null match */ else if (g_notempty != 0) /* There was a previous null match */
{ {
uint16_t nl = FLD(compiled_code, newline_convention); uint16_t nl = FLD(compiled_code, newline_convention);
PCRE2_OFFSET start_offset = dat_datctl.offset; /* Where the match was */ PCRE2_SIZE start_offset = dat_datctl.offset; /* Where the match was */
PCRE2_OFFSET end_offset = start_offset + 1; PCRE2_SIZE end_offset = start_offset + 1;
if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY || if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY ||
nl == PCRE2_NEWLINE_ANYCRLF) && nl == PCRE2_NEWLINE_ANYCRLF) &&
@ -4765,7 +4765,7 @@ for (gmatched = 0;; gmatched++)
if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
{ {
PCRE2_OFFSET end_offset = FLD(match_data, ovector)[1]; PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
/* We must now set up for the next iteration of a global search. If we have /* We must now set up for the next iteration of a global search. If we have
matched an empty string, first check to see if we are at the end of the matched an empty string, first check to see if we are at the end of the
@ -5278,7 +5278,7 @@ if (test_mode == PCRE8_MODE)
default_dat_context8 = pcre2_match_context_create_8(general_context8); default_dat_context8 = pcre2_match_context_create_8(general_context8);
dat_context8 = pcre2_match_context_create_8(general_context8); dat_context8 = pcre2_match_context_create_8(general_context8);
match_data8 = pcre2_match_data_create_8(max_oveccount, general_context8); match_data8 = pcre2_match_data_create_8(max_oveccount, general_context8);
#ifdef NO_RECURSE #ifdef HEAP_MATCH_RECURSE
(void)pcre2_set_recursion_memory_management_8(default_dat_context8, (void)pcre2_set_recursion_memory_management_8(default_dat_context8,
&my_stack_malloc, &my_stack_free, NULL); &my_stack_malloc, &my_stack_free, NULL);
#endif #endif
@ -5295,7 +5295,7 @@ if (test_mode == PCRE16_MODE)
default_dat_context16 = pcre2_match_context_create_16(general_context16); default_dat_context16 = pcre2_match_context_create_16(general_context16);
dat_context16 = pcre2_match_context_create_16(general_context16); dat_context16 = pcre2_match_context_create_16(general_context16);
match_data16 = pcre2_match_data_create_16(max_oveccount, general_context16); match_data16 = pcre2_match_data_create_16(max_oveccount, general_context16);
#ifdef NO_RECURSE #ifdef HEAP_MATCH_RECURSE
(void)pcre2_set_recursion_memory_management_16(default_dat_context16, (void)pcre2_set_recursion_memory_management_16(default_dat_context16,
&my_stack_malloc, &my_stack_free, NULL); &my_stack_malloc, &my_stack_free, NULL);
#endif #endif
@ -5312,7 +5312,7 @@ if (test_mode == PCRE32_MODE)
default_dat_context32 = pcre2_match_context_create_32(general_context32); default_dat_context32 = pcre2_match_context_create_32(general_context32);
dat_context32 = pcre2_match_context_create_32(general_context32); dat_context32 = pcre2_match_context_create_32(general_context32);
match_data32 = pcre2_match_data_create_32(max_oveccount, general_context32); match_data32 = pcre2_match_data_create_32(max_oveccount, general_context32);
#ifdef NO_RECURSE #ifdef HEAP_MATCH_RECURSE
(void)pcre2_set_recursion_memory_management_32(default_dat_context32, (void)pcre2_set_recursion_memory_management_32(default_dat_context32,
&my_stack_malloc, &my_stack_free, NULL); &my_stack_malloc, &my_stack_free, NULL);
#endif #endif

6
testdata/testinput2 vendored
View File

@ -4043,4 +4043,10 @@ a random value. /Ix
ca ca
cd cd
# This should test both paths for processing OP_RECURSE.
/(?(R)a+|(?R)b)/
aaaabcde
aaaabcde\=ovector=100
# End of testinput2 # End of testinput2

View File

@ -13672,4 +13672,12 @@ No match
0: 0:
0+ cd 0+ cd
# This should test both paths for processing OP_RECURSE.
/(?(R)a+|(?R)b)/
aaaabcde
0: aaaab
aaaabcde\=ovector=100
0: aaaab
# End of testinput2 # End of testinput2