Change lengths to PCRE2_SIZE and revise OP_RECURSE processing.

This commit is contained in:
Philip.Hazel 2014-08-16 09:46:58 +00:00
parent d108fc47ad
commit 7efba85b56
23 changed files with 499 additions and 369 deletions

View File

@ -429,7 +429,7 @@ endif # WITH_PCRE8
if WITH_JIT
TESTS += pcre2_jit_test
noinst_PROGRAMS += pcre2_jit_test
pcre2_jit_test_SOURCES = pcre2_jit_test.c
pcre2_jit_test_SOURCES = src/pcre2_jit_test.c
pcre2_jit_test_CFLAGS = $(AM_CFLAGS)
pcre2_jit_test_LDADD =
if WITH_PCRE8

View File

@ -523,10 +523,10 @@ if test "$enable_utf" = "yes"; then
fi
if test "$enable_stack_for_recursion" = "no"; then
AC_DEFINE([NO_RECURSE], [], [
AC_DEFINE([HEAP_MATCH_RECURSE], [], [
PCRE2 uses recursive function calls to handle backtracking while
matching. This can sometimes be a problem on systems that have
stacks of limited size. Define NO_RECURSE to any value to get a
stacks of limited size. Define HEAP_MATCH_RECURSE to any value to get a
version that doesn't use recursion in the match() function; instead
it creates its own stack by steam using memory from the heap. For more
detail, see the comments and other stuff just above the match() function.])
@ -608,7 +608,7 @@ AC_DEFINE_UNQUOTED([MATCH_LIMIT_RECURSION], [$with_match_limit_recursion], [
increase the recursion depth. In some environments it is desirable
to limit the depth of recursive calls of match() more strictly, in
order to restrict the maximum amount of stack (or heap, if
NO_RECURSE is defined) that is used. The value of
HEAP_MATCH_RECURSE is defined) that is used. The value of
MATCH_LIMIT_RECURSION applies only to recursive calls of match(). To
have any useful effect, it must be less than the value of
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT.

View File

@ -1,13 +1,13 @@
/* src/config.h. Generated from config.h.in by configure. */
/* src/config.h.in. Generated from configure.ac by autoheader. */
/* PCRE is written in Standard C, but there are a few non-standard things it
/* PCRE2 is written in Standard C, but there are a few non-standard things it
can cope with, allowing it to run on SunOS4 and other "close to standard"
systems.
In environments that support the GNU autotools, config.h.in is converted into
config.h by the "configure" script. In environments that use CMake,
config-cmake.in is converted into config.h. If you are going to build PCRE "by
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
hand" without using "configure" or CMake, you should copy the distributed
config.h.generic to config.h, and edit the macro definitions to be the way you
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
@ -24,31 +24,28 @@ macros are listed as a commented #undef in config.h.generic. Macros such as
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
sure both macros are undefined; an emulation function will then be used. */
/* By default, the \R escape sequence matches any Unicode line ending
character or sequence of characters. If BSR_ANYCRLF is defined (to any
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
The build-time default can be overridden by the user of PCRE at runtime. */
The build-time default can be overridden by the user of PCRE2 at runtime.
*/
/* #undef BSR_ANYCRLF */
/* If you are compiling for a system that uses EBCDIC instead of ASCII
character codes, define this macro to any value. You must also edit the
NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15).
On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is
automatically adjusted. When EBCDIC is set, PCRE assumes that all input
strings are in EBCDIC. If you do not define this macro, PCRE will assume
input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build
a version of PCRE that supports both EBCDIC and UTF-8/16/32. */
character codes, define this macro to any value. When EBCDIC is set, PCRE2
assumes that all input strings are in EBCDIC. If you do not define this
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
is not possible to build a version of PCRE2 that supports both EBCDIC and
UTF-8/16/32. */
/* #undef EBCDIC */
/* In an EBCDIC environment, define this macro to any value to arrange for the
NL character to be 0x25 instead of the default 0x15. NL plays the role that
LF does in an ASCII/Unicode environment. The value must also be set in the
NEWLINE macro below. On systems that can use "configure" or CMake to set
EBCDIC_NL25, the adjustment of NEWLINE is automatic. */
LF does in an ASCII/Unicode environment. */
/* #undef EBCDIC_NL25 */
/* Define to 1 if you have the `bcopy' function. */
@ -126,11 +123,19 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to 1 if you have the <zlib.h> header file. */
/* #undef HAVE_ZLIB_H */
/* PCRE2 uses recursive function calls to handle backtracking while matching.
This can sometimes be a problem on systems that have stacks of limited
size. Define HEAP_MATCH_RECURSE to any value to get a version that doesn't
use recursion in the match() function; instead it creates its own stack by
steam using memory from the heap. For more detail, see the comments and
other stuff just above the match() function. */
/* #undef HEAP_MATCH_RECURSE */
/* The value of LINK_SIZE determines the number of bytes used to store links
as offsets within the compiled regex. The default is 2, which allows for
compiled patterns up to 64K long. This covers the vast majority of cases.
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
for longer patterns in extreme cases. */
However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This
allows for longer patterns in extreme cases. */
#ifndef LINK_SIZE
#define LINK_SIZE 2
#endif
@ -144,7 +149,7 @@ sure both macros are undefined; an emulation function will then be used. */
/* The value of MATCH_LIMIT determines the default number of times the
internal match() function can be called during a single execution of
pcre_exec(). There is a runtime interface for setting a different limit.
pcre2_match(). There is a runtime interface for setting a different limit.
The limit exists in order to catch runaway regular expressions that take
for ever to determine that they do not match. The default is set very large
so that it does not accidentally catch legitimate cases. */
@ -155,11 +160,11 @@ sure both macros are undefined; an emulation function will then be used. */
/* The above limit applies to all calls of match(), whether or not they
increase the recursion depth. In some environments it is desirable to limit
the depth of recursive calls of match() more strictly, in order to restrict
the maximum amount of stack (or heap, if NO_RECURSE is defined) that is
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
match(). To have any useful effect, it must be less than the value of
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
a runtime method for setting a different limit. */
the maximum amount of stack (or heap, if HEAP_MATCH_RECURSE is defined)
that is used. The value of MATCH_LIMIT_RECURSION applies only to recursive
calls of match(). To have any useful effect, it must be less than the value
of MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There
is a runtime method for setting a different limit. */
#ifndef MATCH_LIMIT_RECURSION
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
#endif
@ -178,27 +183,14 @@ sure both macros are undefined; an emulation function will then be used. */
#define MAX_NAME_SIZE 32
#endif
/* The value of NEWLINE determines the default newline character sequence.
PCRE client programs can override this by selecting other values at run
time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338
(CRLF); in EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or
3349 or 3365 (CRLF) because there are two alternative codepoints (0x15 and
0x25) that are used as the NL line terminator that is equivalent to ASCII
LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY),
or -2 (ANYCRLF). */
#ifndef NEWLINE
#define NEWLINE 10
/* The value of NEWLINE_DEFAULT determines the default newline character
sequence. PCRE2 client programs can override this by selecting other values
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
(ANYCRLF). */
#ifndef NEWLINE_DEFAULT
#define NEWLINE_DEFAULT 2
#endif
/* PCRE uses recursive function calls to handle backtracking while matching.
This can sometimes be a problem on systems that have stacks of limited
size. Define NO_RECURSE to any value to get a version that doesn't use
recursion in the match() function; instead it creates its own stack by
steam using pcre_recurse_malloc() to obtain memory from the heap. For more
detail, see the comments and other stuff just above the match() function.
*/
/* #undef NO_RECURSE */
/* Name of package */
#define PACKAGE "pcre2"
@ -209,7 +201,7 @@ sure both macros are undefined; an emulation function will then be used. */
#define PACKAGE_NAME "PCRE2"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "PCRE2 9.00-DEV"
#define PACKAGE_STRING "PCRE2 10.00-DEV"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pcre2"
@ -218,7 +210,7 @@ sure both macros are undefined; an emulation function will then be used. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION "9.00-DEV"
#define PACKAGE_VERSION "10.00-DEV"
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
parentheses (of any kind) in a pattern. This limits the amount of system
@ -227,15 +219,13 @@ sure both macros are undefined; an emulation function will then be used. */
#define PARENS_NEST_LIMIT 250
#endif
/* #undef PCRE2_EXP_DEFN */
/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
pcregrep to hold parts of the file it is searching. This is also the
minimum value. The actual amount of memory used by pcregrep is three times
/* The value of PCRE2GREP_BUFSIZE determines the size of buffer used by
pcre2grep to hold parts of the file it is searching. This is also the
minimum value. The actual amount of memory used by pcre2grep is three times
this number, because it allows for the buffering of "before" and "after"
lines. */
#ifndef PCREGREP_BUFSIZE
#define PCREGREP_BUFSIZE 20480
#ifndef PCRE2GREP_BUFSIZE
#define PCRE2GREP_BUFSIZE 20480
#endif
/* If you are compiling for a system other than a Unix-like system or
@ -247,21 +237,10 @@ sure both macros are undefined; an emulation function will then be used. */
This macro appears at the start of every exported function that is part
of the external API. It does not appear on functions that are "external"
in the C sense, but which are internal to the library. */
/* #undef PCRE_EXP_DEFN */
/* #undef PCRE2_EXP_DEFN */
/* Define to any value if linking statically (TODO: make nice with Libtool) */
/* #undef PCRE_STATIC */
/* When calling PCRE via the POSIX interface, additional working storage is
required for holding the pointers to capturing substrings because PCRE
requires three integers per substring, whereas the POSIX interface provides
only two. If the number of expected substrings is small, the wrapper
function uses space on the stack, because this is faster than using
malloc() for each call. The threshold above which the stack is no longer
used is defined by POSIX_MALLOC_THRESHOLD. */
#ifndef POSIX_MALLOC_THRESHOLD
#define POSIX_MALLOC_THRESHOLD 10
#endif
/* #undef PCRE2_STATIC */
/* Define to necessary symbol if this constant uses a non-standard name on
your system. */
@ -273,35 +252,35 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to any value to enable support for Just-In-Time compiling. */
/* #undef SUPPORT_JIT */
/* Define to any value to allow pcregrep to be linked with libbz2, so that it
/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
is able to handle .bz2 files. */
/* #undef SUPPORT_LIBBZ2 */
/* Define to any value to allow pcretest to be linked with libedit. */
/* Define to any value to allow pcre2test to be linked with libedit. */
/* #undef SUPPORT_LIBEDIT */
/* Define to any value to allow pcretest to be linked with libreadline. */
/* Define to any value to allow pcre2test to be linked with libreadline. */
/* #undef SUPPORT_LIBREADLINE */
/* Define to any value to allow pcregrep to be linked with libz, so that it is
able to handle .gz files. */
/* Define to any value to allow pcre2grep to be linked with libz, so that it
is able to handle .gz files. */
/* #undef SUPPORT_LIBZ */
/* Define to any value to enable the 16 bit PCRE library. */
/* Define to any value to enable the 16 bit PCRE2 library. */
/* #undef SUPPORT_PCRE16 */
/* Define to any value to enable the 32 bit PCRE library. */
/* Define to any value to enable JIT support in pcre2grep. */
/* #undef SUPPORT_PCRE2GREP_JIT */
/* Define to any value to enable the 32 bit PCRE2 library. */
/* #undef SUPPORT_PCRE32 */
/* Define to any value to enable the 8 bit PCRE library. */
/* Define to any value to enable the 8 bit PCRE2 library. */
/* #undef SUPPORT_PCRE8 */
/* Define to any value to enable JIT support in pcregrep. */
/* #undef SUPPORT_PCREGREP_JIT */
/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
This will work even in an EBCDIC environment, but it is incompatible with
the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or*
the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
ASCII/UTF-8/16/32, but not both at once. */
/* #undef SUPPORT_UTF */
@ -309,7 +288,7 @@ sure both macros are undefined; an emulation function will then be used. */
/* #undef SUPPORT_VALGRIND */
/* Version number of package */
#define VERSION "9.00-DEV"
#define VERSION "10.00-DEV"
/* Define to empty if `const' does not conform to ANSI C. */
/* #undef const */

View File

@ -271,12 +271,14 @@ typedef const PCRE2_UCHAR8 *PCRE2_SPTR8;
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16;
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32;
/* Offsets in the pattern (for errors) and in the subject (after a match) are
unsigned 32-bit numbers. We also define a value to indicate "unset" in the
offset vector (ovector). */
/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
including pattern offsets for errors and subject offsets after a match. We
define special values to indicate zero-terminated strings and unset offsets in
the offset vector (ovector). */
#define PCRE2_OFFSET PCRE2_UCHAR32
#define PCRE2_UNSET (~(PCRE2_OFFSET)0)
#define PCRE2_SIZE size_t
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
#define PCRE2_UNSET (~(PCRE2_SIZE)0)
/* Generic types for opaque structures and JIT callback functions. These
declarations are defined in a macro that is expanded for each width later. */
@ -314,17 +316,17 @@ typedef struct pcre2_callout_block { \
int version; /* Identifies version of block */ \
/* ------------------------ Version 0 ------------------------------- */ \
uint32_t callout_number; /* Number compiled into pattern */ \
PCRE2_OFFSET *offset_vector; /* The offset vector */ \
PCRE2_SIZE *offset_vector; /* The offset vector */ \
PCRE2_SPTR subject; /* The subject being matched */ \
size_t subject_length; /* The length of the subject */ \
PCRE2_OFFSET start_match; /* Offset to start of this match attempt */ \
PCRE2_OFFSET current_position; /* Where we currently are in the subject */ \
PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \
PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
uint32_t capture_top; /* Max current capture */ \
uint32_t capture_last; /* Most recently closed capture */ \
void *callout_data; /* Data passed in with the call */ \
/* ------------------- Added for Version 1 -------------------------- */ \
PCRE2_OFFSET pattern_position; /* Offset to next item in the pattern */ \
PCRE2_OFFSET next_item_length; /* Length of next item in the pattern */ \
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
/* ------------------- Added for Version 2 -------------------------- */ \
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
/* ------------------------------------------------------------------ */ \
@ -392,8 +394,8 @@ PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
#define PCRE2_COMPILE_FUNCTIONS \
PCRE2_EXP_DECL \
pcre2_code *pcre2_compile(PCRE2_SPTR, int, uint32_t, \
int *, PCRE2_OFFSET *, pcre2_compile_context *); \
pcre2_code *pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, \
int *, PCRE2_SIZE *, pcre2_compile_context *); \
PCRE2_EXP_DECL void pcre2_code_free(pcre2_code *);
@ -408,25 +410,25 @@ PCRE2_EXP_DECL int pcre2_pattern_info(const pcre2_code *, uint32_t, \
#define PCRE2_MATCH_FUNCTIONS \
PCRE2_EXP_DECL \
pcre2_match_data *pcre2_match_data_create(uint32_t, \
pcre2_general_context *); \
pcre2_match_data *pcre2_match_data_create(uint32_t, \
pcre2_general_context *); \
PCRE2_EXP_DECL \
pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *, \
pcre2_general_context *); \
PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, \
PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \
pcre2_match_data *, pcre2_match_context *, int *, \
size_t); \
PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \
PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \
pcre2_match_data *, pcre2_match_context *); \
PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_OFFSET pcre2_get_leftchar(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \
PCRE2_EXP_DECL uint32_t pcre2_get_ovector_count(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_OFFSET *pcre2_get_ovector_pointer(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_OFFSET pcre2_get_rightchar(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_OFFSET pcre2_get_startchar(pcre2_match_data *);
pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *, \
pcre2_general_context *); \
PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, \
PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
pcre2_match_data *, pcre2_match_context *, int *, \
size_t); \
PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \
PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
pcre2_match_data *, pcre2_match_context *); \
PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_leftchar(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \
PCRE2_EXP_DECL uint32_t pcre2_get_ovector_count(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_rightchar(pcre2_match_data *); \
PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *);
/* Convenience functions for handling matched substrings. */
@ -457,9 +459,9 @@ PCRE2_EXP_DECL int pcre2_substring_list_get(pcre2_match_data *, \
/* Functions for JIT processing */
#define PCRE2_JIT_FUNCTIONS \
PCRE2_EXP_DECL void pcre2_jit_compile(pcre2_code *, uint32_t); \
PCRE2_EXP_DECL int pcre2_jit_compile(pcre2_code *, uint32_t); \
PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \
PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \
PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
pcre2_match_data *, pcre2_match_context *, \
pcre2_jit_stack *); \
PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *);\

View File

@ -7256,8 +7256,8 @@ Returns: pointer to compiled data block, or NULL on error,
*/
PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION
pcre2_compile(PCRE2_SPTR pattern, int patlen, uint32_t options,
int *errorptr, PCRE2_OFFSET *erroroffset, pcre2_compile_context *ccontext)
pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options,
int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext)
{
BOOL utf; /* Set TRUE for UTF mode */
pcre2_real_code *re = NULL; /* What we will return */
@ -7324,10 +7324,12 @@ if (ccontext == NULL)
ccontext = &default_context;
}
/* A negative pattern length means "zero-terminated". Otherwise, we make
a copy of the pattern and add a zero. */
/* A zero-terminated pattern is indicated by the special length value
PCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero,
to ensure that it is always possible to look one code unit beyond the end of
the pattern's characters. */
if (patlen < 0) patlen = PRIV(strlen)(pattern); else
if (patlen == PCRE2_ZERO_TERMINATED) patlen = PRIV(strlen)(pattern); else
{
if (patlen < COPIED_PATTERN_SIZE)
copied_pattern = stack_copied_pattern;
@ -7473,7 +7475,7 @@ if (utf)
goto HAD_ERROR;
}
if ((options & PCRE2_NO_UTF_CHECK) == 0 &&
(errorcode = PRIV(valid_utf)(pattern, -1, erroroffset)) != 0)
(errorcode = PRIV(valid_utf)(pattern, patlen, erroroffset)) != 0)
goto HAD_ERROR;
}

View File

@ -136,7 +136,7 @@ switch (what)
break;
case PCRE2_CONFIG_STACKRECURSE:
#ifdef NO_RECURSE
#ifdef HEAP_MATCH_RECURSE
*((int *)where) = 0;
#else
*((int *)where) = 1;

View File

@ -164,7 +164,7 @@ if (defmemctl)
mcontext->memctl.free = default_free;
mcontext->memctl.memory_data = NULL;
}
#ifdef NO_RECURSE
#ifdef HEAP_MATCH_RECURSE
mcontext->stack_memctl = mcontext->memctl;
#endif
mcontext->callout = NULL;
@ -389,7 +389,7 @@ pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
void *mydata)
{
#ifdef NO_RECURSE
#ifdef HEAP_MATCH_RECURSE
mcontext->stack_memctl.malloc = mymalloc;
mcontext->stack_memctl.free = myfree;
mcontext->stack_memctl.memory_data = mydata;

View File

@ -365,8 +365,8 @@ internal_dfa_match(
dfa_match_block *mb,
PCRE2_SPTR this_start_code,
PCRE2_SPTR current_subject,
PCRE2_OFFSET start_offset,
PCRE2_OFFSET *offsets,
PCRE2_SIZE start_offset,
PCRE2_SIZE *offsets,
uint32_t offsetcount,
int *workspace,
int wscount,
@ -730,7 +730,7 @@ for (;;)
else if (match_count > 0 && ++match_count * 2 > (int)offsetcount)
match_count = 0;
count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2;
if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
if (count > 0) memmove(offsets + 2, offsets, count * sizeof(PCRE2_SIZE));
if (offsetcount >= 2)
{
offsets[0] = (int)(current_subject - start_subject);
@ -2560,7 +2560,7 @@ for (;;)
case OP_ASSERTBACK_NOT:
{
PCRE2_SPTR endasscode = code + GET(code, 1);
PCRE2_OFFSET local_offsets[2];
PCRE2_SIZE local_offsets[2];
int rc;
int local_workspace[1000];
@ -2572,7 +2572,7 @@ for (;;)
ptr, /* where we currently are */
(int)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_OFFSET), /* size of same */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
@ -2587,7 +2587,7 @@ for (;;)
case OP_COND:
case OP_SCOND:
{
PCRE2_OFFSET local_offsets[1000];
PCRE2_SIZE local_offsets[1000];
int local_workspace[1000];
int codelink = GET(code, 1);
int condcode;
@ -2606,9 +2606,9 @@ for (;;)
cb.callout_number = code[LINK_SIZE+2];
cb.offset_vector = offsets;
cb.subject = start_subject;
cb.subject_length = (int)(end_subject - start_subject);
cb.start_match = (int)(current_subject - start_subject);
cb.current_position = (int)(ptr - start_subject);
cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject);
cb.start_match = (PCRE2_SIZE)(current_subject - start_subject);
cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
cb.pattern_position = GET(code, LINK_SIZE + 3);
cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
cb.capture_top = 1;
@ -2664,7 +2664,7 @@ for (;;)
ptr, /* where we currently are */
(int)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_OFFSET), /* size of same */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
@ -2683,7 +2683,7 @@ for (;;)
case OP_RECURSE:
{
dfa_recursion_info *ri;
PCRE2_OFFSET local_offsets[1000];
PCRE2_SIZE local_offsets[1000];
int local_workspace[1000];
PCRE2_SPTR callpat = start_code + GET(code, 1);
uint32_t recno = (callpat == mb->start_code)? 0 :
@ -2712,7 +2712,7 @@ for (;;)
ptr, /* where we currently are */
(int)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_OFFSET), /* size of same */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
@ -2777,7 +2777,7 @@ for (;;)
for (matched_count = 0;; matched_count++)
{
PCRE2_OFFSET local_offsets[2];
PCRE2_SIZE local_offsets[2];
int local_workspace[1000];
int rc = internal_dfa_match(
@ -2786,7 +2786,7 @@ for (;;)
local_ptr, /* where we currently are */
(int)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_OFFSET), /* size of same */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
@ -2849,7 +2849,7 @@ for (;;)
case OP_ONCE:
case OP_ONCE_NC:
{
PCRE2_OFFSET local_offsets[2];
PCRE2_SIZE local_offsets[2];
int local_workspace[1000];
int rc = internal_dfa_match(
@ -2858,7 +2858,7 @@ for (;;)
ptr, /* where we currently are */
(int)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_OFFSET), /* size of same */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
@ -2948,9 +2948,9 @@ for (;;)
cb.callout_number = code[1];
cb.offset_vector = offsets;
cb.subject = start_subject;
cb.subject_length = (int)(end_subject - start_subject);
cb.start_match = (int)(current_subject - start_subject);
cb.current_position = (int)(ptr - start_subject);
cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject);
cb.start_match = (PCRE2_SIZE)(current_subject - start_subject);
cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
cb.pattern_position = GET(code, 2);
cb.next_item_length = GET(code, 2 + LINK_SIZE);
cb.capture_top = 1;
@ -3049,8 +3049,8 @@ Returns: > 0 => number of match offset pairs placed in offsets
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, int length,
PCRE2_OFFSET start_offset, uint32_t options, pcre2_match_data *match_data,
pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
pcre2_match_context *mcontext, int *workspace, size_t wscount)
{
const pcre2_real_code *re = (const pcre2_real_code *)code;
@ -3078,9 +3078,10 @@ is used below, and it expects NLBLOCK to be defined as a pointer. */
dfa_match_block actual_match_block;
dfa_match_block *mb = &actual_match_block;
/* A negative length implies a zero-terminated subject string. */
/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
subject string. */
if (length < 0) length = PRIV(strlen)(subject);
if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
/* Plausibility checks */
@ -3088,7 +3089,7 @@ if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
return PCRE2_ERROR_NULL;
if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
if ((int)start_offset > length) return PCRE2_ERROR_BADOFFSET;
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to
@ -3214,7 +3215,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
return match_data->rc;
}
#if PCRE2_CODE_UNIT_WIDTH != 32
if (start_offset > 0 && (int)start_offset < length &&
if (start_offset > 0 && start_offset < length &&
NOT_FIRSTCHAR(subject[start_offset]))
return PCRE2_ERROR_BADUTFOFFSET;
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
@ -3466,12 +3467,12 @@ for (;;)
{
if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0)
{
match_data->ovector[0] = (PCRE2_OFFSET)(start_match - subject);
match_data->ovector[1] = (PCRE2_OFFSET)(end_subject - subject);
match_data->ovector[0] = (PCRE2_SIZE)(start_match - subject);
match_data->ovector[1] = (PCRE2_SIZE)(end_subject - subject);
}
match_data->leftchar = (PCRE2_OFFSET)(mb->start_used_ptr - subject);
match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject);
match_data->rightchar = 0; /* FIXME */
match_data->startchar = (PCRE2_OFFSET)(start_match - subject);
match_data->startchar = (PCRE2_SIZE)(start_match - subject);
match_data->rc = rc;
return rc;
}

View File

@ -1859,7 +1859,7 @@ extern void _pcre2_compile_context_init(pcre2_compile_context *, BOOL);
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
BOOL);
extern int _pcre2_jit_get_size(void *);
extern size_t _pcre2_jit_get_size(void *);
extern void _pcre2_match_context_init(pcre2_match_context *, BOOL);
extern void *_pcre2_memctl_malloc(size_t, pcre2_memctl *);
extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *);
@ -1869,7 +1869,7 @@ extern int _pcre2_strlen(PCRE2_SPTR);
extern int _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t);
extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t);
extern int _pcre2_study(pcre2_real_code *);
extern int _pcre2_valid_utf(PCRE2_SPTR, int, PCRE2_OFFSET *);
extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
BOOL);
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);

View File

@ -565,7 +565,7 @@ typedef struct pcre2_real_compile_context {
typedef struct pcre2_real_match_context {
pcre2_memctl memctl;
#ifdef NO_RECURSE
#ifdef HEAP_MATCH_RECURSE
pcre2_memctl stack_memctl;
#endif
int (*callout)(pcre2_callout_block *);
@ -609,12 +609,12 @@ typedef struct pcre2_real_match_data {
const pcre2_real_code *code; /* The pattern used for the match */
PCRE2_SPTR subject; /* The subject that was matched */
int rc; /* The return code from the match */
PCRE2_OFFSET leftchar; /* Offset to leftmost code unit */
PCRE2_OFFSET rightchar; /* Offset to rightmost code unit */
PCRE2_OFFSET startchar; /* Offset to starting code unit */
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
PCRE2_SIZE startchar; /* Offset to starting code unit */
PCRE2_SPTR mark; /* Pointer to last mark */
uint16_t oveccount; /* Number of pairs */
PCRE2_OFFSET ovector[1]; /* The first field */
PCRE2_SIZE ovector[1]; /* The first field */
} pcre2_real_match_data;
@ -686,12 +686,12 @@ typedef struct compile_block {
call within the pattern; used by pcre2_match(). */
typedef struct recursion_info {
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
unsigned int group_num; /* Number of group that was called */
PCRE2_OFFSET *offset_save; /* Pointer to start of saved offsets */
uint32_t saved_max; /* Number of saved offsets */
uint32_t saved_capture_last; /* Last capture number */
PCRE2_SPTR subject_position; /* Position at start of recursion */
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
unsigned int group_num; /* Number of group that was called */
PCRE2_SIZE *ovec_save; /* Pointer to start of saved ovector */
uint32_t saved_max; /* Number of saved offsets */
uint32_t saved_capture_last; /* Last capture number */
PCRE2_SPTR subject_position; /* Position at start of recursion */
} recursion_info;
/* A similar structure for pcre2_dfa_match(). */
@ -717,7 +717,7 @@ doing traditional NFA matching (pcre2_match() and friends). */
typedef struct match_block {
pcre2_memctl memctl; /* For general use */
#ifdef NO_RECURSE
#ifdef HEAP_MATCH_RECURSE
pcre2_memctl stack_memctl; /* For "stack" frames */
#endif
uint32_t match_call_count; /* As it says */
@ -728,11 +728,11 @@ typedef struct match_block {
const uint8_t *lcc; /* Points to lower casing table */
const uint8_t *fcc; /* Points to case-flipping table */
const uint8_t *ctypes; /* Points to table of type maps */
PCRE2_OFFSET *ovector; /* Pointer to the offset vector */
PCRE2_OFFSET offset_end; /* One past the end */
PCRE2_OFFSET offset_max; /* The maximum usable for return data */
PCRE2_OFFSET start_offset; /* The start offset value */
PCRE2_OFFSET end_offset_top; /* Highwater mark at end of match */
PCRE2_SIZE *ovector; /* Pointer to the offset vector */
PCRE2_SIZE offset_end; /* One past the end */
PCRE2_SIZE offset_max; /* The maximum usable for return data */
PCRE2_SIZE start_offset; /* The start offset value */
PCRE2_SIZE end_offset_top; /* Highwater mark at end of match */
uint16_t partial; /* PARTIAL options */
uint16_t bsr_convention; /* \R interpretation */
uint16_t name_count; /* Number of names in name table */
@ -760,7 +760,7 @@ typedef struct match_block {
recursion_info *recursive; /* Linked list of recursion data */
void *callout_data; /* To pass back to callouts */
int (*callout)(pcre2_callout_block *); /* Callout function or NULL */
#ifdef NO_RECURSE
#ifdef HEAP_MATCH_RECURSE
void *match_frames_base; /* For remembering malloc'd frames */
#endif
} match_block;
@ -769,22 +769,22 @@ typedef struct match_block {
functions. */
typedef struct dfa_match_block {
pcre2_memctl memctl; /* For general use */
PCRE2_SPTR start_code; /* Start of the compiled pattern */
PCRE2_SPTR start_subject ; /* Start of the subject string */
PCRE2_SPTR end_subject; /* End of subject string */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
const uint8_t *tables; /* Character tables */
PCRE2_OFFSET start_offset; /* The start offset value */
uint32_t moptions; /* Match options */
uint32_t poptions; /* Pattern options */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
uint16_t bsr_convention; /* \R interpretation */
void *callout_data; /* To pass back to callouts */
pcre2_memctl memctl; /* For general use */
PCRE2_SPTR start_code; /* Start of the compiled pattern */
PCRE2_SPTR start_subject ; /* Start of the subject string */
PCRE2_SPTR end_subject; /* End of subject string */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
const uint8_t *tables; /* Character tables */
PCRE2_SIZE start_offset; /* The start offset value */
uint32_t moptions; /* Match options */
uint32_t poptions; /* Pattern options */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
uint16_t bsr_convention; /* \R interpretation */
void *callout_data; /* To pass back to callouts */
int (*callout)(pcre2_callout_block *); /* Callout function or NULL */
dfa_recursion_info *recursive; /* Linked list of recursion data */
dfa_recursion_info *recursive; /* Linked list of recursion data */
} dfa_match_block;
#endif /* PCRE2_PCRE2TEST */

View File

@ -50,11 +50,11 @@ POSSIBILITY OF SUCH DAMAGE.
* JIT compile a Regular Expression *
*************************************************/
/* This function used JIT to convert a previously-compiled pattern into machine
/* This function uses JIT to convert a previously-compiled pattern into machine
code.
Arguments:
code a compiled pattern
code a compiled pattern
options JIT option bits
Returns: nothing
@ -62,16 +62,21 @@ Returns: nothing
/* FIXME: this is currently a placeholder function */
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_compile(pcre2_code *code, uint32_t options)
{
#ifndef SUPPORT_JIT
(void)code;
(void)options;
return PCRE2_ERROR_JIT_BADOPTION;
#else /* SUPPORT_JIT */
code=code; options = options; /* Dummy.... */
/* Dummy code */
code=code;
options = options;
return PCRE2_ERROR_JIT_BADOPTION;
#endif /* SUPPORT_JIT */
}

View File

@ -71,11 +71,12 @@ Returns: > 0 => success; value is the number of ovector pairs filled
/* FIXME: this is currently a placeholder function */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, int length,
PCRE2_OFFSET start_offset, uint32_t options, pcre2_match_data *match_data,
pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
pcre2_match_context *mcontext, pcre2_jit_stack *jit_stack)
{
#ifndef SUPPORT_JIT
(void)code;
(void)subject;
(void)length;
@ -85,17 +86,21 @@ pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, int length,
(void)mcontext;
(void)jit_stack;
return PCRE2_ERROR_JIT_BADOPTION;
#else /* SUPPORT_JIT */
/* Dummy code */
code=code;subject=subject;length=length;
start_offset=start_offset; options=options; match_data=match_data;
code=code;
subject=subject;
length=length;
start_offset=start_offset;
options=options;
match_data=match_data;
mcontext=mcontext;
jit_stack=jit_stack;
return PCRE2_ERROR_JIT_BADOPTION;
#endif /* SUPPORT_JIT */
}
}
/* End of pcre2_jit_match.c */

View File

@ -54,14 +54,18 @@ POSSIBILITY OF SUCH DAMAGE.
*************************************************/
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_jit_free_unused_memory(pcre2_general_context *gcontext)
{
#ifndef SUPPORT_JIT
(void)gcontext; /* Suppress warning */
#else /* SUPPORT_JIT */
gcontext=gcontext; /* Dummy */
/* Dummy code */
gcontext=gcontext;
#endif /* SUPPORT_JIT */
}
@ -71,19 +75,25 @@ gcontext=gcontext; /* Dummy */
* Allocate a JIT stack *
*************************************************/
PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION
pcre2_jit_stack_alloc(pcre2_general_context *gcontext, size_t startsize,
PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION
pcre2_jit_stack_alloc(pcre2_general_context *gcontext, size_t startsize,
size_t maxsize)
{
#ifndef SUPPORT_JIT
(void)gcontext;
(void)startsize;
(void)maxsize;
return NULL;
#else /* SUPPORT_JIT */
gcontext=gcontext;startsize=startsize;maxsize=maxsize;
/* Dummy code */
gcontext=gcontext;
startsize=startsize;
maxsize=maxsize;
return NULL;
#endif
}
@ -97,12 +107,18 @@ pcre2_jit_stack_assign(const pcre2_code *code, pcre2_jit_callback callback,
void *callback_data)
{
#ifndef SUPPORT_JIT
(void)code;
(void)callback;
(void)callback_data;
#else /* SUPPORT_JIT */
code=code;callback=callback;callback_data=callback_data;
/* Dummy code */
code=code;
callback=callback;
callback_data=callback_data;
#endif /* SUPPORT_JIT */
}
@ -115,10 +131,14 @@ PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_jit_stack_free(pcre2_jit_stack *jit_stack)
{
#ifndef SUPPORT_JIT
(void)jit_stack;
#else /* SUPPORT_JIT */
/* Dummy code */
jit_stack=jit_stack;
#endif /* SUPPORT_JIT */
}
@ -127,16 +147,20 @@ jit_stack=jit_stack;
* Get size of JIT code *
*************************************************/
int
size_t
PRIV(jit_get_size)(void *executable_jit)
{
#ifndef SUPPORT_JIT
(void)executable_jit;
return 0;
#else /* SUPPORT_JIT */
/* Dummy code */
executable_jit = executable_jit;
return 0; /* FIXME */
return 0;
#endif
}

View File

@ -104,17 +104,18 @@ for any one of them can use a range. */
#define MATCH_BACKTRACK_MAX MATCH_THEN
#define MATCH_BACKTRACK_MIN MATCH_COMMIT
/* Maximum number of ints of offset to save on the stack for recursive calls.
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
because the offset vector is always a multiple of 3 long. */
#define REC_STACK_SAVE_MAX 30
/* Min and max values for the common repeats; for the maxima, 0 => infinity */
static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
/* Maximum number of ovector elements that can be saved on the system stack
when processing OP_RECURSE in non-HEAP_MATCH_RECURSE mode. If the ovector is
bigger, malloc() is used. This value should be a multiple of 3, because the
ovector length is always a multiple of 3. */
#define OP_RECURSE_STACK_SAVE_MAX 45
/*************************************************
@ -129,11 +130,11 @@ subject bytes matched may be different to the number of reference bytes.
Arguments:
offset index into the offset vector
eptr pointer into the subject
length length of reference to be matched (number of bytes)
length length of reference to be matched (number of code units)
mb points to match block
caseless TRUE if caseless
Returns: >= 0 the number of subject bytes matched
Returns: >= 0 the number of subject code units matched
-1 no match
-2 partial match; always given if at end subject
*/
@ -230,7 +231,7 @@ return (int)(eptr - eptr_start);
RECURSION IN THE match() FUNCTION
The match() function is highly recursive, though not every recursive call
increases the recursive depth. Nevertheless, some regular expressions can cause
increases the recursion depth. Nevertheless, some regular expressions can cause
it to recurse to a great depth. I was writing for Unix, so I just let it call
itself recursively. This uses the stack for saving everything that has to be
saved for a recursive call. On Unix, the stack can be large, and this works
@ -241,9 +242,9 @@ programs that use a lot of stack. (This despite the fact that every last chip
has oodles of memory these days, and techniques for extending the stack have
been known for decades.) So....
There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
calls by keeping local variables that need to be preserved in blocks of memory
obtained from malloc() instead instead of on the stack. Macros are used to
There is a fudge, triggered by defining HEAP_MATCH_RECURSE, which avoids
recursive calls by keeping local variables that need to be preserved in blocks
of memory on the heap instead of on the stack. Macros are used to
achieve this so that the actual code doesn't look very different to what it
always used to.
@ -274,11 +275,10 @@ enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
RM61, RM62, RM63, RM64, RM65, RM66, RM67 };
/* These versions of the macros use the stack, as normal. There are debugging
versions and production versions. Note that the "rw" argument of RMATCH isn't
actually used in this definition. */
/* These versions of the macros use the stack, as normal. Note that the "rw"
argument of RMATCH isn't actually used in this definition. */
#ifndef NO_RECURSE
#ifndef HEAP_MATCH_RECURSE
#define REGISTER register
#define RMATCH(ra,rb,rc,rd,re,rw) \
rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
@ -350,10 +350,9 @@ typedef struct heapframe {
eptrblock *Xeptrb;
PCRE2_OFFSET Xoffset;
PCRE2_OFFSET Xoffset_top;
PCRE2_OFFSET Xstacksave[REC_STACK_SAVE_MAX];
PCRE2_OFFSET Xsave_offset1, Xsave_offset2, Xsave_offset3;
PCRE2_SIZE Xoffset;
PCRE2_SIZE Xoffset_top;
PCRE2_SIZE Xsave_offset1, Xsave_offset2, Xsave_offset3;
uint32_t Xfc;
uint32_t Xnumber;
@ -395,6 +394,99 @@ typedef struct heapframe {
***************************************************************************/
/* When HEAP_MATCH_RECURSE is not defined, the match() function implements
backtrack points by calling itself recursively in all but one case. The one
special case is when processing OP_RECURSE, which specifies recursion in the
pattern. The entire ovector must be saved and restored while processing
OP_RECURSE. If the ovector is small enough, instead of calling match()
directly, op_recurse_ovecsave() is called. This function uses the system stack
to save the ovector while calling match() to process the pattern recursion. */
#ifndef HEAP_MATCH_RECURSE
/* We need a prototype for match() because it is mutually recursive with
op_recurse_ovecsave(). */
static int
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth);
/*************************************************
* Process OP_RECURSE, stacking ovector *
*************************************************/
/* When this function is called, mb->recursive has already been updated to
point to a new recursion data block, and all its fields other than ovec_save
have been set.
Arguments:
eptr pointer to current character in subject
callpat the recursion point in the pattern
mstart pointer to the current match start position (can be modified
by encountering \K)
offset_top current top pointer
mb pointer to "static" info block for the match
eptrb pointer to chain of blocks containing eptr at start of
brackets - for testing for empty matches
rdepth the recursion depth
Returns: a match() return code
*/
static int
op_recurse_ovecsave(REGISTER PCRE2_SPTR eptr, PCRE2_SPTR callpat,
  PCRE2_SPTR mstart, PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb,
  uint32_t rdepth)
{
/* Run a pattern recursion while keeping a copy of the ovector in this
function's own stack frame. The recursion record is taken from mb->recursive;
its ovec_save field is filled in here. The local array holds at most
OP_RECURSE_STACK_SAVE_MAX elements, so this function must only be used when
the record's saved_max does not exceed that value. The result is a match()
return code. */

PCRE2_SIZE stacked_ovector[OP_RECURSE_STACK_SAVE_MAX];
recursion_info *rec = mb->recursive;
BOOL set_cbegroup = *callpat >= OP_SBRA;
register int rc;

/* Take the snapshot of the ovector and publish it in the recursion record. */

rec->ovec_save = stacked_ovector;
memcpy(stacked_ovector, mb->ovector, rec->saved_max * sizeof(PCRE2_SIZE));

/* Try each alternative of the called group in turn. */

for (;;)
  {
  if (set_cbegroup) mb->match_function_type = MATCH_CBEGROUP;
  rc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top,
    mb, eptrb, rdepth + 1);

  /* Whatever the outcome, put back the ovector contents and the last
  captured value as they were before this alternative was tried. */

  memcpy(mb->ovector, rec->ovec_save, rec->saved_max * sizeof(PCRE2_SIZE));
  mb->capture_last = rec->saved_capture_last;

  /* Success is handed straight back to the caller. */

  if (rc == MATCH_MATCH || rc == MATCH_ACCEPT) return rc;

  /* Return codes in the range MATCH_BACKTRACK_MIN to MATCH_BACKTRACK_MAX
  (THEN, SKIP, PRUNE, COMMIT) are not allowed to escape from a recursion;
  they turn into a NOMATCH for the recursion as a whole. */

  if (rc >= MATCH_BACKTRACK_MIN && rc <= MATCH_BACKTRACK_MAX)
    return MATCH_NOMATCH;

  /* Anything that is left, other than NOMATCH, is an error code. */

  if (rc != MATCH_NOMATCH) return rc;

  /* A nested recursion may have changed mb->recursive, so point it back at
  our record before moving on to the next alternative. */

  mb->recursive = rec;
  callpat += GET(callpat, 1);
  if (*callpat != OP_ALT) break;   /* No more alternatives */
  }

/* Every alternative failed. */

return MATCH_NOMATCH;
}
#endif /* HEAP_MATCH_RECURSE */
/*************************************************
* Match from current position *
@ -451,9 +543,8 @@ Returns: MATCH_MATCH if matched ) these values are >= 0
*/
static int
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode,
PCRE2_SPTR mstart, PCRE2_OFFSET offset_top, match_block *mb, eptrblock *eptrb,
uint32_t rdepth)
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth)
{
/* These variables do not need to be preserved over recursion in this function,
so they can be ordinary variables in all cases. Mark some of them with
@ -475,7 +566,7 @@ whenever RMATCH() does a "recursion". See the macro definitions above. Putting
the top-level on the stack rather than malloc-ing them all gives a performance
boost in many cases where there is not much "recursion". */
#ifdef NO_RECURSE
#ifdef HEAP_MATCH_RECURSE
heapframe *frame = (heapframe *)mb->match_frames_base;
/* Copy in the original argument variables */
@ -535,7 +626,6 @@ HEAP_RECURSE:
#define save_offset1 frame->Xsave_offset1
#define save_offset2 frame->Xsave_offset2
#define save_offset3 frame->Xsave_offset3
#define stacksave frame->Xstacksave
#define condition frame->Xcondition
#define cur_is_word frame->Xcur_is_word
@ -543,11 +633,11 @@ HEAP_RECURSE:
#define newptrb frame->Xnewptrb
/* When recursion is being used, local variables are allocated on the stack and
get preserved during recursion in the normal way. In this environment, fi and
i, and fc and c, can be the same variables. */
/* When normal stack-based recursion is being used for match(), local variables
are allocated on the stack and get preserved during recursion in the usual way.
In this environment, fi and i, and fc and c, can be the same variables. */
#else /* NO_RECURSE not defined */
#else /* HEAP_MATCH_RECURSE not defined */
#define fi i
#define fc c
@ -569,9 +659,8 @@ PCRE2_SPTR pp;
PCRE2_SPTR prev;
PCRE2_SPTR saved_eptr;
PCRE2_OFFSET offset;
PCRE2_OFFSET stacksave[REC_STACK_SAVE_MAX];
PCRE2_OFFSET save_offset1, save_offset2, save_offset3;
PCRE2_SIZE offset;
PCRE2_SIZE save_offset1, save_offset2, save_offset3;
uint32_t number;
uint32_t op;
@ -597,7 +686,7 @@ BOOL prev_is_word;
eptrblock newptrb;
recursion_info new_recursive;
#endif /* NO_RECURSE not defined */
#endif /* HEAP_MATCH_RECURSE not defined */
/* To save space on the stack and in the heap frame, I have doubled up on some
of the local variables that are used only in localised parts of the code, but
@ -622,19 +711,19 @@ prop_fail_result = 0;
/* This label is used for tail recursion, which is used in a few cases even
when NO_RECURSE is not defined, in order to reduce the amount of stack that is
used. Thanks to Ian Taylor for noticing this possibility and sending the
original patch. */
when HEAP_MATCH_RECURSE is not defined, in order to reduce the amount of stack
that is used. Thanks to Ian Taylor for noticing this possibility and sending
the original patch. */
TAIL_RECURSE:
/* OK, now we can get on with the real code of the function. Recursive calls
are specified by the macro RMATCH and RRETURN is used to return. When
NO_RECURSE is *not* defined, these just turn into a recursive call to match()
and a "return", respectively. However, RMATCH isn't like a function call
because it's quite a complicated macro. It has to be used in one particular
way. This shouldn't, however, impact performance when true recursion is being
used. */
HEAP_MATCH_RECURSE is *not* defined, these just turn into a recursive call to
match() and a "return", respectively. However, RMATCH isn't like a function
call because it's quite a complicated macro. It has to be used in one
particular way. This shouldn't, however, impact performance when true recursion
is being used. */
#ifdef SUPPORT_UTF
utf = (mb->poptions & PCRE2_UTF) != 0;
@ -668,7 +757,7 @@ if (mb->match_function_type == MATCH_CBEGROUP)
mb->match_function_type = 0;
}
/* Now start processing the opcodes. */
/* Now, at last, we can start processing the opcodes. */
for (;;)
{
@ -1205,9 +1294,9 @@ for (;;)
cb.callout_number = ecode[1];
cb.offset_vector = mb->ovector;
cb.subject = mb->start_subject;
cb.subject_length = (int)(mb->end_subject - mb->start_subject);
cb.start_match = (int)(mstart - mb->start_subject);
cb.current_position = (int)(eptr - mb->start_subject);
cb.subject_length = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
cb.start_match = (PCRE2_SIZE)(mstart - mb->start_subject);
cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
cb.pattern_position = GET(ecode, 2);
cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
cb.capture_top = offset_top/2;
@ -1230,7 +1319,7 @@ for (;;)
condition = FALSE;
switch(condcode = *ecode)
{
case OP_RREF: /* Numbered group recursion test */
case OP_RREF: /* Numbered group recursion test */
if (mb->recursive != NULL) /* Not recursing => FALSE */
{
uint32_t recno = GET2(ecode, 1); /* Recursion group number*/
@ -1588,9 +1677,9 @@ for (;;)
cb.callout_number = ecode[1];
cb.offset_vector = mb->ovector;
cb.subject = mb->start_subject;
cb.subject_length = (int)(mb->end_subject - mb->start_subject);
cb.start_match = (int)(mstart - mb->start_subject);
cb.current_position = (int)(eptr - mb->start_subject);
cb.subject_length = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
cb.start_match = (PCRE2_SIZE)(mstart - mb->start_subject);
cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
cb.pattern_position = GET(ecode, 2);
cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
cb.capture_top = offset_top/2;
@ -1613,7 +1702,7 @@ for (;;)
all the potential data. There may be up to 65535 such values, which is too
large to put on the stack, but using malloc for small numbers seems
expensive. As a compromise, the stack is used when there are no more than
REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
OP_RECURSE_STACK_SAVE_MAX values to store; otherwise malloc is used.
There are also other values that have to be saved. We use a chained
sequence of blocks that actually live on the stack. Thanks to Robin Houston
@ -1626,12 +1715,11 @@ for (;;)
uint32_t recno;
callpat = mb->start_code + GET(ecode, 1);
recno = (callpat == mb->start_code)? 0 :
GET2(callpat, 1 + LINK_SIZE);
recno = (callpat == mb->start_code)? 0 : GET2(callpat, 1 + LINK_SIZE);
/* Check for repeating a recursion without advancing the subject pointer.
This should catch convoluted mutual recursions. (Some simple cases are
caught at compile time.) */
/* Check for repeating a pattern recursion without advancing the subject
pointer. This should catch convoluted mutual recursions. (Some simple
cases are caught at compile time.) */
for (ri = mb->recursive; ri != NULL; ri = ri->prevrec)
if (recno == ri->group_num && eptr == ri->subject_position)
@ -1641,6 +1729,7 @@ for (;;)
new_recursive.group_num = recno;
new_recursive.saved_capture_last = mb->capture_last;
new_recursive.saved_max = mb->offset_end;
new_recursive.subject_position = eptr;
new_recursive.prevrec = mb->recursive;
mb->recursive = &new_recursive;
@ -1649,78 +1738,93 @@ for (;;)
ecode += 1 + LINK_SIZE;
/* Now save the offset data */
new_recursive.saved_max = mb->offset_end;
if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
new_recursive.offset_save = stacksave;
else
/* When we are using the system stack for match() recursion we can call a
function that uses the system stack for preserving the ovector while
processing the pattern recursion, but only if the ovector is small
enough. */
#ifndef HEAP_MATCH_RECURSE
if (new_recursive.saved_max <= OP_RECURSE_STACK_SAVE_MAX)
{
new_recursive.offset_save = (PCRE2_OFFSET *)
(mb->memctl.malloc(new_recursive.saved_max * sizeof(PCRE2_OFFSET),
mb->memctl.memory_data));
if (new_recursive.offset_save == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);
rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb,
eptrb, rdepth);
mb->recursive = new_recursive.prevrec;
if (rrc != MATCH_MATCH && rrc != MATCH_ACCEPT) RRETURN(rrc);
/* Set where we got to in the subject, and reset the start, in case
it was changed by \K. This *is* propagated back out of a recursion,
for Perl compatibility. */
eptr = mb->end_match_ptr;
mstart = mb->start_match_ptr;
break; /* End of processing OP_RECURSE */
}
memcpy(new_recursive.offset_save, mb->ovector,
new_recursive.saved_max * sizeof(PCRE2_OFFSET));
/* OK, now we can do the recursion. After processing each alternative,
restore the offset data and the last captured value. If there were nested
recursions, mb->recursive might be changed, so reset it before looping.
*/
#endif
/* If the ovector is too big, or if we are using the heap for match()
recursion, we have to use the heap for saving the ovector. */
new_recursive.ovec_save = (PCRE2_SIZE *)
(mb->memctl.malloc(new_recursive.saved_max * sizeof(PCRE2_SIZE),
mb->memctl.memory_data));
if (new_recursive.ovec_save == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);
memcpy(new_recursive.ovec_save, mb->ovector,
new_recursive.saved_max * sizeof(PCRE2_SIZE));
/* Do the recursion. After processing each alternative, restore the
ovector data and the last captured value. This code has the same overall
logic as the code in the op_recurse_ovecsave() function, but is adapted
to use RMATCH/RRETURN and to release the heap block containing the saved
ovector. */
cbegroup = (*callpat >= OP_SBRA);
do
{
if (cbegroup) mb->match_function_type = MATCH_CBEGROUP;
RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
mb, eptrb, RM6);
memcpy(mb->ovector, new_recursive.offset_save,
new_recursive.saved_max * sizeof(PCRE2_OFFSET));
memcpy(mb->ovector, new_recursive.ovec_save,
new_recursive.saved_max * sizeof(PCRE2_SIZE));
mb->capture_last = new_recursive.saved_capture_last;
mb->recursive = new_recursive.prevrec;
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
{
if (new_recursive.offset_save != stacksave)
mb->memctl.free(new_recursive.offset_save, mb->memctl.memory_data);
/* Set where we got to in the subject, and reset the start in case
mb->memctl.free(new_recursive.ovec_save, mb->memctl.memory_data);
/* Set where we got to in the subject, and reset the start, in case
it was changed by \K. This *is* propagated back out of a recursion,
for Perl compatibility. */
eptr = mb->end_match_ptr;
mstart = mb->start_match_ptr;
goto RECURSION_MATCHED; /* Exit loop; end processing */
}
/* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
recursion; they cause a NOMATCH for the entire recursion. These codes
are defined in a range that can be tested for. */
if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
RRETURN(MATCH_NOMATCH);
/* Any return code other than NOMATCH is an error. */
if (rrc != MATCH_NOMATCH)
{
if (new_recursive.offset_save != stacksave)
mb->memctl.free(new_recursive.offset_save, mb->memctl.memory_data);
RRETURN(rrc);
{
rrc = MATCH_NOMATCH;
goto RECURSION_RETURN;
}
/* Any return code other than NOMATCH is an error. */
if (rrc != MATCH_NOMATCH) goto RECURSION_RETURN;
mb->recursive = &new_recursive;
callpat += GET(callpat, 1);
}
while (*callpat == OP_ALT);
RECURSION_RETURN:
mb->recursive = new_recursive.prevrec;
if (new_recursive.offset_save != stacksave)
mb->memctl.free(new_recursive.offset_save, mb->memctl.memory_data);
RRETURN(MATCH_NOMATCH);
mb->memctl.free(new_recursive.ovec_save, mb->memctl.memory_data);
RRETURN(rrc);
}
RECURSION_MATCHED:
RECURSION_MATCHED:
break;
/* An alternation is the end of a branch; scan along to find the end of the
@ -1840,8 +1944,8 @@ for (;;)
if (offset > offset_top)
{
register PCRE2_OFFSET *iptr = mb->ovector + offset_top;
register PCRE2_OFFSET *iend = mb->ovector + offset;
register PCRE2_SIZE *iptr = mb->ovector + offset_top;
register PCRE2_SIZE *iend = mb->ovector + offset;
while (iptr < iend) *iptr++ = PCRE2_UNSET;
}
@ -6023,7 +6127,7 @@ for (;;)
match(), the RRETURN() macro jumps here. The number that is saved in
frame->Xwhere indicates which label we actually want to return to. */
#ifdef NO_RECURSE
#ifdef HEAP_MATCH_RECURSE
#define LBL(val) case val: goto L_RM##val;
HEAP_RETURN:
switch (frame->Xwhere)
@ -6048,7 +6152,7 @@ switch (frame->Xwhere)
return PCRE2_ERROR_INTERNAL;
}
#undef LBL
#endif /* NO_RECURSE */
#endif /* HEAP_MATCH_RECURSE */
}
@ -6058,7 +6162,7 @@ switch (frame->Xwhere)
Undefine all the macros that were defined above to handle this. */
#ifdef NO_RECURSE
#ifdef HEAP_MATCH_RECURSE
#undef eptr
#undef ecode
#undef mstart
@ -6091,10 +6195,9 @@ Undefine all the macros that were defined above to handle this. */
#undef save_offset1
#undef save_offset2
#undef save_offset3
#undef stacksave
#undef newptrb
#endif /* NO_RECURSE */
#endif /* HEAP_MATCH_RECURSE */
/* These two are defined as macros in both cases */
@ -6105,7 +6208,7 @@ Undefine all the macros that were defined above to handle this. */
***************************************************************************/
#ifdef NO_RECURSE
#ifdef HEAP_MATCH_RECURSE
/*************************************************
* Release allocated heap frames *
*************************************************/
@ -6131,7 +6234,7 @@ while (nextframe != NULL)
mb->stack_memctl.free(oldframe, mb->stack_memctl.memory_data);
}
}
#endif /* NO_RECURSE */
#endif /* HEAP_MATCH_RECURSE */
@ -6160,8 +6263,8 @@ Returns: > 0 => success; value is the number of ovector pairs filled
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, int length,
PCRE2_OFFSET start_offset, uint32_t options, pcre2_match_data *match_data,
pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
pcre2_match_context *mcontext)
{
int rc;
@ -6198,23 +6301,24 @@ is used below, and it expects NLBLOCK to be defined as a pointer. */
match_block actual_match_block;
match_block *mb = &actual_match_block;
#ifdef NO_RECURSE
#ifdef HEAP_MATCH_RECURSE
heapframe frame_zero;
frame_zero.Xprevframe = NULL; /* Marks the top level */
frame_zero.Xnextframe = NULL; /* None are allocated yet */
mb->match_frames_base = &frame_zero;
#endif
/* A negative length implies a zero-terminated subject string. */
/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
subject string. */
if (length < 0) length = PRIV(strlen)(subject);
if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
/* Plausibility checks */
if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
if (code == NULL || subject == NULL || match_data == NULL)
return PCRE2_ERROR_NULL;
if ((int)start_offset > length) return PCRE2_ERROR_BADOFFSET;
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
/* Check that the first field in the block is the magic number. If it is not,
return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to
@ -6261,7 +6365,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
return match_data->rc;
}
#if PCRE2_CODE_UNIT_WIDTH != 32
if (start_offset > 0 && (int)start_offset < length &&
if (start_offset > 0 && start_offset < length &&
NOT_FIRSTCHAR(subject[start_offset]))
return PCRE2_ERROR_BADUTFOFFSET;
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
@ -6296,7 +6400,7 @@ if (mcontext == NULL)
{
mb->callout = NULL;
mb->memctl = re->memctl;
#ifdef NO_RECURSE
#ifdef HEAP_MATCH_RECURSE
mb->stack_memctl = re->memctl;
#endif
}
@ -6305,7 +6409,7 @@ else
mb->callout = mcontext->callout;
mb->callout_data = mcontext->callout_data;
mb->memctl = mcontext->memctl;
#ifdef NO_RECURSE
#ifdef HEAP_MATCH_RECURSE
mb->stack_memctl = mcontext->stack_memctl;
#endif
}
@ -6394,7 +6498,7 @@ offsets, and the top third is working space. */
if (re->top_backref >= match_data->oveccount)
{
ocount = re->top_backref * 3 + 3;
mb->ovector = (PCRE2_OFFSET *)(mb->memctl.malloc(ocount * sizeof(PCRE2_OFFSET),
mb->ovector = (PCRE2_SIZE *)(mb->memctl.malloc(ocount * sizeof(PCRE2_SIZE),
mb->memctl.memory_data));
if (mb->ovector == NULL) return PCRE2_ERROR_NOMEMORY;
using_temporary_offsets = TRUE;
@ -6417,8 +6521,8 @@ in case they inspect these fields. */
if (ocount > 0)
{
register PCRE2_OFFSET *iptr = mb->ovector + ocount;
register PCRE2_OFFSET *iend = iptr - re->top_bracket;
register PCRE2_SIZE *iptr = mb->ovector + ocount;
register PCRE2_SIZE *iend = iptr - re->top_bracket;
if (iend < mb->ovector + 2) iend = mb->ovector + 2;
while (--iptr >= iend) *iptr = PCRE2_UNSET;
mb->ovector[0] = mb->ovector[1] = PCRE2_UNSET;
@ -6782,7 +6886,7 @@ for(;;)
ENDLOOP:
#ifdef NO_RECURSE
#ifdef HEAP_MATCH_RECURSE
release_match_heapframes(&frame_zero, mb);
#endif
@ -6810,7 +6914,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
if (arg_offset_max >= 4)
{
memcpy(match_data->ovector + 2, mb->ovector + 2,
(arg_offset_max - 2) * sizeof(PCRE2_OFFSET));
(arg_offset_max - 2) * sizeof(PCRE2_SIZE));
}
if (mb->end_offset_top > arg_offset_max) mb->capture_last |= OVFLBIT;
mb->memctl.free(mb->ovector, mb->memctl.memory_data);
@ -6834,7 +6938,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
if (mb->end_offset_top/2 <= re->top_bracket)
{
register PCRE2_OFFSET *iptr, *iend;
register PCRE2_SIZE *iptr, *iend;
int resetcount = re->top_bracket + 1;
if (resetcount > match_data->oveccount) resetcount = match_data->oveccount;
iptr = match_data->ovector + mb->end_offset_top;

View File

@ -55,7 +55,7 @@ PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
{
pcre2_match_data *yield = PRIV(memctl_malloc)(
sizeof(pcre2_match_data) + 3*oveccount*sizeof(PCRE2_OFFSET),
sizeof(pcre2_match_data) + 3*oveccount*sizeof(PCRE2_SIZE),
(pcre2_memctl *)gcontext);
yield->oveccount = oveccount;
return yield;
@ -94,7 +94,7 @@ if (match_data != NULL)
* Get left-most code unit in match *
*************************************************/
PCRE2_EXP_DEFN PCRE2_OFFSET PCRE2_CALL_CONVENTION
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
pcre2_get_leftchar(pcre2_match_data *match_data)
{
return match_data->leftchar;
@ -118,7 +118,7 @@ return match_data->mark;
* Get pointer to ovector *
*************************************************/
PCRE2_EXP_DEFN PCRE2_OFFSET * PCRE2_CALL_CONVENTION
PCRE2_EXP_DEFN PCRE2_SIZE * PCRE2_CALL_CONVENTION
pcre2_get_ovector_pointer(pcre2_match_data *match_data)
{
return match_data->ovector;
@ -142,7 +142,7 @@ return match_data->oveccount;
* Get right-most code unit in match *
*************************************************/
PCRE2_EXP_DEFN PCRE2_OFFSET PCRE2_CALL_CONVENTION
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
pcre2_get_rightchar(pcre2_match_data *match_data)
{
return match_data->rightchar;
@ -154,7 +154,7 @@ return match_data->rightchar;
* Get starting code unit in match *
*************************************************/
PCRE2_EXP_DEFN PCRE2_OFFSET PCRE2_CALL_CONVENTION
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
pcre2_get_startchar(pcre2_match_data *match_data)
{
return match_data->startchar;

View File

@ -335,7 +335,7 @@ size_t *lensp;
pcre2_memctl *memp;
PCRE2_UCHAR **listp;
PCRE2_UCHAR *sp;
PCRE2_OFFSET *ovector;
PCRE2_SIZE *ovector;
if ((count = match_data->rc) < 0) return count;

View File

@ -58,7 +58,7 @@ strings. */
/* This function should never be called when UTF is not supported. */
int
PRIV(valid_utf)(PCRE2_SPTR string, int length, PCRE2_OFFSET *erroroffset)
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
{
(void)string;
(void)length;
@ -81,7 +81,7 @@ invalid string are then undefined.
Arguments:
string points to the string
length length of string, or -1 if the string is zero-terminated
length length of string
errp pointer to an error position offset variable
Returns: == 0 if the string is a valid UTF string
@ -89,17 +89,11 @@ Returns: == 0 if the string is a valid UTF string
*/
int
PRIV(valid_utf)(PCRE2_SPTR string, int length, PCRE2_OFFSET *erroroffset)
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
{
register PCRE2_SPTR p;
register uint32_t c;
if (length < 0)
{
for (p = string; *p != 0; p++);
length = (int)(p - string);
}
/* ----------------- Check a UTF-8 string ----------------- */
#if PCRE2_CODE_UNIT_WIDTH == 8
@ -155,7 +149,7 @@ for (p = string; length-- > 0; p++)
}
ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes (1-5) */
if (length < (int)ab) /* Missing bytes */
if (length < ab) /* Missing bytes */
{
*erroroffset = (int)(p - string);
switch(ab - length)

View File

@ -77,8 +77,8 @@ int utf8;
uint32_t option_bits;
uint32_t newline;
PCRE2_OFFSET erroroffset;
PCRE2_OFFSET *ovector;
PCRE2_SIZE erroroffset;
PCRE2_SIZE *ovector;
size_t subject_length;
pcre2_match_data *match_data;
@ -314,7 +314,7 @@ crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
for (;;)
{
uint32_t options = 0; /* Normally no options */
PCRE2_OFFSET start_offset = ovector[1]; /* Start at end of previous match */
PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */
/* If the previous match was for an empty string, we are finished if we are
at the end of the subject. Otherwise, arrange to run another match at the

View File

@ -179,7 +179,7 @@ static uint32_t recursion_limit = 0;
static pcre2_compile_context *compile_context;
static pcre2_match_context *match_context;
static pcre2_match_data *match_data;
static PCRE2_OFFSET *offsets;
static PCRE2_SIZE *offsets;
static BOOL count_only = FALSE;
static BOOL do_colour = FALSE;
@ -1736,14 +1736,14 @@ while (ptr < endptr)
if (line_offsets)
fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
offsets[1] - offsets[0]);
(int)(offsets[1] - offsets[0]));
/* Handle --file-offsets */
else if (file_offsets)
fprintf(stdout, "%d,%d\n",
(int)(filepos + matchptr + offsets[0] - ptr),
offsets[1] - offsets[0]);
(int)(offsets[1] - offsets[0]));
/* Handle --only-matching, which may occur many times */
@ -2451,7 +2451,7 @@ compile_pattern(patstr *p, int options, int popts, int fromfile,
const char *fromtext, int count)
{
unsigned char buffer[PATBUFSIZE];
PCRE2_OFFSET erroffset;
PCRE2_SIZE erroffset;
char *ps = p->string;
unsigned int patlen = strlen(ps);
int errcode;
@ -2485,16 +2485,16 @@ pcre2_get_error_message(errcode, buffer, PATBUFSIZE);
if (fromfile)
{
fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
"at offset %d: %s\n", count, fromtext, erroffset, buffer);
"at offset %d: %s\n", count, fromtext, (int)erroffset, buffer);
}
else
{
if (count == 0)
fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
fromtext, erroffset, buffer);
fromtext, (int)erroffset, buffer);
else
fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
ordin(count), fromtext, erroffset, buffer);
ordin(count), fromtext, (int)erroffset, buffer);
}
return FALSE;

View File

@ -203,7 +203,7 @@ Returns: 0 on success
PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
regcomp(regex_t *preg, const char *pattern, int cflags)
{
PCRE2_OFFSET erroffset;
PCRE2_SIZE erroffset;
int errorcode;
int options = 0;
int re_nsub = 0;

View File

@ -401,7 +401,7 @@ typedef struct modstruct {
uint16_t which;
uint16_t type;
uint32_t value;
PCRE2_OFFSET offset;
PCRE2_SIZE offset;
} modstruct;
static modstruct modlist[] = {
@ -1758,7 +1758,7 @@ free(block);
/* For recursion malloc/free, to test stacking calls */
#ifdef NO_RECURSE
#ifdef HEAP_MATCH_RECURSE
static void *my_stack_malloc(size_t size, void *data)
{
void *block = malloc(size);
@ -1775,7 +1775,7 @@ if (show_memory)
fprintf(outfile, "stack_free %p\n", block);
free(block);
}
#endif /* NO_RECURSE */
#endif /* HEAP_MATCH_RECURSE */
/*************************************************
@ -2422,7 +2422,7 @@ static void *
check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
{
void *field = NULL;
PCRE2_OFFSET offset = m->offset;
PCRE2_SIZE offset = m->offset;
if (restrict_for_perl_test) switch(m->which)
{
@ -2448,7 +2448,7 @@ switch (m->which)
/* Fall through for something that can also be in a match context. In this
case the offset is taken from the other field. */
offset = (PCRE2_OFFSET)(m->value);
offset = (PCRE2_SIZE)(m->value);
case MOD_CTM: /* Match context modifier */
if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
@ -3310,7 +3310,7 @@ uint8_t *p = buffer;
const uint8_t *use_tables;
unsigned int delimiter = *p++;
int patlen, errorcode;
PCRE2_OFFSET erroroffset;
PCRE2_SIZE erroroffset;
/* Initialize the context and pattern/data controls for this test from the
defaults. */
@ -4403,7 +4403,7 @@ for (gmatched = 0;; gmatched++)
{
int i;
uint8_t *nptr;
PCRE2_OFFSET *ovector;
PCRE2_SIZE *ovector;
/* This is a check against a lunatic return value. */
@ -4439,8 +4439,8 @@ for (gmatched = 0;; gmatched++)
ovector = FLD(match_data, ovector);
for (i = 0; i < 2*capcount; i += 2)
{
PCRE2_OFFSET start = ovector[i];
PCRE2_OFFSET end = ovector[i+1];
PCRE2_SIZE start = ovector[i];
PCRE2_SIZE end = ovector[i+1];
if (start > end)
{
@ -4643,7 +4643,7 @@ for (gmatched = 0;; gmatched++)
else if (capcount == PCRE2_ERROR_PARTIAL)
{
PCRE2_OFFSET leftchar = FLD(match_data, leftchar);
PCRE2_SIZE leftchar = FLD(match_data, leftchar);
fprintf(outfile, "Partial match");
if (leftchar != FLD(match_data, startchar))
fprintf(outfile, " at offset %d", (int)FLD(match_data, startchar));
@ -4685,8 +4685,8 @@ for (gmatched = 0;; gmatched++)
else if (g_notempty != 0) /* There was a previous null match */
{
uint16_t nl = FLD(compiled_code, newline_convention);
PCRE2_OFFSET start_offset = dat_datctl.offset; /* Where the match was */
PCRE2_OFFSET end_offset = start_offset + 1;
PCRE2_SIZE start_offset = dat_datctl.offset; /* Where the match was */
PCRE2_SIZE end_offset = start_offset + 1;
if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY ||
nl == PCRE2_NEWLINE_ANYCRLF) &&
@ -4765,7 +4765,7 @@ for (gmatched = 0;; gmatched++)
if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
{
PCRE2_OFFSET end_offset = FLD(match_data, ovector)[1];
PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
/* We must now set up for the next iteration of a global search. If we have
matched an empty string, first check to see if we are at the end of the
@ -5278,7 +5278,7 @@ if (test_mode == PCRE8_MODE)
default_dat_context8 = pcre2_match_context_create_8(general_context8);
dat_context8 = pcre2_match_context_create_8(general_context8);
match_data8 = pcre2_match_data_create_8(max_oveccount, general_context8);
#ifdef NO_RECURSE
#ifdef HEAP_MATCH_RECURSE
(void)pcre2_set_recursion_memory_management_8(default_dat_context8,
&my_stack_malloc, &my_stack_free, NULL);
#endif
@ -5295,7 +5295,7 @@ if (test_mode == PCRE16_MODE)
default_dat_context16 = pcre2_match_context_create_16(general_context16);
dat_context16 = pcre2_match_context_create_16(general_context16);
match_data16 = pcre2_match_data_create_16(max_oveccount, general_context16);
#ifdef NO_RECURSE
#ifdef HEAP_MATCH_RECURSE
(void)pcre2_set_recursion_memory_management_16(default_dat_context16,
&my_stack_malloc, &my_stack_free, NULL);
#endif
@ -5312,7 +5312,7 @@ if (test_mode == PCRE32_MODE)
default_dat_context32 = pcre2_match_context_create_32(general_context32);
dat_context32 = pcre2_match_context_create_32(general_context32);
match_data32 = pcre2_match_data_create_32(max_oveccount, general_context32);
#ifdef NO_RECURSE
#ifdef HEAP_MATCH_RECURSE
(void)pcre2_set_recursion_memory_management_32(default_dat_context32,
&my_stack_malloc, &my_stack_free, NULL);
#endif

6
testdata/testinput2 vendored
View File

@ -4042,5 +4042,11 @@ a random value. /Ix
abxxx
ca
cd
# This should test both paths for processing OP_RECURSE.
/(?(R)a+|(?R)b)/
aaaabcde
aaaabcde\=ovector=100
# End of testinput2

View File

@ -13671,5 +13671,13 @@ No match
cd
0:
0+ cd
# This should test both paths for processing OP_RECURSE.
/(?(R)a+|(?R)b)/
aaaabcde
0: aaaab
aaaabcde\=ovector=100
0: aaaab
# End of testinput2