Change lengths to PCRE2_SIZE and revise OP_RECURSE processing.
This commit is contained in:
parent
d108fc47ad
commit
7efba85b56
|
@ -429,7 +429,7 @@ endif # WITH_PCRE8
|
|||
if WITH_JIT
|
||||
TESTS += pcre2_jit_test
|
||||
noinst_PROGRAMS += pcre2_jit_test
|
||||
pcre2_jit_test_SOURCES = pcre2_jit_test.c
|
||||
pcre2_jit_test_SOURCES = src/pcre2_jit_test.c
|
||||
pcre2_jit_test_CFLAGS = $(AM_CFLAGS)
|
||||
pcre2_jit_test_LDADD =
|
||||
if WITH_PCRE8
|
||||
|
|
|
@ -523,10 +523,10 @@ if test "$enable_utf" = "yes"; then
|
|||
fi
|
||||
|
||||
if test "$enable_stack_for_recursion" = "no"; then
|
||||
AC_DEFINE([NO_RECURSE], [], [
|
||||
AC_DEFINE([HEAP_MATCH_RECURSE], [], [
|
||||
PCRE2 uses recursive function calls to handle backtracking while
|
||||
matching. This can sometimes be a problem on systems that have
|
||||
stacks of limited size. Define NO_RECURSE to any value to get a
|
||||
stacks of limited size. Define HEAP_MATCH_RECURSE to any value to get a
|
||||
version that doesn't use recursion in the match() function; instead
|
||||
it creates its own stack by steam using memory from the heap. For more
|
||||
detail, see the comments and other stuff just above the match() function.])
|
||||
|
@ -608,7 +608,7 @@ AC_DEFINE_UNQUOTED([MATCH_LIMIT_RECURSION], [$with_match_limit_recursion], [
|
|||
increase the recursion depth. In some environments it is desirable
|
||||
to limit the depth of recursive calls of match() more strictly, in
|
||||
order to restrict the maximum amount of stack (or heap, if
|
||||
NO_RECURSE is defined) that is used. The value of
|
||||
HEAP_MATCH_RECURSE is defined) that is used. The value of
|
||||
MATCH_LIMIT_RECURSION applies only to recursive calls of match(). To
|
||||
have any useful effect, it must be less than the value of
|
||||
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT.
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
/* src/config.h. Generated from config.h.in by configure. */
|
||||
/* src/config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* PCRE is written in Standard C, but there are a few non-standard things it
|
||||
/* PCRE2 is written in Standard C, but there are a few non-standard things it
|
||||
can cope with, allowing it to run on SunOS4 and other "close to standard"
|
||||
systems.
|
||||
|
||||
In environments that support the GNU autotools, config.h.in is converted into
|
||||
config.h by the "configure" script. In environments that use CMake,
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE "by
|
||||
config-cmake.in is converted into config.h. If you are going to build PCRE2 "by
|
||||
hand" without using "configure" or CMake, you should copy the distributed
|
||||
config.h.generic to config.h, and edit the macro definitions to be the way you
|
||||
need them. You must then add -DHAVE_CONFIG_H to all of your compile commands,
|
||||
|
@ -24,31 +24,28 @@ macros are listed as a commented #undef in config.h.generic. Macros such as
|
|||
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is defined. If your system has neither bcopy() nor memmove(), make
|
||||
sure both macros are undefined; an emulation function will then be used. */
|
||||
|
||||
/* By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined (to any
|
||||
value), this is changed so that backslash-R matches only CR, LF, or CRLF.
|
||||
The build-time default can be overridden by the user of PCRE at runtime. */
|
||||
The build-time default can be overridden by the user of PCRE2 at runtime.
|
||||
*/
|
||||
/* #undef BSR_ANYCRLF */
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro to any value. You must also edit the
|
||||
NEWLINE macro below to set a suitable EBCDIC newline, commonly 21 (0x15).
|
||||
On systems that can use "configure" or CMake to set EBCDIC, NEWLINE is
|
||||
automatically adjusted. When EBCDIC is set, PCRE assumes that all input
|
||||
strings are in EBCDIC. If you do not define this macro, PCRE will assume
|
||||
input strings are ASCII or UTF-8/16/32 Unicode. It is not possible to build
|
||||
a version of PCRE that supports both EBCDIC and UTF-8/16/32. */
|
||||
character codes, define this macro to any value. When EBCDIC is set, PCRE2
|
||||
assumes that all input strings are in EBCDIC. If you do not define this
|
||||
macro, PCRE2 will assume input strings are ASCII or UTF-8/16/32 Unicode. It
|
||||
is not possible to build a version of PCRE2 that supports both EBCDIC and
|
||||
UTF-8/16/32. */
|
||||
/* #undef EBCDIC */
|
||||
|
||||
/* In an EBCDIC environment, define this macro to any value to arrange for the
|
||||
NL character to be 0x25 instead of the default 0x15. NL plays the role that
|
||||
LF does in an ASCII/Unicode environment. The value must also be set in the
|
||||
NEWLINE macro below. On systems that can use "configure" or CMake to set
|
||||
EBCDIC_NL25, the adjustment of NEWLINE is automatic. */
|
||||
LF does in an ASCII/Unicode environment. */
|
||||
/* #undef EBCDIC_NL25 */
|
||||
|
||||
/* Define to 1 if you have the `bcopy' function. */
|
||||
|
@ -126,11 +123,19 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* Define to 1 if you have the <zlib.h> header file. */
|
||||
/* #undef HAVE_ZLIB_H */
|
||||
|
||||
/* PCRE2 uses recursive function calls to handle backtracking while matching.
|
||||
This can sometimes be a problem on systems that have stacks of limited
|
||||
size. Define HEAP_MATCH_RECURSE to any value to get a version that doesn't
|
||||
use recursion in the match() function; instead it creates its own stack by
|
||||
steam using memory from the heap. For more detail, see the comments and
|
||||
other stuff just above the match() function. */
|
||||
/* #undef HEAP_MATCH_RECURSE */
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
|
||||
for longer patterns in extreme cases. */
|
||||
However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This
|
||||
allows for longer patterns in extreme cases. */
|
||||
#ifndef LINK_SIZE
|
||||
#define LINK_SIZE 2
|
||||
#endif
|
||||
|
@ -144,7 +149,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
internal match() function can be called during a single execution of
|
||||
pcre_exec(). There is a runtime interface for setting a different limit.
|
||||
pcre2_match(). There is a runtime interface for setting a different limit.
|
||||
The limit exists in order to catch runaway regular expressions that take
|
||||
for ever to determine that they do not match. The default is set very large
|
||||
so that it does not accidentally catch legitimate cases. */
|
||||
|
@ -155,11 +160,11 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* The above limit applies to all calls of match(), whether or not they
|
||||
increase the recursion depth. In some environments it is desirable to limit
|
||||
the depth of recursive calls of match() more strictly, in order to restrict
|
||||
the maximum amount of stack (or heap, if NO_RECURSE is defined) that is
|
||||
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
|
||||
match(). To have any useful effect, it must be less than the value of
|
||||
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
|
||||
a runtime method for setting a different limit. */
|
||||
the maximum amount of stack (or heap, if HEAP_MATCH_RECURSE is defined)
|
||||
that is used. The value of MATCH_LIMIT_RECURSION applies only to recursive
|
||||
calls of match(). To have any useful effect, it must be less than the value
|
||||
of MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There
|
||||
is a runtime method for setting a different limit. */
|
||||
#ifndef MATCH_LIMIT_RECURSION
|
||||
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
|
||||
#endif
|
||||
|
@ -178,27 +183,14 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define MAX_NAME_SIZE 32
|
||||
#endif
|
||||
|
||||
/* The value of NEWLINE determines the default newline character sequence.
|
||||
PCRE client programs can override this by selecting other values at run
|
||||
time. In ASCII environments, the value can be 10 (LF), 13 (CR), or 3338
|
||||
(CRLF); in EBCDIC environments the value can be 21 or 37 (LF), 13 (CR), or
|
||||
3349 or 3365 (CRLF) because there are two alternative codepoints (0x15 and
|
||||
0x25) that are used as the NL line terminator that is equivalent to ASCII
|
||||
LF. In both ASCII and EBCDIC environments the value can also be -1 (ANY),
|
||||
or -2 (ANYCRLF). */
|
||||
#ifndef NEWLINE
|
||||
#define NEWLINE 10
|
||||
/* The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), and 5
|
||||
(ANYCRLF). */
|
||||
#ifndef NEWLINE_DEFAULT
|
||||
#define NEWLINE_DEFAULT 2
|
||||
#endif
|
||||
|
||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||
This can sometimes be a problem on systems that have stacks of limited
|
||||
size. Define NO_RECURSE to any value to get a version that doesn't use
|
||||
recursion in the match() function; instead it creates its own stack by
|
||||
steam using pcre_recurse_malloc() to obtain memory from the heap. For more
|
||||
detail, see the comments and other stuff just above the match() function.
|
||||
*/
|
||||
/* #undef NO_RECURSE */
|
||||
|
||||
/* Name of package */
|
||||
#define PACKAGE "pcre2"
|
||||
|
||||
|
@ -209,7 +201,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_NAME "PCRE2"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE2 9.00-DEV"
|
||||
#define PACKAGE_STRING "PCRE2 10.00-DEV"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre2"
|
||||
|
@ -218,7 +210,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "9.00-DEV"
|
||||
#define PACKAGE_VERSION "10.00-DEV"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
|
@ -227,15 +219,13 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PARENS_NEST_LIMIT 250
|
||||
#endif
|
||||
|
||||
/* #undef PCRE2_EXP_DEFN */
|
||||
|
||||
/* The value of PCREGREP_BUFSIZE determines the size of buffer used by
|
||||
pcregrep to hold parts of the file it is searching. This is also the
|
||||
minimum value. The actual amount of memory used by pcregrep is three times
|
||||
/* The value of PCRE2GREP_BUFSIZE determines the size of buffer used by
|
||||
pcre2grep to hold parts of the file it is searching. This is also the
|
||||
minimum value. The actual amount of memory used by pcre2grep is three times
|
||||
this number, because it allows for the buffering of "before" and "after"
|
||||
lines. */
|
||||
#ifndef PCREGREP_BUFSIZE
|
||||
#define PCREGREP_BUFSIZE 20480
|
||||
#ifndef PCRE2GREP_BUFSIZE
|
||||
#define PCRE2GREP_BUFSIZE 20480
|
||||
#endif
|
||||
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
|
@ -247,21 +237,10 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
This macro appears at the start of every exported function that is part
|
||||
of the external API. It does not appear on functions that are "external"
|
||||
in the C sense, but which are internal to the library. */
|
||||
/* #undef PCRE_EXP_DEFN */
|
||||
/* #undef PCRE2_EXP_DEFN */
|
||||
|
||||
/* Define to any value if linking statically (TODO: make nice with Libtool) */
|
||||
/* #undef PCRE_STATIC */
|
||||
|
||||
/* When calling PCRE via the POSIX interface, additional working storage is
|
||||
required for holding the pointers to capturing substrings because PCRE
|
||||
requires three integers per substring, whereas the POSIX interface provides
|
||||
only two. If the number of expected substrings is small, the wrapper
|
||||
function uses space on the stack, because this is faster than using
|
||||
malloc() for each call. The threshold above which the stack is no longer
|
||||
used is defined by POSIX_MALLOC_THRESHOLD. */
|
||||
#ifndef POSIX_MALLOC_THRESHOLD
|
||||
#define POSIX_MALLOC_THRESHOLD 10
|
||||
#endif
|
||||
/* #undef PCRE2_STATIC */
|
||||
|
||||
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||
your system. */
|
||||
|
@ -273,35 +252,35 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* Define to any value to enable support for Just-In-Time compiling. */
|
||||
/* #undef SUPPORT_JIT */
|
||||
|
||||
/* Define to any value to allow pcregrep to be linked with libbz2, so that it
|
||||
/* Define to any value to allow pcre2grep to be linked with libbz2, so that it
|
||||
is able to handle .bz2 files. */
|
||||
/* #undef SUPPORT_LIBBZ2 */
|
||||
|
||||
/* Define to any value to allow pcretest to be linked with libedit. */
|
||||
/* Define to any value to allow pcre2test to be linked with libedit. */
|
||||
/* #undef SUPPORT_LIBEDIT */
|
||||
|
||||
/* Define to any value to allow pcretest to be linked with libreadline. */
|
||||
/* Define to any value to allow pcre2test to be linked with libreadline. */
|
||||
/* #undef SUPPORT_LIBREADLINE */
|
||||
|
||||
/* Define to any value to allow pcregrep to be linked with libz, so that it is
|
||||
able to handle .gz files. */
|
||||
/* Define to any value to allow pcre2grep to be linked with libz, so that it
|
||||
is able to handle .gz files. */
|
||||
/* #undef SUPPORT_LIBZ */
|
||||
|
||||
/* Define to any value to enable the 16 bit PCRE library. */
|
||||
/* Define to any value to enable the 16 bit PCRE2 library. */
|
||||
/* #undef SUPPORT_PCRE16 */
|
||||
|
||||
/* Define to any value to enable the 32 bit PCRE library. */
|
||||
/* Define to any value to enable JIT support in pcre2grep. */
|
||||
/* #undef SUPPORT_PCRE2GREP_JIT */
|
||||
|
||||
/* Define to any value to enable the 32 bit PCRE2 library. */
|
||||
/* #undef SUPPORT_PCRE32 */
|
||||
|
||||
/* Define to any value to enable the 8 bit PCRE library. */
|
||||
/* Define to any value to enable the 8 bit PCRE2 library. */
|
||||
/* #undef SUPPORT_PCRE8 */
|
||||
|
||||
/* Define to any value to enable JIT support in pcregrep. */
|
||||
/* #undef SUPPORT_PCREGREP_JIT */
|
||||
|
||||
/* Define to any value to enable support for the UTF-8/16/32 Unicode encoding.
|
||||
This will work even in an EBCDIC environment, but it is incompatible with
|
||||
the EBCDIC macro. That is, PCRE can support *either* EBCDIC code *or*
|
||||
the EBCDIC macro. That is, PCRE2 can support *either* EBCDIC code *or*
|
||||
ASCII/UTF-8/16/32, but not both at once. */
|
||||
/* #undef SUPPORT_UTF */
|
||||
|
||||
|
@ -309,7 +288,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* #undef SUPPORT_VALGRIND */
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "9.00-DEV"
|
||||
#define VERSION "10.00-DEV"
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
|
|
@ -271,12 +271,14 @@ typedef const PCRE2_UCHAR8 *PCRE2_SPTR8;
|
|||
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16;
|
||||
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32;
|
||||
|
||||
/* Offsets in the pattern (for errors) and in the subject (after a match) are
|
||||
unsigned 32-bit numbers. We also define a value to indicate "unset" in the
|
||||
offset vector (ovector). */
|
||||
/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
|
||||
including pattern offsets for errors and subject offsets after a match. We
|
||||
define special values to indicate zero-terminated strings and unset offsets in
|
||||
the offset vector (ovector). */
|
||||
|
||||
#define PCRE2_OFFSET PCRE2_UCHAR32
|
||||
#define PCRE2_UNSET (~(PCRE2_OFFSET)0)
|
||||
#define PCRE2_SIZE size_t
|
||||
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
|
||||
#define PCRE2_UNSET (~(PCRE2_SIZE)0)
|
||||
|
||||
/* Generic types for opaque structures and JIT callback functions. These
|
||||
declarations are defined in a macro that is expanded for each width later. */
|
||||
|
@ -314,17 +316,17 @@ typedef struct pcre2_callout_block { \
|
|||
int version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||
PCRE2_OFFSET *offset_vector; /* The offset vector */ \
|
||||
PCRE2_SIZE *offset_vector; /* The offset vector */ \
|
||||
PCRE2_SPTR subject; /* The subject being matched */ \
|
||||
size_t subject_length; /* The length of the subject */ \
|
||||
PCRE2_OFFSET start_match; /* Offset to start of this match attempt */ \
|
||||
PCRE2_OFFSET current_position; /* Where we currently are in the subject */ \
|
||||
PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \
|
||||
PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
|
||||
uint32_t capture_top; /* Max current capture */ \
|
||||
uint32_t capture_last; /* Most recently closed capture */ \
|
||||
void *callout_data; /* Data passed in with the call */ \
|
||||
/* ------------------- Added for Version 1 -------------------------- */ \
|
||||
PCRE2_OFFSET pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_OFFSET next_item_length; /* Length of next item in the pattern */ \
|
||||
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||
/* ------------------- Added for Version 2 -------------------------- */ \
|
||||
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
|
@ -392,8 +394,8 @@ PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
|
|||
|
||||
#define PCRE2_COMPILE_FUNCTIONS \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_code *pcre2_compile(PCRE2_SPTR, int, uint32_t, \
|
||||
int *, PCRE2_OFFSET *, pcre2_compile_context *); \
|
||||
pcre2_code *pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, \
|
||||
int *, PCRE2_SIZE *, pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_code_free(pcre2_code *);
|
||||
|
||||
|
||||
|
@ -408,25 +410,25 @@ PCRE2_EXP_DECL int pcre2_pattern_info(const pcre2_code *, uint32_t, \
|
|||
|
||||
#define PCRE2_MATCH_FUNCTIONS \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_match_data *pcre2_match_data_create(uint32_t, \
|
||||
pcre2_general_context *); \
|
||||
pcre2_match_data *pcre2_match_data_create(uint32_t, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, \
|
||||
PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *, int *, \
|
||||
size_t); \
|
||||
PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \
|
||||
PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_OFFSET pcre2_get_leftchar(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL uint32_t pcre2_get_ovector_count(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_OFFSET *pcre2_get_ovector_pointer(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_OFFSET pcre2_get_rightchar(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_OFFSET pcre2_get_startchar(pcre2_match_data *);
|
||||
pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, \
|
||||
PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *, int *, \
|
||||
size_t); \
|
||||
PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \
|
||||
PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_leftchar(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL uint32_t pcre2_get_ovector_count(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_rightchar(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *);
|
||||
|
||||
|
||||
/* Convenience functions for handling matched substrings. */
|
||||
|
@ -457,9 +459,9 @@ PCRE2_EXP_DECL int pcre2_substring_list_get(pcre2_match_data *, \
|
|||
/* Functions for JIT processing */
|
||||
|
||||
#define PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL void pcre2_jit_compile(pcre2_code *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_jit_compile(pcre2_code *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \
|
||||
PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \
|
||||
PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *, \
|
||||
pcre2_jit_stack *); \
|
||||
PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *);\
|
||||
|
|
|
@ -7256,8 +7256,8 @@ Returns: pointer to compiled data block, or NULL on error,
|
|||
*/
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION
|
||||
pcre2_compile(PCRE2_SPTR pattern, int patlen, uint32_t options,
|
||||
int *errorptr, PCRE2_OFFSET *erroroffset, pcre2_compile_context *ccontext)
|
||||
pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options,
|
||||
int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext)
|
||||
{
|
||||
BOOL utf; /* Set TRUE for UTF mode */
|
||||
pcre2_real_code *re = NULL; /* What we will return */
|
||||
|
@ -7324,10 +7324,12 @@ if (ccontext == NULL)
|
|||
ccontext = &default_context;
|
||||
}
|
||||
|
||||
/* A negative pattern length means "zero-terminated". Otherwise, we make
|
||||
a copy of the pattern and add a zero. */
|
||||
/* A zero-terminated pattern is indicated by the special length value
|
||||
PCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero,
|
||||
to ensure that it is always possible to look one code unit beyond the end of
|
||||
the pattern's characters. */
|
||||
|
||||
if (patlen < 0) patlen = PRIV(strlen)(pattern); else
|
||||
if (patlen == PCRE2_ZERO_TERMINATED) patlen = PRIV(strlen)(pattern); else
|
||||
{
|
||||
if (patlen < COPIED_PATTERN_SIZE)
|
||||
copied_pattern = stack_copied_pattern;
|
||||
|
@ -7473,7 +7475,7 @@ if (utf)
|
|||
goto HAD_ERROR;
|
||||
}
|
||||
if ((options & PCRE2_NO_UTF_CHECK) == 0 &&
|
||||
(errorcode = PRIV(valid_utf)(pattern, -1, erroroffset)) != 0)
|
||||
(errorcode = PRIV(valid_utf)(pattern, patlen, erroroffset)) != 0)
|
||||
goto HAD_ERROR;
|
||||
}
|
||||
|
||||
|
|
|
@ -136,7 +136,7 @@ switch (what)
|
|||
break;
|
||||
|
||||
case PCRE2_CONFIG_STACKRECURSE:
|
||||
#ifdef NO_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
*((int *)where) = 0;
|
||||
#else
|
||||
*((int *)where) = 1;
|
||||
|
|
|
@ -164,7 +164,7 @@ if (defmemctl)
|
|||
mcontext->memctl.free = default_free;
|
||||
mcontext->memctl.memory_data = NULL;
|
||||
}
|
||||
#ifdef NO_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
mcontext->stack_memctl = mcontext->memctl;
|
||||
#endif
|
||||
mcontext->callout = NULL;
|
||||
|
@ -389,7 +389,7 @@ pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
|
|||
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
|
||||
void *mydata)
|
||||
{
|
||||
#ifdef NO_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
mcontext->stack_memctl.malloc = mymalloc;
|
||||
mcontext->stack_memctl.free = myfree;
|
||||
mcontext->stack_memctl.memory_data = mydata;
|
||||
|
|
|
@ -365,8 +365,8 @@ internal_dfa_match(
|
|||
dfa_match_block *mb,
|
||||
PCRE2_SPTR this_start_code,
|
||||
PCRE2_SPTR current_subject,
|
||||
PCRE2_OFFSET start_offset,
|
||||
PCRE2_OFFSET *offsets,
|
||||
PCRE2_SIZE start_offset,
|
||||
PCRE2_SIZE *offsets,
|
||||
uint32_t offsetcount,
|
||||
int *workspace,
|
||||
int wscount,
|
||||
|
@ -730,7 +730,7 @@ for (;;)
|
|||
else if (match_count > 0 && ++match_count * 2 > (int)offsetcount)
|
||||
match_count = 0;
|
||||
count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2;
|
||||
if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
|
||||
if (count > 0) memmove(offsets + 2, offsets, count * sizeof(PCRE2_SIZE));
|
||||
if (offsetcount >= 2)
|
||||
{
|
||||
offsets[0] = (int)(current_subject - start_subject);
|
||||
|
@ -2560,7 +2560,7 @@ for (;;)
|
|||
case OP_ASSERTBACK_NOT:
|
||||
{
|
||||
PCRE2_SPTR endasscode = code + GET(code, 1);
|
||||
PCRE2_OFFSET local_offsets[2];
|
||||
PCRE2_SIZE local_offsets[2];
|
||||
int rc;
|
||||
int local_workspace[1000];
|
||||
|
||||
|
@ -2572,7 +2572,7 @@ for (;;)
|
|||
ptr, /* where we currently are */
|
||||
(int)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
sizeof(local_offsets)/sizeof(PCRE2_OFFSET), /* size of same */
|
||||
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
sizeof(local_workspace)/sizeof(int), /* size of same */
|
||||
rlevel); /* function recursion level */
|
||||
|
@ -2587,7 +2587,7 @@ for (;;)
|
|||
case OP_COND:
|
||||
case OP_SCOND:
|
||||
{
|
||||
PCRE2_OFFSET local_offsets[1000];
|
||||
PCRE2_SIZE local_offsets[1000];
|
||||
int local_workspace[1000];
|
||||
int codelink = GET(code, 1);
|
||||
int condcode;
|
||||
|
@ -2606,9 +2606,9 @@ for (;;)
|
|||
cb.callout_number = code[LINK_SIZE+2];
|
||||
cb.offset_vector = offsets;
|
||||
cb.subject = start_subject;
|
||||
cb.subject_length = (int)(end_subject - start_subject);
|
||||
cb.start_match = (int)(current_subject - start_subject);
|
||||
cb.current_position = (int)(ptr - start_subject);
|
||||
cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject);
|
||||
cb.start_match = (PCRE2_SIZE)(current_subject - start_subject);
|
||||
cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
|
||||
cb.pattern_position = GET(code, LINK_SIZE + 3);
|
||||
cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
|
||||
cb.capture_top = 1;
|
||||
|
@ -2664,7 +2664,7 @@ for (;;)
|
|||
ptr, /* where we currently are */
|
||||
(int)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
sizeof(local_offsets)/sizeof(PCRE2_OFFSET), /* size of same */
|
||||
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
sizeof(local_workspace)/sizeof(int), /* size of same */
|
||||
rlevel); /* function recursion level */
|
||||
|
@ -2683,7 +2683,7 @@ for (;;)
|
|||
case OP_RECURSE:
|
||||
{
|
||||
dfa_recursion_info *ri;
|
||||
PCRE2_OFFSET local_offsets[1000];
|
||||
PCRE2_SIZE local_offsets[1000];
|
||||
int local_workspace[1000];
|
||||
PCRE2_SPTR callpat = start_code + GET(code, 1);
|
||||
uint32_t recno = (callpat == mb->start_code)? 0 :
|
||||
|
@ -2712,7 +2712,7 @@ for (;;)
|
|||
ptr, /* where we currently are */
|
||||
(int)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
sizeof(local_offsets)/sizeof(PCRE2_OFFSET), /* size of same */
|
||||
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
sizeof(local_workspace)/sizeof(int), /* size of same */
|
||||
rlevel); /* function recursion level */
|
||||
|
@ -2777,7 +2777,7 @@ for (;;)
|
|||
|
||||
for (matched_count = 0;; matched_count++)
|
||||
{
|
||||
PCRE2_OFFSET local_offsets[2];
|
||||
PCRE2_SIZE local_offsets[2];
|
||||
int local_workspace[1000];
|
||||
|
||||
int rc = internal_dfa_match(
|
||||
|
@ -2786,7 +2786,7 @@ for (;;)
|
|||
local_ptr, /* where we currently are */
|
||||
(int)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
sizeof(local_offsets)/sizeof(PCRE2_OFFSET), /* size of same */
|
||||
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
sizeof(local_workspace)/sizeof(int), /* size of same */
|
||||
rlevel); /* function recursion level */
|
||||
|
@ -2849,7 +2849,7 @@ for (;;)
|
|||
case OP_ONCE:
|
||||
case OP_ONCE_NC:
|
||||
{
|
||||
PCRE2_OFFSET local_offsets[2];
|
||||
PCRE2_SIZE local_offsets[2];
|
||||
int local_workspace[1000];
|
||||
|
||||
int rc = internal_dfa_match(
|
||||
|
@ -2858,7 +2858,7 @@ for (;;)
|
|||
ptr, /* where we currently are */
|
||||
(int)(ptr - start_subject), /* start offset */
|
||||
local_offsets, /* offset vector */
|
||||
sizeof(local_offsets)/sizeof(PCRE2_OFFSET), /* size of same */
|
||||
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
|
||||
local_workspace, /* workspace vector */
|
||||
sizeof(local_workspace)/sizeof(int), /* size of same */
|
||||
rlevel); /* function recursion level */
|
||||
|
@ -2948,9 +2948,9 @@ for (;;)
|
|||
cb.callout_number = code[1];
|
||||
cb.offset_vector = offsets;
|
||||
cb.subject = start_subject;
|
||||
cb.subject_length = (int)(end_subject - start_subject);
|
||||
cb.start_match = (int)(current_subject - start_subject);
|
||||
cb.current_position = (int)(ptr - start_subject);
|
||||
cb.subject_length = (PCRE2_SIZE)(end_subject - start_subject);
|
||||
cb.start_match = (PCRE2_SIZE)(current_subject - start_subject);
|
||||
cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
|
||||
cb.pattern_position = GET(code, 2);
|
||||
cb.next_item_length = GET(code, 2 + LINK_SIZE);
|
||||
cb.capture_top = 1;
|
||||
|
@ -3049,8 +3049,8 @@ Returns: > 0 => number of match offset pairs placed in offsets
|
|||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, int length,
|
||||
PCRE2_OFFSET start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_match_context *mcontext, int *workspace, size_t wscount)
|
||||
{
|
||||
const pcre2_real_code *re = (const pcre2_real_code *)code;
|
||||
|
@ -3078,9 +3078,10 @@ is used below, and it expects NLBLOCK to be defined as a pointer. */
|
|||
dfa_match_block actual_match_block;
|
||||
dfa_match_block *mb = &actual_match_block;
|
||||
|
||||
/* A negative length implies a zero-terminated subject string. */
|
||||
/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
|
||||
subject string. */
|
||||
|
||||
if (length < 0) length = PRIV(strlen)(subject);
|
||||
if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
|
||||
|
||||
/* Plausibility checks */
|
||||
|
||||
|
@ -3088,7 +3089,7 @@ if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
|
|||
if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
|
||||
return PCRE2_ERROR_NULL;
|
||||
if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
|
||||
if ((int)start_offset > length) return PCRE2_ERROR_BADOFFSET;
|
||||
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to
|
||||
|
@ -3214,7 +3215,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
|||
return match_data->rc;
|
||||
}
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (start_offset > 0 && (int)start_offset < length &&
|
||||
if (start_offset > 0 && start_offset < length &&
|
||||
NOT_FIRSTCHAR(subject[start_offset]))
|
||||
return PCRE2_ERROR_BADUTFOFFSET;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
|
@ -3466,12 +3467,12 @@ for (;;)
|
|||
{
|
||||
if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0)
|
||||
{
|
||||
match_data->ovector[0] = (PCRE2_OFFSET)(start_match - subject);
|
||||
match_data->ovector[1] = (PCRE2_OFFSET)(end_subject - subject);
|
||||
match_data->ovector[0] = (PCRE2_SIZE)(start_match - subject);
|
||||
match_data->ovector[1] = (PCRE2_SIZE)(end_subject - subject);
|
||||
}
|
||||
match_data->leftchar = (PCRE2_OFFSET)(mb->start_used_ptr - subject);
|
||||
match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject);
|
||||
match_data->rightchar = 0; /* FIXME */
|
||||
match_data->startchar = (PCRE2_OFFSET)(start_match - subject);
|
||||
match_data->startchar = (PCRE2_SIZE)(start_match - subject);
|
||||
match_data->rc = rc;
|
||||
return rc;
|
||||
}
|
||||
|
|
|
@ -1859,7 +1859,7 @@ extern void _pcre2_compile_context_init(pcre2_compile_context *, BOOL);
|
|||
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
||||
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
|
||||
BOOL);
|
||||
extern int _pcre2_jit_get_size(void *);
|
||||
extern size_t _pcre2_jit_get_size(void *);
|
||||
extern void _pcre2_match_context_init(pcre2_match_context *, BOOL);
|
||||
extern void *_pcre2_memctl_malloc(size_t, pcre2_memctl *);
|
||||
extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *);
|
||||
|
@ -1869,7 +1869,7 @@ extern int _pcre2_strlen(PCRE2_SPTR);
|
|||
extern int _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t);
|
||||
extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t);
|
||||
extern int _pcre2_study(pcre2_real_code *);
|
||||
extern int _pcre2_valid_utf(PCRE2_SPTR, int, PCRE2_OFFSET *);
|
||||
extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
|
||||
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
|
||||
BOOL);
|
||||
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);
|
||||
|
|
|
@ -565,7 +565,7 @@ typedef struct pcre2_real_compile_context {
|
|||
|
||||
typedef struct pcre2_real_match_context {
|
||||
pcre2_memctl memctl;
|
||||
#ifdef NO_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
pcre2_memctl stack_memctl;
|
||||
#endif
|
||||
int (*callout)(pcre2_callout_block *);
|
||||
|
@ -609,12 +609,12 @@ typedef struct pcre2_real_match_data {
|
|||
const pcre2_real_code *code; /* The pattern used for the match */
|
||||
PCRE2_SPTR subject; /* The subject that was matched */
|
||||
int rc; /* The return code from the match */
|
||||
PCRE2_OFFSET leftchar; /* Offset to leftmost code unit */
|
||||
PCRE2_OFFSET rightchar; /* Offset to rightmost code unit */
|
||||
PCRE2_OFFSET startchar; /* Offset to starting code unit */
|
||||
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
|
||||
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
|
||||
PCRE2_SIZE startchar; /* Offset to starting code unit */
|
||||
PCRE2_SPTR mark; /* Pointer to last mark */
|
||||
uint16_t oveccount; /* Number of pairs */
|
||||
PCRE2_OFFSET ovector[1]; /* The first field */
|
||||
PCRE2_SIZE ovector[1]; /* The first field */
|
||||
} pcre2_real_match_data;
|
||||
|
||||
|
||||
|
@ -686,12 +686,12 @@ typedef struct compile_block {
|
|||
call within the pattern; used by pcre2_match(). */
|
||||
|
||||
typedef struct recursion_info {
|
||||
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
|
||||
unsigned int group_num; /* Number of group that was called */
|
||||
PCRE2_OFFSET *offset_save; /* Pointer to start of saved offsets */
|
||||
uint32_t saved_max; /* Number of saved offsets */
|
||||
uint32_t saved_capture_last; /* Last capture number */
|
||||
PCRE2_SPTR subject_position; /* Position at start of recursion */
|
||||
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
|
||||
unsigned int group_num; /* Number of group that was called */
|
||||
PCRE2_SIZE *ovec_save; /* Pointer to start of saved ovector */
|
||||
uint32_t saved_max; /* Number of saved offsets */
|
||||
uint32_t saved_capture_last; /* Last capture number */
|
||||
PCRE2_SPTR subject_position; /* Position at start of recursion */
|
||||
} recursion_info;
|
||||
|
||||
/* A similar structure for pcre2_dfa_match(). */
|
||||
|
@ -717,7 +717,7 @@ doing traditional NFA matching (pcre2_match() and friends). */
|
|||
|
||||
typedef struct match_block {
|
||||
pcre2_memctl memctl; /* For general use */
|
||||
#ifdef NO_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
pcre2_memctl stack_memctl; /* For "stack" frames */
|
||||
#endif
|
||||
uint32_t match_call_count; /* As it says */
|
||||
|
@ -728,11 +728,11 @@ typedef struct match_block {
|
|||
const uint8_t *lcc; /* Points to lower casing table */
|
||||
const uint8_t *fcc; /* Points to case-flipping table */
|
||||
const uint8_t *ctypes; /* Points to table of type maps */
|
||||
PCRE2_OFFSET *ovector; /* Pointer to the offset vector */
|
||||
PCRE2_OFFSET offset_end; /* One past the end */
|
||||
PCRE2_OFFSET offset_max; /* The maximum usable for return data */
|
||||
PCRE2_OFFSET start_offset; /* The start offset value */
|
||||
PCRE2_OFFSET end_offset_top; /* Highwater mark at end of match */
|
||||
PCRE2_SIZE *ovector; /* Pointer to the offset vector */
|
||||
PCRE2_SIZE offset_end; /* One past the end */
|
||||
PCRE2_SIZE offset_max; /* The maximum usable for return data */
|
||||
PCRE2_SIZE start_offset; /* The start offset value */
|
||||
PCRE2_SIZE end_offset_top; /* Highwater mark at end of match */
|
||||
uint16_t partial; /* PARTIAL options */
|
||||
uint16_t bsr_convention; /* \R interpretation */
|
||||
uint16_t name_count; /* Number of names in name table */
|
||||
|
@ -760,7 +760,7 @@ typedef struct match_block {
|
|||
recursion_info *recursive; /* Linked list of recursion data */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
int (*callout)(pcre2_callout_block *); /* Callout function or NULL */
|
||||
#ifdef NO_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
void *match_frames_base; /* For remembering malloc'd frames */
|
||||
#endif
|
||||
} match_block;
|
||||
|
@ -769,22 +769,22 @@ typedef struct match_block {
|
|||
functions. */
|
||||
|
||||
typedef struct dfa_match_block {
|
||||
pcre2_memctl memctl; /* For general use */
|
||||
PCRE2_SPTR start_code; /* Start of the compiled pattern */
|
||||
PCRE2_SPTR start_subject ; /* Start of the subject string */
|
||||
PCRE2_SPTR end_subject; /* End of subject string */
|
||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||
const uint8_t *tables; /* Character tables */
|
||||
PCRE2_OFFSET start_offset; /* The start offset value */
|
||||
uint32_t moptions; /* Match options */
|
||||
uint32_t poptions; /* Pattern options */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||
uint16_t bsr_convention; /* \R interpretation */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
pcre2_memctl memctl; /* For general use */
|
||||
PCRE2_SPTR start_code; /* Start of the compiled pattern */
|
||||
PCRE2_SPTR start_subject ; /* Start of the subject string */
|
||||
PCRE2_SPTR end_subject; /* End of subject string */
|
||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||
const uint8_t *tables; /* Character tables */
|
||||
PCRE2_SIZE start_offset; /* The start offset value */
|
||||
uint32_t moptions; /* Match options */
|
||||
uint32_t poptions; /* Pattern options */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||
uint16_t bsr_convention; /* \R interpretation */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
int (*callout)(pcre2_callout_block *); /* Callout function or NULL */
|
||||
dfa_recursion_info *recursive; /* Linked list of recursion data */
|
||||
dfa_recursion_info *recursive; /* Linked list of recursion data */
|
||||
} dfa_match_block;
|
||||
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
|
|
@ -50,11 +50,11 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
* JIT compile a Regular Expression *
|
||||
*************************************************/
|
||||
|
||||
/* This function used JIT to convert a previously-compiled pattern into machine
|
||||
/* This function uses JIT to convert a previously-compiled pattern into machine
|
||||
code.
|
||||
|
||||
Arguments:
|
||||
code a compiled pattern
|
||||
code a compiled pattern
|
||||
options JIT option bits
|
||||
|
||||
Returns: an int status code; this placeholder always returns PCRE2_ERROR_JIT_BADOPTION
|
||||
|
@ -62,16 +62,21 @@ Returns: nothing
|
|||
|
||||
/* FIXME: this is currently a placeholder function */
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_compile(pcre2_code *code, uint32_t options)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
|
||||
(void)code;
|
||||
(void)options;
|
||||
return PCRE2_ERROR_JIT_BADOPTION;
|
||||
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
|
||||
code=code; options = options; /* Dummy.... */
|
||||
/* Dummy code */
|
||||
code=code;
|
||||
options = options;
|
||||
return PCRE2_ERROR_JIT_BADOPTION;
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
|
|
@ -71,11 +71,12 @@ Returns: > 0 => success; value is the number of ovector pairs filled
|
|||
/* FIXME: this is currently a placeholder function */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, int length,
|
||||
PCRE2_OFFSET start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_match_context *mcontext, pcre2_jit_stack *jit_stack)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
|
||||
(void)code;
|
||||
(void)subject;
|
||||
(void)length;
|
||||
|
@ -85,17 +86,21 @@ pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, int length,
|
|||
(void)mcontext;
|
||||
(void)jit_stack;
|
||||
return PCRE2_ERROR_JIT_BADOPTION;
|
||||
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
|
||||
/* Dummy code */
|
||||
code=code;subject=subject;length=length;
|
||||
start_offset=start_offset; options=options; match_data=match_data;
|
||||
code=code;
|
||||
subject=subject;
|
||||
length=length;
|
||||
start_offset=start_offset;
|
||||
options=options;
|
||||
match_data=match_data;
|
||||
mcontext=mcontext;
|
||||
jit_stack=jit_stack;
|
||||
return PCRE2_ERROR_JIT_BADOPTION;
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_jit_match.c */
|
||||
|
|
|
@ -54,14 +54,18 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
*************************************************/
|
||||
|
||||
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_free_unused_memory(pcre2_general_context *gcontext)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
|
||||
(void)gcontext; /* Suppress warning */
|
||||
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
gcontext=gcontext; /* Dummy */
|
||||
/* Dummy code */
|
||||
gcontext=gcontext;
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
@ -71,19 +75,25 @@ gcontext=gcontext; /* Dummy */
|
|||
* Allocate a JIT stack *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_stack_alloc(pcre2_general_context *gcontext, size_t startsize,
|
||||
PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_stack_alloc(pcre2_general_context *gcontext, size_t startsize,
|
||||
size_t maxsize)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
|
||||
(void)gcontext;
|
||||
(void)startsize;
|
||||
(void)maxsize;
|
||||
return NULL;
|
||||
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
gcontext=gcontext;startsize=startsize;maxsize=maxsize;
|
||||
/* Dummy code */
|
||||
gcontext=gcontext;
|
||||
startsize=startsize;
|
||||
maxsize=maxsize;
|
||||
return NULL;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -97,12 +107,18 @@ pcre2_jit_stack_assign(const pcre2_code *code, pcre2_jit_callback callback,
|
|||
void *callback_data)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
|
||||
(void)code;
|
||||
(void)callback;
|
||||
(void)callback_data;
|
||||
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
code=code;callback=callback;callback_data=callback_data;
|
||||
/* Dummy code */
|
||||
code=code;
|
||||
callback=callback;
|
||||
callback_data=callback_data;
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
@ -115,10 +131,14 @@ PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
|||
pcre2_jit_stack_free(pcre2_jit_stack *jit_stack)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
|
||||
(void)jit_stack;
|
||||
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
/* Dummy code */
|
||||
jit_stack=jit_stack;
|
||||
|
||||
#endif /* SUPPORT_JIT */
|
||||
}
|
||||
|
||||
|
@ -127,16 +147,20 @@ jit_stack=jit_stack;
|
|||
* Get size of JIT code *
|
||||
*************************************************/
|
||||
|
||||
int
|
||||
size_t
|
||||
PRIV(jit_get_size)(void *executable_jit)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
|
||||
(void)executable_jit;
|
||||
return 0;
|
||||
|
||||
#else /* SUPPORT_JIT */
|
||||
|
||||
/* Dummy code */
|
||||
executable_jit = executable_jit;
|
||||
return 0; /* FIXME */
|
||||
return 0;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -104,17 +104,18 @@ for any one of them can use a range. */
|
|||
#define MATCH_BACKTRACK_MAX MATCH_THEN
|
||||
#define MATCH_BACKTRACK_MIN MATCH_COMMIT
|
||||
|
||||
/* Maximum number of ints of offset to save on the stack for recursive calls.
|
||||
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
|
||||
because the offset vector is always a multiple of 3 long. */
|
||||
|
||||
#define REC_STACK_SAVE_MAX 30
|
||||
|
||||
/* Min and max values for the common repeats; for the maxima, 0 => infinity */
|
||||
|
||||
static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
|
||||
static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
|
||||
|
||||
/* Maximum number of ovector elements that can be saved on the system stack
|
||||
when processing OP_RECURSE in non-HEAP_MATCH_RECURSE mode. If the ovector is
|
||||
bigger, malloc() is used. This value should be a multiple of 3, because the
|
||||
ovector length is always a multiple of 3. */
|
||||
|
||||
#define OP_RECURSE_STACK_SAVE_MAX 45
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
|
@ -129,11 +130,11 @@ subject bytes matched may be different to the number of reference bytes.
|
|||
Arguments:
|
||||
offset index into the offset vector
|
||||
eptr pointer into the subject
|
||||
length length of reference to be matched (number of bytes)
|
||||
length length of reference to be matched (number of code units)
|
||||
mb points to match block
|
||||
caseless TRUE if caseless
|
||||
|
||||
Returns: >= 0 the number of subject bytes matched
|
||||
Returns: >= 0 the number of subject code units matched
|
||||
-1 no match
|
||||
-2 partial match; always given if at end subject
|
||||
*/
|
||||
|
@ -230,7 +231,7 @@ return (int)(eptr - eptr_start);
|
|||
RECURSION IN THE match() FUNCTION
|
||||
|
||||
The match() function is highly recursive, though not every recursive call
|
||||
increases the recursive depth. Nevertheless, some regular expressions can cause
|
||||
increases the recursion depth. Nevertheless, some regular expressions can cause
|
||||
it to recurse to a great depth. I was writing for Unix, so I just let it call
|
||||
itself recursively. This uses the stack for saving everything that has to be
|
||||
saved for a recursive call. On Unix, the stack can be large, and this works
|
||||
|
@ -241,9 +242,9 @@ programs that use a lot of stack. (This despite the fact that every last chip
|
|||
has oodles of memory these days, and techniques for extending the stack have
|
||||
been known for decades.) So....
|
||||
|
||||
There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
|
||||
calls by keeping local variables that need to be preserved in blocks of memory
|
||||
obtained from malloc() instead instead of on the stack. Macros are used to
|
||||
There is a fudge, triggered by defining HEAP_MATCH_RECURSE, which avoids
|
||||
recursive calls by keeping local variables that need to be preserved in blocks
|
||||
of memory on the heap instead of on the stack. Macros are used to
|
||||
achieve this so that the actual code doesn't look very different to what it
|
||||
always used to.
|
||||
|
||||
|
@ -274,11 +275,10 @@ enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
|
|||
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
|
||||
RM61, RM62, RM63, RM64, RM65, RM66, RM67 };
|
||||
|
||||
/* These versions of the macros use the stack, as normal. There are debugging
|
||||
versions and production versions. Note that the "rw" argument of RMATCH isn't
|
||||
actually used in this definition. */
|
||||
/* These versions of the macros use the stack, as normal. Note that the "rw"
|
||||
argument of RMATCH isn't actually used in this definition. */
|
||||
|
||||
#ifndef NO_RECURSE
|
||||
#ifndef HEAP_MATCH_RECURSE
|
||||
#define REGISTER register
|
||||
#define RMATCH(ra,rb,rc,rd,re,rw) \
|
||||
rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
|
||||
|
@ -350,10 +350,9 @@ typedef struct heapframe {
|
|||
|
||||
eptrblock *Xeptrb;
|
||||
|
||||
PCRE2_OFFSET Xoffset;
|
||||
PCRE2_OFFSET Xoffset_top;
|
||||
PCRE2_OFFSET Xstacksave[REC_STACK_SAVE_MAX];
|
||||
PCRE2_OFFSET Xsave_offset1, Xsave_offset2, Xsave_offset3;
|
||||
PCRE2_SIZE Xoffset;
|
||||
PCRE2_SIZE Xoffset_top;
|
||||
PCRE2_SIZE Xsave_offset1, Xsave_offset2, Xsave_offset3;
|
||||
|
||||
uint32_t Xfc;
|
||||
uint32_t Xnumber;
|
||||
|
@ -395,6 +394,99 @@ typedef struct heapframe {
|
|||
***************************************************************************/
|
||||
|
||||
|
||||
/* When HEAP_MATCH_RECURSE is not defined, the match() function implements
|
||||
backtrack points by calling itself recursively in all but one case. The one
|
||||
special case is when processing OP_RECURSE, which specifies recursion in the
|
||||
pattern. The entire ovector must be saved and restored while processing
|
||||
OP_RECURSE. If the ovector is small enough, instead of calling match()
|
||||
directly, op_recurse_ovecsave() is called. This function uses the system stack
|
||||
to save the ovector while calling match() to process the pattern recursion. */
|
||||
|
||||
#ifndef HEAP_MATCH_RECURSE
|
||||
|
||||
/* We need a prototype for match() because it is mutually recursive with
|
||||
op_recurse_ovecsave(). */
|
||||
|
||||
static int
|
||||
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
|
||||
PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth);
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Process OP_RECURSE, stacking ovector *
|
||||
*************************************************/
|
||||
|
||||
/* When this function is called, mb->recursive has already been updated to
|
||||
point to a new recursion data block, and all its fields other than ovec_save
|
||||
have been set.
|
||||
|
||||
Arguments:
|
||||
eptr pointer to current character in subject
|
||||
callpat the recursion point in the pattern
|
||||
mstart pointer to the current match start position (can be modified
|
||||
by encountering \K)
|
||||
offset_top current top pointer
|
||||
mb pointer to "static" info block for the match
|
||||
eptrb pointer to chain of blocks containing eptr at start of
|
||||
brackets - for testing for empty matches
|
||||
rdepth the recursion depth
|
||||
|
||||
Returns: a match() return code
|
||||
*/
|
||||
|
||||
static int
|
||||
op_recurse_ovecsave(REGISTER PCRE2_SPTR eptr, PCRE2_SPTR callpat,
|
||||
PCRE2_SPTR mstart, PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb,
|
||||
uint32_t rdepth)
|
||||
{
|
||||
register int rrc;
|
||||
BOOL cbegroup = *callpat >= OP_SBRA;
|
||||
recursion_info *new_recursive = mb->recursive;
|
||||
PCRE2_SIZE ovecsave[OP_RECURSE_STACK_SAVE_MAX];
|
||||
|
||||
/* Save the ovector */
|
||||
|
||||
new_recursive->ovec_save = ovecsave;
|
||||
memcpy(ovecsave, mb->ovector, new_recursive->saved_max * sizeof(PCRE2_SIZE));
|
||||
|
||||
/* Do the recursion. After processing each alternative, restore the ovector
|
||||
data and the last captured value. */
|
||||
|
||||
do
|
||||
{
|
||||
if (cbegroup) mb->match_function_type = MATCH_CBEGROUP;
|
||||
rrc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top,
|
||||
mb, eptrb, rdepth + 1);
|
||||
memcpy(mb->ovector, new_recursive->ovec_save,
|
||||
new_recursive->saved_max * sizeof(PCRE2_SIZE));
|
||||
mb->capture_last = new_recursive->saved_capture_last;
|
||||
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) return rrc;
|
||||
|
||||
/* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
|
||||
recursion; they cause a NOMATCH for the entire recursion. These codes
|
||||
are defined in a range that can be tested for. */
|
||||
|
||||
if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
|
||||
return MATCH_NOMATCH;
|
||||
|
||||
/* Any return code other than NOMATCH is an error. Otherwise, advance to the
|
||||
next alternative or to the end of the recursing subpattern. If there were
|
||||
nested recursions, mb->recursive might be changed, so reset it before
|
||||
looping. */
|
||||
|
||||
if (rrc != MATCH_NOMATCH) return rrc;
|
||||
mb->recursive = new_recursive;
|
||||
callpat += GET(callpat, 1);
|
||||
}
|
||||
while (*callpat == OP_ALT); /* Loop for the alternatives */
|
||||
|
||||
/* None of the alternatives matched. */
|
||||
|
||||
return MATCH_NOMATCH;
|
||||
}
|
||||
#endif /* HEAP_MATCH_RECURSE */
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match from current position *
|
||||
|
@ -451,9 +543,8 @@ Returns: MATCH_MATCH if matched ) these values are >= 0
|
|||
*/
|
||||
|
||||
static int
|
||||
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode,
|
||||
PCRE2_SPTR mstart, PCRE2_OFFSET offset_top, match_block *mb, eptrblock *eptrb,
|
||||
uint32_t rdepth)
|
||||
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
|
||||
PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth)
|
||||
{
|
||||
/* These variables do not need to be preserved over recursion in this function,
|
||||
so they can be ordinary variables in all cases. Mark some of them with
|
||||
|
@ -475,7 +566,7 @@ whenever RMATCH() does a "recursion". See the macro definitions above. Putting
|
|||
the top-level on the stack rather than malloc-ing them all gives a performance
|
||||
boost in many cases where there is not much "recursion". */
|
||||
|
||||
#ifdef NO_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
heapframe *frame = (heapframe *)mb->match_frames_base;
|
||||
|
||||
/* Copy in the original argument variables */
|
||||
|
@ -535,7 +626,6 @@ HEAP_RECURSE:
|
|||
#define save_offset1 frame->Xsave_offset1
|
||||
#define save_offset2 frame->Xsave_offset2
|
||||
#define save_offset3 frame->Xsave_offset3
|
||||
#define stacksave frame->Xstacksave
|
||||
|
||||
#define condition frame->Xcondition
|
||||
#define cur_is_word frame->Xcur_is_word
|
||||
|
@ -543,11 +633,11 @@ HEAP_RECURSE:
|
|||
|
||||
#define newptrb frame->Xnewptrb
|
||||
|
||||
/* When recursion is being used, local variables are allocated on the stack and
|
||||
get preserved during recursion in the normal way. In this environment, fi and
|
||||
i, and fc and c, can be the same variables. */
|
||||
/* When normal stack-based recursion is being used for match(), local variables
|
||||
are allocated on the stack and get preserved during recursion in the usual way.
|
||||
In this environment, fi and i, and fc and c, can be the same variables. */
|
||||
|
||||
#else /* NO_RECURSE not defined */
|
||||
#else /* HEAP_MATCH_RECURSE not defined */
|
||||
#define fi i
|
||||
#define fc c
|
||||
|
||||
|
@ -569,9 +659,8 @@ PCRE2_SPTR pp;
|
|||
PCRE2_SPTR prev;
|
||||
PCRE2_SPTR saved_eptr;
|
||||
|
||||
PCRE2_OFFSET offset;
|
||||
PCRE2_OFFSET stacksave[REC_STACK_SAVE_MAX];
|
||||
PCRE2_OFFSET save_offset1, save_offset2, save_offset3;
|
||||
PCRE2_SIZE offset;
|
||||
PCRE2_SIZE save_offset1, save_offset2, save_offset3;
|
||||
|
||||
uint32_t number;
|
||||
uint32_t op;
|
||||
|
@ -597,7 +686,7 @@ BOOL prev_is_word;
|
|||
|
||||
eptrblock newptrb;
|
||||
recursion_info new_recursive;
|
||||
#endif /* NO_RECURSE not defined */
|
||||
#endif /* HEAP_MATCH_RECURSE not defined */
|
||||
|
||||
/* To save space on the stack and in the heap frame, I have doubled up on some
|
||||
of the local variables that are used only in localised parts of the code, but
|
||||
|
@ -622,19 +711,19 @@ prop_fail_result = 0;
|
|||
|
||||
|
||||
/* This label is used for tail recursion, which is used in a few cases even
|
||||
when NO_RECURSE is not defined, in order to reduce the amount of stack that is
|
||||
used. Thanks to Ian Taylor for noticing this possibility and sending the
|
||||
original patch. */
|
||||
when HEAP_MATCH_RECURSE is not defined, in order to reduce the amount of stack
|
||||
that is used. Thanks to Ian Taylor for noticing this possibility and sending
|
||||
the original patch. */
|
||||
|
||||
TAIL_RECURSE:
|
||||
|
||||
/* OK, now we can get on with the real code of the function. Recursive calls
|
||||
are specified by the macro RMATCH and RRETURN is used to return. When
|
||||
NO_RECURSE is *not* defined, these just turn into a recursive call to match()
|
||||
and a "return", respectively. However, RMATCH isn't like a function call
|
||||
because it's quite a complicated macro. It has to be used in one particular
|
||||
way. This shouldn't, however, impact performance when true recursion is being
|
||||
used. */
|
||||
HEAP_MATCH_RECURSE is *not* defined, these just turn into a recursive call to
|
||||
match() and a "return", respectively. However, RMATCH isn't like a function
|
||||
call because it's quite a complicated macro. It has to be used in one
|
||||
particular way. This shouldn't, however, impact performance when true recursion
|
||||
is being used. */
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
utf = (mb->poptions & PCRE2_UTF) != 0;
|
||||
|
@ -668,7 +757,7 @@ if (mb->match_function_type == MATCH_CBEGROUP)
|
|||
mb->match_function_type = 0;
|
||||
}
|
||||
|
||||
/* Now start processing the opcodes. */
|
||||
/* Now, at last, we can start processing the opcodes. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
|
@ -1205,9 +1294,9 @@ for (;;)
|
|||
cb.callout_number = ecode[1];
|
||||
cb.offset_vector = mb->ovector;
|
||||
cb.subject = mb->start_subject;
|
||||
cb.subject_length = (int)(mb->end_subject - mb->start_subject);
|
||||
cb.start_match = (int)(mstart - mb->start_subject);
|
||||
cb.current_position = (int)(eptr - mb->start_subject);
|
||||
cb.subject_length = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
|
||||
cb.start_match = (PCRE2_SIZE)(mstart - mb->start_subject);
|
||||
cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
|
||||
cb.pattern_position = GET(ecode, 2);
|
||||
cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
|
||||
cb.capture_top = offset_top/2;
|
||||
|
@ -1230,7 +1319,7 @@ for (;;)
|
|||
condition = FALSE;
|
||||
switch(condcode = *ecode)
|
||||
{
|
||||
case OP_RREF: /* Numbered group recursion test */
|
||||
case OP_RREF: /* Numbered group recursion test */
|
||||
if (mb->recursive != NULL) /* Not recursing => FALSE */
|
||||
{
|
||||
uint32_t recno = GET2(ecode, 1); /* Recursion group number*/
|
||||
|
@ -1588,9 +1677,9 @@ for (;;)
|
|||
cb.callout_number = ecode[1];
|
||||
cb.offset_vector = mb->ovector;
|
||||
cb.subject = mb->start_subject;
|
||||
cb.subject_length = (int)(mb->end_subject - mb->start_subject);
|
||||
cb.start_match = (int)(mstart - mb->start_subject);
|
||||
cb.current_position = (int)(eptr - mb->start_subject);
|
||||
cb.subject_length = (PCRE2_SIZE)(mb->end_subject - mb->start_subject);
|
||||
cb.start_match = (PCRE2_SIZE)(mstart - mb->start_subject);
|
||||
cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
|
||||
cb.pattern_position = GET(ecode, 2);
|
||||
cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
|
||||
cb.capture_top = offset_top/2;
|
||||
|
@ -1613,7 +1702,7 @@ for (;;)
|
|||
all the potential data. There may be up to 65535 such values, which is too
|
||||
large to put on the stack, but using malloc for small numbers seems
|
||||
expensive. As a compromise, the stack is used when there are no more than
|
||||
REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
|
||||
OP_RECURSE_STACK_SAVE_MAX values to store; otherwise malloc is used.
|
||||
|
||||
There are also other values that have to be saved. We use a chained
|
||||
sequence of blocks that actually live on the stack. Thanks to Robin Houston
|
||||
|
@ -1626,12 +1715,11 @@ for (;;)
|
|||
uint32_t recno;
|
||||
|
||||
callpat = mb->start_code + GET(ecode, 1);
|
||||
recno = (callpat == mb->start_code)? 0 :
|
||||
GET2(callpat, 1 + LINK_SIZE);
|
||||
recno = (callpat == mb->start_code)? 0 : GET2(callpat, 1 + LINK_SIZE);
|
||||
|
||||
/* Check for repeating a recursion without advancing the subject pointer.
|
||||
This should catch convoluted mutual recursions. (Some simple cases are
|
||||
caught at compile time.) */
|
||||
/* Check for repeating a pattern recursion without advancing the subject
|
||||
pointer. This should catch convoluted mutual recursions. (Some simple
|
||||
cases are caught at compile time.) */
|
||||
|
||||
for (ri = mb->recursive; ri != NULL; ri = ri->prevrec)
|
||||
if (recno == ri->group_num && eptr == ri->subject_position)
|
||||
|
@ -1641,6 +1729,7 @@ for (;;)
|
|||
|
||||
new_recursive.group_num = recno;
|
||||
new_recursive.saved_capture_last = mb->capture_last;
|
||||
new_recursive.saved_max = mb->offset_end;
|
||||
new_recursive.subject_position = eptr;
|
||||
new_recursive.prevrec = mb->recursive;
|
||||
mb->recursive = &new_recursive;
|
||||
|
@ -1649,78 +1738,93 @@ for (;;)
|
|||
|
||||
ecode += 1 + LINK_SIZE;
|
||||
|
||||
/* Now save the offset data */
|
||||
|
||||
new_recursive.saved_max = mb->offset_end;
|
||||
if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
|
||||
new_recursive.offset_save = stacksave;
|
||||
else
|
||||
/* When we are using the system stack for match() recursion we can call a
|
||||
function that uses the system stack for preserving the ovector while
|
||||
processing the pattern recursion, but only if the ovector is small
|
||||
enough. */
|
||||
|
||||
#ifndef HEAP_MATCH_RECURSE
|
||||
if (new_recursive.saved_max <= OP_RECURSE_STACK_SAVE_MAX)
|
||||
{
|
||||
new_recursive.offset_save = (PCRE2_OFFSET *)
|
||||
(mb->memctl.malloc(new_recursive.saved_max * sizeof(PCRE2_OFFSET),
|
||||
mb->memctl.memory_data));
|
||||
if (new_recursive.offset_save == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);
|
||||
rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb,
|
||||
eptrb, rdepth);
|
||||
mb->recursive = new_recursive.prevrec;
|
||||
if (rrc != MATCH_MATCH && rrc != MATCH_ACCEPT) RRETURN(rrc);
|
||||
|
||||
/* Set where we got to in the subject, and reset the start, in case
|
||||
it was changed by \K. This *is* propagated back out of a recursion,
|
||||
for Perl compatibility. */
|
||||
|
||||
eptr = mb->end_match_ptr;
|
||||
mstart = mb->start_match_ptr;
|
||||
break; /* End of processing OP_RECURSE */
|
||||
}
|
||||
memcpy(new_recursive.offset_save, mb->ovector,
|
||||
new_recursive.saved_max * sizeof(PCRE2_OFFSET));
|
||||
|
||||
/* OK, now we can do the recursion. After processing each alternative,
|
||||
restore the offset data and the last captured value. If there were nested
|
||||
recursions, mb->recursive might be changed, so reset it before looping.
|
||||
*/
|
||||
|
||||
#endif
|
||||
/* If the ovector is too big, or if we are using the heap for match()
|
||||
recursion, we have to use the heap for saving the ovector. */
|
||||
|
||||
new_recursive.ovec_save = (PCRE2_SIZE *)
|
||||
(mb->memctl.malloc(new_recursive.saved_max * sizeof(PCRE2_SIZE),
|
||||
mb->memctl.memory_data));
|
||||
if (new_recursive.ovec_save == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);
|
||||
memcpy(new_recursive.ovec_save, mb->ovector,
|
||||
new_recursive.saved_max * sizeof(PCRE2_SIZE));
|
||||
|
||||
/* Do the recursion. After processing each alternative, restore the
|
||||
ovector data and the last captured value. This code has the same overall
|
||||
logic as the code in the op_recurse_ovecsave() function, but is adapted
|
||||
to use RMATCH/RRETURN and to release the heap block containing the saved
|
||||
ovector. */
|
||||
|
||||
cbegroup = (*callpat >= OP_SBRA);
|
||||
do
|
||||
{
|
||||
if (cbegroup) mb->match_function_type = MATCH_CBEGROUP;
|
||||
RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
|
||||
mb, eptrb, RM6);
|
||||
memcpy(mb->ovector, new_recursive.offset_save,
|
||||
new_recursive.saved_max * sizeof(PCRE2_OFFSET));
|
||||
memcpy(mb->ovector, new_recursive.ovec_save,
|
||||
new_recursive.saved_max * sizeof(PCRE2_SIZE));
|
||||
mb->capture_last = new_recursive.saved_capture_last;
|
||||
mb->recursive = new_recursive.prevrec;
|
||||
|
||||
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
|
||||
{
|
||||
if (new_recursive.offset_save != stacksave)
|
||||
mb->memctl.free(new_recursive.offset_save, mb->memctl.memory_data);
|
||||
|
||||
/* Set where we got to in the subject, and reset the start in case
|
||||
mb->memctl.free(new_recursive.ovec_save, mb->memctl.memory_data);
|
||||
|
||||
/* Set where we got to in the subject, and reset the start, in case
|
||||
it was changed by \K. This *is* propagated back out of a recursion,
|
||||
for Perl compatibility. */
|
||||
|
||||
|
||||
eptr = mb->end_match_ptr;
|
||||
mstart = mb->start_match_ptr;
|
||||
goto RECURSION_MATCHED; /* Exit loop; end processing */
|
||||
}
|
||||
|
||||
|
||||
/* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
|
||||
recursion; they cause a NOMATCH for the entire recursion. These codes
|
||||
are defined in a range that can be tested for. */
|
||||
|
||||
|
||||
if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
|
||||
/* Any return code other than NOMATCH is an error. */
|
||||
|
||||
if (rrc != MATCH_NOMATCH)
|
||||
{
|
||||
if (new_recursive.offset_save != stacksave)
|
||||
mb->memctl.free(new_recursive.offset_save, mb->memctl.memory_data);
|
||||
RRETURN(rrc);
|
||||
{
|
||||
rrc = MATCH_NOMATCH;
|
||||
goto RECURSION_RETURN;
|
||||
}
|
||||
|
||||
|
||||
/* Any return code other than NOMATCH is an error. */
|
||||
|
||||
if (rrc != MATCH_NOMATCH) goto RECURSION_RETURN;
|
||||
mb->recursive = &new_recursive;
|
||||
callpat += GET(callpat, 1);
|
||||
}
|
||||
while (*callpat == OP_ALT);
|
||||
|
||||
|
||||
RECURSION_RETURN:
|
||||
mb->recursive = new_recursive.prevrec;
|
||||
if (new_recursive.offset_save != stacksave)
|
||||
mb->memctl.free(new_recursive.offset_save, mb->memctl.memory_data);
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
mb->memctl.free(new_recursive.ovec_save, mb->memctl.memory_data);
|
||||
RRETURN(rrc);
|
||||
}
|
||||
|
||||
RECURSION_MATCHED:
|
||||
|
||||
RECURSION_MATCHED:
|
||||
break;
|
||||
|
||||
/* An alternation is the end of a branch; scan along to find the end of the
|
||||
|
@ -1840,8 +1944,8 @@ for (;;)
|
|||
|
||||
if (offset > offset_top)
|
||||
{
|
||||
register PCRE2_OFFSET *iptr = mb->ovector + offset_top;
|
||||
register PCRE2_OFFSET *iend = mb->ovector + offset;
|
||||
register PCRE2_SIZE *iptr = mb->ovector + offset_top;
|
||||
register PCRE2_SIZE *iend = mb->ovector + offset;
|
||||
while (iptr < iend) *iptr++ = PCRE2_UNSET;
|
||||
}
|
||||
|
||||
|
@ -6023,7 +6127,7 @@ for (;;)
|
|||
match(), the RRETURN() macro jumps here. The number that is saved in
|
||||
frame->Xwhere indicates which label we actually want to return to. */
|
||||
|
||||
#ifdef NO_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
#define LBL(val) case val: goto L_RM##val;
|
||||
HEAP_RETURN:
|
||||
switch (frame->Xwhere)
|
||||
|
@ -6048,7 +6152,7 @@ switch (frame->Xwhere)
|
|||
return PCRE2_ERROR_INTERNAL;
|
||||
}
|
||||
#undef LBL
|
||||
#endif /* NO_RECURSE */
|
||||
#endif /* HEAP_MATCH_RECURSE */
|
||||
}
|
||||
|
||||
|
||||
|
@ -6058,7 +6162,7 @@ switch (frame->Xwhere)
|
|||
|
||||
Undefine all the macros that were defined above to handle this. */
|
||||
|
||||
#ifdef NO_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
#undef eptr
|
||||
#undef ecode
|
||||
#undef mstart
|
||||
|
@ -6091,10 +6195,9 @@ Undefine all the macros that were defined above to handle this. */
|
|||
#undef save_offset1
|
||||
#undef save_offset2
|
||||
#undef save_offset3
|
||||
#undef stacksave
|
||||
|
||||
#undef newptrb
|
||||
#endif /* NO_RECURSE */
|
||||
#endif /* HEAP_MATCH_RECURSE */
|
||||
|
||||
/* These two are defined as macros in both cases */
|
||||
|
||||
|
@ -6105,7 +6208,7 @@ Undefine all the macros that were defined above to handle this. */
|
|||
***************************************************************************/
|
||||
|
||||
|
||||
#ifdef NO_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
/*************************************************
|
||||
* Release allocated heap frames *
|
||||
*************************************************/
|
||||
|
@ -6131,7 +6234,7 @@ while (nextframe != NULL)
|
|||
mb->stack_memctl.free(oldframe, mb->stack_memctl.memory_data);
|
||||
}
|
||||
}
|
||||
#endif /* NO_RECURSE */
|
||||
#endif /* HEAP_MATCH_RECURSE */
|
||||
|
||||
|
||||
|
||||
|
@ -6160,8 +6263,8 @@ Returns: > 0 => success; value is the number of ovector pairs filled
|
|||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, int length,
|
||||
PCRE2_OFFSET start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
|
||||
pcre2_match_context *mcontext)
|
||||
{
|
||||
int rc;
|
||||
|
@ -6198,23 +6301,24 @@ is used below, and it expects NLBLOCK to be defined as a pointer. */
|
|||
match_block actual_match_block;
|
||||
match_block *mb = &actual_match_block;
|
||||
|
||||
#ifdef NO_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
heapframe frame_zero;
|
||||
frame_zero.Xprevframe = NULL; /* Marks the top level */
|
||||
frame_zero.Xnextframe = NULL; /* None are allocated yet */
|
||||
mb->match_frames_base = &frame_zero;
|
||||
#endif
|
||||
|
||||
/* A negative length implies a zero-terminated subject string. */
|
||||
/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
|
||||
subject string. */
|
||||
|
||||
if (length < 0) length = PRIV(strlen)(subject);
|
||||
if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
|
||||
|
||||
/* Plausibility checks */
|
||||
|
||||
if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
|
||||
if (code == NULL || subject == NULL || match_data == NULL)
|
||||
return PCRE2_ERROR_NULL;
|
||||
if ((int)start_offset > length) return PCRE2_ERROR_BADOFFSET;
|
||||
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
|
||||
|
||||
/* Check that the first field in the block is the magic number. If it is not,
|
||||
return with PCRE2_ERROR_BADMAGIC. However, if the magic number is equal to
|
||||
|
@ -6261,7 +6365,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
|||
return match_data->rc;
|
||||
}
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
if (start_offset > 0 && (int)start_offset < length &&
|
||||
if (start_offset > 0 && start_offset < length &&
|
||||
NOT_FIRSTCHAR(subject[start_offset]))
|
||||
return PCRE2_ERROR_BADUTFOFFSET;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
|
@ -6296,7 +6400,7 @@ if (mcontext == NULL)
|
|||
{
|
||||
mb->callout = NULL;
|
||||
mb->memctl = re->memctl;
|
||||
#ifdef NO_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
mb->stack_memctl = re->memctl;
|
||||
#endif
|
||||
}
|
||||
|
@ -6305,7 +6409,7 @@ else
|
|||
mb->callout = mcontext->callout;
|
||||
mb->callout_data = mcontext->callout_data;
|
||||
mb->memctl = mcontext->memctl;
|
||||
#ifdef NO_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
mb->stack_memctl = mcontext->stack_memctl;
|
||||
#endif
|
||||
}
|
||||
|
@ -6394,7 +6498,7 @@ offsets, and the top third is working space. */
|
|||
if (re->top_backref >= match_data->oveccount)
|
||||
{
|
||||
ocount = re->top_backref * 3 + 3;
|
||||
mb->ovector = (PCRE2_OFFSET *)(mb->memctl.malloc(ocount * sizeof(PCRE2_OFFSET),
|
||||
mb->ovector = (PCRE2_SIZE *)(mb->memctl.malloc(ocount * sizeof(PCRE2_SIZE),
|
||||
mb->memctl.memory_data));
|
||||
if (mb->ovector == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
using_temporary_offsets = TRUE;
|
||||
|
@ -6417,8 +6521,8 @@ in case they inspect these fields. */
|
|||
|
||||
if (ocount > 0)
|
||||
{
|
||||
register PCRE2_OFFSET *iptr = mb->ovector + ocount;
|
||||
register PCRE2_OFFSET *iend = iptr - re->top_bracket;
|
||||
register PCRE2_SIZE *iptr = mb->ovector + ocount;
|
||||
register PCRE2_SIZE *iend = iptr - re->top_bracket;
|
||||
if (iend < mb->ovector + 2) iend = mb->ovector + 2;
|
||||
while (--iptr >= iend) *iptr = PCRE2_UNSET;
|
||||
mb->ovector[0] = mb->ovector[1] = PCRE2_UNSET;
|
||||
|
@ -6782,7 +6886,7 @@ for(;;)
|
|||
|
||||
ENDLOOP:
|
||||
|
||||
#ifdef NO_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
release_match_heapframes(&frame_zero, mb);
|
||||
#endif
|
||||
|
||||
|
@ -6810,7 +6914,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
|
|||
if (arg_offset_max >= 4)
|
||||
{
|
||||
memcpy(match_data->ovector + 2, mb->ovector + 2,
|
||||
(arg_offset_max - 2) * sizeof(PCRE2_OFFSET));
|
||||
(arg_offset_max - 2) * sizeof(PCRE2_SIZE));
|
||||
}
|
||||
if (mb->end_offset_top > arg_offset_max) mb->capture_last |= OVFLBIT;
|
||||
mb->memctl.free(mb->ovector, mb->memctl.memory_data);
|
||||
|
@ -6834,7 +6938,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
|
|||
|
||||
if (mb->end_offset_top/2 <= re->top_bracket)
|
||||
{
|
||||
register PCRE2_OFFSET *iptr, *iend;
|
||||
register PCRE2_SIZE *iptr, *iend;
|
||||
int resetcount = re->top_bracket + 1;
|
||||
if (resetcount > match_data->oveccount) resetcount = match_data->oveccount;
|
||||
iptr = match_data->ovector + mb->end_offset_top;
|
||||
|
|
|
@ -55,7 +55,7 @@ PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
|
|||
pcre2_match_data_create(uint32_t oveccount, pcre2_general_context *gcontext)
|
||||
{
|
||||
pcre2_match_data *yield = PRIV(memctl_malloc)(
|
||||
sizeof(pcre2_match_data) + 3*oveccount*sizeof(PCRE2_OFFSET),
|
||||
sizeof(pcre2_match_data) + 3*oveccount*sizeof(PCRE2_SIZE),
|
||||
(pcre2_memctl *)gcontext);
|
||||
yield->oveccount = oveccount;
|
||||
return yield;
|
||||
|
@ -94,7 +94,7 @@ if (match_data != NULL)
|
|||
* Get left-most code unit in match *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_OFFSET PCRE2_CALL_CONVENTION
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
|
||||
pcre2_get_leftchar(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->leftchar;
|
||||
|
@ -118,7 +118,7 @@ return match_data->mark;
|
|||
* Get pointer to ovector *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_OFFSET * PCRE2_CALL_CONVENTION
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE * PCRE2_CALL_CONVENTION
|
||||
pcre2_get_ovector_pointer(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->ovector;
|
||||
|
@ -142,7 +142,7 @@ return match_data->oveccount;
|
|||
* Get right-most code unit in match *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_OFFSET PCRE2_CALL_CONVENTION
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
|
||||
pcre2_get_rightchar(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->rightchar;
|
||||
|
@ -154,7 +154,7 @@ return match_data->rightchar;
|
|||
* Get starting code unit in match *
|
||||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN PCRE2_OFFSET PCRE2_CALL_CONVENTION
|
||||
PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
|
||||
pcre2_get_startchar(pcre2_match_data *match_data)
|
||||
{
|
||||
return match_data->startchar;
|
||||
|
|
|
@ -335,7 +335,7 @@ size_t *lensp;
|
|||
pcre2_memctl *memp;
|
||||
PCRE2_UCHAR **listp;
|
||||
PCRE2_UCHAR *sp;
|
||||
PCRE2_OFFSET *ovector;
|
||||
PCRE2_SIZE *ovector;
|
||||
|
||||
if ((count = match_data->rc) < 0) return count;
|
||||
|
||||
|
|
|
@ -58,7 +58,7 @@ strings. */
|
|||
/* This function should never be called when UTF is not supported. */
|
||||
|
||||
int
|
||||
PRIV(valid_utf)(PCRE2_SPTR string, int length, PCRE2_OFFSET *erroroffset)
|
||||
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
|
||||
{
|
||||
(void)string;
|
||||
(void)length;
|
||||
|
@ -81,7 +81,7 @@ invalid string are then undefined.
|
|||
|
||||
Arguments:
|
||||
string points to the string
|
||||
length length of string, or -1 if the string is zero-terminated
|
||||
length length of string
|
||||
errp pointer to an error position offset variable
|
||||
|
||||
Returns: == 0 if the string is a valid UTF string
|
||||
|
@ -89,17 +89,11 @@ Returns: == 0 if the string is a valid UTF string
|
|||
*/
|
||||
|
||||
int
|
||||
PRIV(valid_utf)(PCRE2_SPTR string, int length, PCRE2_OFFSET *erroroffset)
|
||||
PRIV(valid_utf)(PCRE2_SPTR string, PCRE2_SIZE length, PCRE2_SIZE *erroroffset)
|
||||
{
|
||||
register PCRE2_SPTR p;
|
||||
register uint32_t c;
|
||||
|
||||
if (length < 0)
|
||||
{
|
||||
for (p = string; *p != 0; p++);
|
||||
length = (int)(p - string);
|
||||
}
|
||||
|
||||
/* ----------------- Check a UTF-8 string ----------------- */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
|
@ -155,7 +149,7 @@ for (p = string; length-- > 0; p++)
|
|||
}
|
||||
|
||||
ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes (1-5) */
|
||||
if (length < (int)ab) /* Missing bytes */
|
||||
if (length < ab) /* Missing bytes */
|
||||
{
|
||||
*erroroffset = (int)(p - string);
|
||||
switch(ab - length)
|
||||
|
|
|
@ -77,8 +77,8 @@ int utf8;
|
|||
uint32_t option_bits;
|
||||
uint32_t newline;
|
||||
|
||||
PCRE2_OFFSET erroroffset;
|
||||
PCRE2_OFFSET *ovector;
|
||||
PCRE2_SIZE erroroffset;
|
||||
PCRE2_SIZE *ovector;
|
||||
|
||||
size_t subject_length;
|
||||
pcre2_match_data *match_data;
|
||||
|
@ -314,7 +314,7 @@ crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
|
|||
for (;;)
|
||||
{
|
||||
uint32_t options = 0; /* Normally no options */
|
||||
PCRE2_OFFSET start_offset = ovector[1]; /* Start at end of previous match */
|
||||
PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */
|
||||
|
||||
/* If the previous match was for an empty string, we are finished if we are
|
||||
at the end of the subject. Otherwise, arrange to run another match at the
|
||||
|
|
|
@ -179,7 +179,7 @@ static uint32_t recursion_limit = 0;
|
|||
static pcre2_compile_context *compile_context;
|
||||
static pcre2_match_context *match_context;
|
||||
static pcre2_match_data *match_data;
|
||||
static PCRE2_OFFSET *offsets;
|
||||
static PCRE2_SIZE *offsets;
|
||||
|
||||
static BOOL count_only = FALSE;
|
||||
static BOOL do_colour = FALSE;
|
||||
|
@ -1736,14 +1736,14 @@ while (ptr < endptr)
|
|||
|
||||
if (line_offsets)
|
||||
fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
|
||||
offsets[1] - offsets[0]);
|
||||
(int)(offsets[1] - offsets[0]));
|
||||
|
||||
/* Handle --file-offsets */
|
||||
|
||||
else if (file_offsets)
|
||||
fprintf(stdout, "%d,%d\n",
|
||||
(int)(filepos + matchptr + offsets[0] - ptr),
|
||||
offsets[1] - offsets[0]);
|
||||
(int)(offsets[1] - offsets[0]));
|
||||
|
||||
/* Handle --only-matching, which may occur many times */
|
||||
|
||||
|
@ -2451,7 +2451,7 @@ compile_pattern(patstr *p, int options, int popts, int fromfile,
|
|||
const char *fromtext, int count)
|
||||
{
|
||||
unsigned char buffer[PATBUFSIZE];
|
||||
PCRE2_OFFSET erroffset;
|
||||
PCRE2_SIZE erroffset;
|
||||
char *ps = p->string;
|
||||
unsigned int patlen = strlen(ps);
|
||||
int errcode;
|
||||
|
@ -2485,16 +2485,16 @@ pcre2_get_error_message(errcode, buffer, PATBUFSIZE);
|
|||
if (fromfile)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
|
||||
"at offset %d: %s\n", count, fromtext, erroffset, buffer);
|
||||
"at offset %d: %s\n", count, fromtext, (int)erroffset, buffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (count == 0)
|
||||
fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
|
||||
fromtext, erroffset, buffer);
|
||||
fromtext, (int)erroffset, buffer);
|
||||
else
|
||||
fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
|
||||
ordin(count), fromtext, erroffset, buffer);
|
||||
ordin(count), fromtext, (int)erroffset, buffer);
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
|
|
|
@ -203,7 +203,7 @@ Returns: 0 on success
|
|||
PCRE2POSIX_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
regcomp(regex_t *preg, const char *pattern, int cflags)
|
||||
{
|
||||
PCRE2_OFFSET erroffset;
|
||||
PCRE2_SIZE erroffset;
|
||||
int errorcode;
|
||||
int options = 0;
|
||||
int re_nsub = 0;
|
||||
|
|
|
@ -401,7 +401,7 @@ typedef struct modstruct {
|
|||
uint16_t which;
|
||||
uint16_t type;
|
||||
uint32_t value;
|
||||
PCRE2_OFFSET offset;
|
||||
PCRE2_SIZE offset;
|
||||
} modstruct;
|
||||
|
||||
static modstruct modlist[] = {
|
||||
|
@ -1758,7 +1758,7 @@ free(block);
|
|||
|
||||
/* For recursion malloc/free, to test stacking calls */
|
||||
|
||||
#ifdef NO_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
static void *my_stack_malloc(size_t size, void *data)
|
||||
{
|
||||
void *block = malloc(size);
|
||||
|
@ -1775,7 +1775,7 @@ if (show_memory)
|
|||
fprintf(outfile, "stack_free %p\n", block);
|
||||
free(block);
|
||||
}
|
||||
#endif /* NO_RECURSE */
|
||||
#endif /* HEAP_MATCH_RECURSE */
|
||||
|
||||
|
||||
/*************************************************
|
||||
|
@ -2422,7 +2422,7 @@ static void *
|
|||
check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
|
||||
{
|
||||
void *field = NULL;
|
||||
PCRE2_OFFSET offset = m->offset;
|
||||
PCRE2_SIZE offset = m->offset;
|
||||
|
||||
if (restrict_for_perl_test) switch(m->which)
|
||||
{
|
||||
|
@ -2448,7 +2448,7 @@ switch (m->which)
|
|||
/* Fall through for something that can also be in a match context. In this
|
||||
case the offset is taken from the other field. */
|
||||
|
||||
offset = (PCRE2_OFFSET)(m->value);
|
||||
offset = (PCRE2_SIZE)(m->value);
|
||||
|
||||
case MOD_CTM: /* Match context modifier */
|
||||
if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
|
||||
|
@ -3310,7 +3310,7 @@ uint8_t *p = buffer;
|
|||
const uint8_t *use_tables;
|
||||
unsigned int delimiter = *p++;
|
||||
int patlen, errorcode;
|
||||
PCRE2_OFFSET erroroffset;
|
||||
PCRE2_SIZE erroroffset;
|
||||
|
||||
/* Initialize the context and pattern/data controls for this test from the
|
||||
defaults. */
|
||||
|
@ -4403,7 +4403,7 @@ for (gmatched = 0;; gmatched++)
|
|||
{
|
||||
int i;
|
||||
uint8_t *nptr;
|
||||
PCRE2_OFFSET *ovector;
|
||||
PCRE2_SIZE *ovector;
|
||||
|
||||
/* This is a check against a lunatic return value. */
|
||||
|
||||
|
@ -4439,8 +4439,8 @@ for (gmatched = 0;; gmatched++)
|
|||
ovector = FLD(match_data, ovector);
|
||||
for (i = 0; i < 2*capcount; i += 2)
|
||||
{
|
||||
PCRE2_OFFSET start = ovector[i];
|
||||
PCRE2_OFFSET end = ovector[i+1];
|
||||
PCRE2_SIZE start = ovector[i];
|
||||
PCRE2_SIZE end = ovector[i+1];
|
||||
|
||||
if (start > end)
|
||||
{
|
||||
|
@ -4643,7 +4643,7 @@ for (gmatched = 0;; gmatched++)
|
|||
|
||||
else if (capcount == PCRE2_ERROR_PARTIAL)
|
||||
{
|
||||
PCRE2_OFFSET leftchar = FLD(match_data, leftchar);
|
||||
PCRE2_SIZE leftchar = FLD(match_data, leftchar);
|
||||
fprintf(outfile, "Partial match");
|
||||
if (leftchar != FLD(match_data, startchar))
|
||||
fprintf(outfile, " at offset %d", (int)FLD(match_data, startchar));
|
||||
|
@ -4685,8 +4685,8 @@ for (gmatched = 0;; gmatched++)
|
|||
else if (g_notempty != 0) /* There was a previous null match */
|
||||
{
|
||||
uint16_t nl = FLD(compiled_code, newline_convention);
|
||||
PCRE2_OFFSET start_offset = dat_datctl.offset; /* Where the match was */
|
||||
PCRE2_OFFSET end_offset = start_offset + 1;
|
||||
PCRE2_SIZE start_offset = dat_datctl.offset; /* Where the match was */
|
||||
PCRE2_SIZE end_offset = start_offset + 1;
|
||||
|
||||
if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY ||
|
||||
nl == PCRE2_NEWLINE_ANYCRLF) &&
|
||||
|
@ -4765,7 +4765,7 @@ for (gmatched = 0;; gmatched++)
|
|||
|
||||
if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
|
||||
{
|
||||
PCRE2_OFFSET end_offset = FLD(match_data, ovector)[1];
|
||||
PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
|
||||
|
||||
/* We must now set up for the next iteration of a global search. If we have
|
||||
matched an empty string, first check to see if we are at the end of the
|
||||
|
@ -5278,7 +5278,7 @@ if (test_mode == PCRE8_MODE)
|
|||
default_dat_context8 = pcre2_match_context_create_8(general_context8);
|
||||
dat_context8 = pcre2_match_context_create_8(general_context8);
|
||||
match_data8 = pcre2_match_data_create_8(max_oveccount, general_context8);
|
||||
#ifdef NO_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
(void)pcre2_set_recursion_memory_management_8(default_dat_context8,
|
||||
&my_stack_malloc, &my_stack_free, NULL);
|
||||
#endif
|
||||
|
@ -5295,7 +5295,7 @@ if (test_mode == PCRE16_MODE)
|
|||
default_dat_context16 = pcre2_match_context_create_16(general_context16);
|
||||
dat_context16 = pcre2_match_context_create_16(general_context16);
|
||||
match_data16 = pcre2_match_data_create_16(max_oveccount, general_context16);
|
||||
#ifdef NO_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
(void)pcre2_set_recursion_memory_management_16(default_dat_context16,
|
||||
&my_stack_malloc, &my_stack_free, NULL);
|
||||
#endif
|
||||
|
@ -5312,7 +5312,7 @@ if (test_mode == PCRE32_MODE)
|
|||
default_dat_context32 = pcre2_match_context_create_32(general_context32);
|
||||
dat_context32 = pcre2_match_context_create_32(general_context32);
|
||||
match_data32 = pcre2_match_data_create_32(max_oveccount, general_context32);
|
||||
#ifdef NO_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
(void)pcre2_set_recursion_memory_management_32(default_dat_context32,
|
||||
&my_stack_malloc, &my_stack_free, NULL);
|
||||
#endif
|
||||
|
|
|
@ -4042,5 +4042,11 @@ a random value. /Ix
|
|||
abxxx
|
||||
ca
|
||||
cd
|
||||
|
||||
# This should test both paths for processing OP_RECURSE.
|
||||
|
||||
/(?(R)a+|(?R)b)/
|
||||
aaaabcde
|
||||
aaaabcde\=ovector=100
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -13671,5 +13671,13 @@ No match
|
|||
cd
|
||||
0:
|
||||
0+ cd
|
||||
|
||||
# This should test both paths for processing OP_RECURSE.
|
||||
|
||||
/(?(R)a+|(?R)b)/
|
||||
aaaabcde
|
||||
0: aaaab
|
||||
aaaabcde\=ovector=100
|
||||
0: aaaab
|
||||
|
||||
# End of testinput2
|
||||
|
|
Loading…
Reference in New Issue