The pcre2_match() function's basic facilities are working (though hardly tested).
This commit is contained in:
Philip.Hazel 2014-06-28 14:23:18 +00:00
parent 06aa11b428
commit 38b570f99d
12 changed files with 7458 additions and 561 deletions

View File

@ -215,7 +215,6 @@ must all be greater than zero. */
#define PCRE2_ERROR_NULL (-50) #define PCRE2_ERROR_NULL (-50)
#define PCRE2_ERROR_RECURSELOOP (-51) #define PCRE2_ERROR_RECURSELOOP (-51)
#define PCRE2_ERROR_RECURSIONLIMIT (-52) #define PCRE2_ERROR_RECURSIONLIMIT (-52)
#define PCRE2_ERROR_UNSET (-53)
/* Request types for pcre2_pattern_info() */ /* Request types for pcre2_pattern_info() */
@ -309,17 +308,17 @@ typedef struct pcre2_callout_block { \
int version; /* Identifies version of block */ \ int version; /* Identifies version of block */ \
/* ------------------------ Version 0 ------------------------------- */ \ /* ------------------------ Version 0 ------------------------------- */ \
int callout_number; /* Number compiled into pattern */ \ int callout_number; /* Number compiled into pattern */ \
int *offset_vector; /* The offset vector */ \ size_t *offset_vector; /* The offset vector */ \
PCRE2_SPTR subject; /* The subject being matched */ \ PCRE2_SPTR subject; /* The subject being matched */ \
int subject_length; /* The length of the subject */ \ size_t subject_length; /* The length of the subject */ \
int start_match; /* Offset to start of this match attempt */ \ size_t start_match; /* Offset to start of this match attempt */ \
int current_position; /* Where we currently are in the subject */ \ size_t current_position; /* Where we currently are in the subject */ \
int capture_top; /* Max current capture */ \ uint32_t capture_top; /* Max current capture */ \
int capture_last; /* Most recently closed capture */ \ uint32_t capture_last; /* Most recently closed capture */ \
void *callout_data; /* Data passed in with the call */ \ void *callout_data; /* Data passed in with the call */ \
/* ------------------- Added for Version 1 -------------------------- */ \ /* ------------------- Added for Version 1 -------------------------- */ \
int pattern_position; /* Offset to next item in the pattern */ \ size_t pattern_position; /* Offset to next item in the pattern */ \
int next_item_length; /* Length of next item in the pattern */ \ size_t next_item_length; /* Length of next item in the pattern */ \
/* ------------------- Added for Version 2 -------------------------- */ \ /* ------------------- Added for Version 2 -------------------------- */ \
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \ PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
/* ------------------------------------------------------------------ */ \ /* ------------------------------------------------------------------ */ \
@ -381,7 +380,7 @@ PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \
PCRE2_EXP_DECL int pcre2_set_bsr_match(pcre2_match_context *, \ PCRE2_EXP_DECL int pcre2_set_bsr_match(pcre2_match_context *, \
uint32_t); \ uint32_t); \
PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \ PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
int (*)(pcre2_callout_block *, void *)); \ int (*)(pcre2_callout_block *), void *); \
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \ PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
uint32_t); \ uint32_t); \
PCRE2_EXP_DECL int pcre2_set_newline_match(pcre2_match_context *, \ PCRE2_EXP_DECL int pcre2_set_newline_match(pcre2_match_context *, \

View File

@ -215,7 +215,6 @@ must all be greater than zero. */
#define PCRE2_ERROR_NULL (-50) #define PCRE2_ERROR_NULL (-50)
#define PCRE2_ERROR_RECURSELOOP (-51) #define PCRE2_ERROR_RECURSELOOP (-51)
#define PCRE2_ERROR_RECURSIONLIMIT (-52) #define PCRE2_ERROR_RECURSIONLIMIT (-52)
#define PCRE2_ERROR_UNSET (-53)
/* Request types for pcre2_pattern_info() */ /* Request types for pcre2_pattern_info() */
@ -309,17 +308,17 @@ typedef struct pcre2_callout_block { \
int version; /* Identifies version of block */ \ int version; /* Identifies version of block */ \
/* ------------------------ Version 0 ------------------------------- */ \ /* ------------------------ Version 0 ------------------------------- */ \
int callout_number; /* Number compiled into pattern */ \ int callout_number; /* Number compiled into pattern */ \
int *offset_vector; /* The offset vector */ \ size_t *offset_vector; /* The offset vector */ \
PCRE2_SPTR subject; /* The subject being matched */ \ PCRE2_SPTR subject; /* The subject being matched */ \
int subject_length; /* The length of the subject */ \ size_t subject_length; /* The length of the subject */ \
int start_match; /* Offset to start of this match attempt */ \ size_t start_match; /* Offset to start of this match attempt */ \
int current_position; /* Where we currently are in the subject */ \ size_t current_position; /* Where we currently are in the subject */ \
int capture_top; /* Max current capture */ \ uint32_t capture_top; /* Max current capture */ \
int capture_last; /* Most recently closed capture */ \ uint32_t capture_last; /* Most recently closed capture */ \
void *callout_data; /* Data passed in with the call */ \ void *callout_data; /* Data passed in with the call */ \
/* ------------------- Added for Version 1 -------------------------- */ \ /* ------------------- Added for Version 1 -------------------------- */ \
int pattern_position; /* Offset to next item in the pattern */ \ size_t pattern_position; /* Offset to next item in the pattern */ \
int next_item_length; /* Length of next item in the pattern */ \ size_t next_item_length; /* Length of next item in the pattern */ \
/* ------------------- Added for Version 2 -------------------------- */ \ /* ------------------- Added for Version 2 -------------------------- */ \
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \ PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
/* ------------------------------------------------------------------ */ \ /* ------------------------------------------------------------------ */ \
@ -381,7 +380,7 @@ PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \
PCRE2_EXP_DECL int pcre2_set_bsr_match(pcre2_match_context *, \ PCRE2_EXP_DECL int pcre2_set_bsr_match(pcre2_match_context *, \
uint32_t); \ uint32_t); \
PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \ PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
int (*)(pcre2_callout_block *, void *)); \ int (*)(pcre2_callout_block *), void *); \
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \ PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
uint32_t); \ uint32_t); \
PCRE2_EXP_DECL int pcre2_set_newline_match(pcre2_match_context *, \ PCRE2_EXP_DECL int pcre2_set_newline_match(pcre2_match_context *, \

View File

@ -559,14 +559,14 @@ which case the base cannot be possessified.
Arguments: Arguments:
code points to the byte code code points to the byte code
utf TRUE in UTF mode utf TRUE in UTF mode
cd compile data block cb compile data block
base_list the data list of the base opcode base_list the data list of the base opcode
Returns: TRUE if the auto-possessification is possible Returns: TRUE if the auto-possessification is possible
*/ */
static BOOL static BOOL
compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_data *cd, compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_block *cb,
const uint32_t *base_list, PCRE2_SPTR base_end) const uint32_t *base_list, PCRE2_SPTR base_end)
{ {
PCRE2_UCHAR c; PCRE2_UCHAR c;
@ -654,7 +654,7 @@ for(;;)
while (*next_code == OP_ALT) while (*next_code == OP_ALT)
{ {
if (!compare_opcodes(code, utf, cd, base_list, base_end)) return FALSE; if (!compare_opcodes(code, utf, cb, base_list, base_end)) return FALSE;
code = next_code + 1 + LINK_SIZE; code = next_code + 1 + LINK_SIZE;
next_code += GET(next_code, 1); next_code += GET(next_code, 1);
} }
@ -674,7 +674,7 @@ for(;;)
/* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */ /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
next_code += 1 + LINK_SIZE; next_code += 1 + LINK_SIZE;
if (!compare_opcodes(next_code, utf, cd, base_list, base_end)) if (!compare_opcodes(next_code, utf, cb, base_list, base_end))
return FALSE; return FALSE;
code += PRIV(OP_lengths)[c]; code += PRIV(OP_lengths)[c];
@ -686,7 +686,7 @@ for(;;)
/* Check for a supported opcode, and load its properties. */ /* Check for a supported opcode, and load its properties. */
code = get_chr_property_list(code, utf, cd->fcc, list); code = get_chr_property_list(code, utf, cb->fcc, list);
if (code == NULL) return FALSE; /* Unsupported */ if (code == NULL) return FALSE; /* Unsupported */
/* If either opcode is a small character list, set pointers for comparing /* If either opcode is a small character list, set pointers for comparing
@ -755,21 +755,21 @@ for(;;)
invert_bits = TRUE; invert_bits = TRUE;
/* Fall through */ /* Fall through */
case OP_DIGIT: case OP_DIGIT:
set2 = (uint8_t *)(cd->cbits + cbit_digit); set2 = (uint8_t *)(cb->cbits + cbit_digit);
break; break;
case OP_NOT_WHITESPACE: case OP_NOT_WHITESPACE:
invert_bits = TRUE; invert_bits = TRUE;
/* Fall through */ /* Fall through */
case OP_WHITESPACE: case OP_WHITESPACE:
set2 = (uint8_t *)(cd->cbits + cbit_space); set2 = (uint8_t *)(cb->cbits + cbit_space);
break; break;
case OP_NOT_WORDCHAR: case OP_NOT_WORDCHAR:
invert_bits = TRUE; invert_bits = TRUE;
/* Fall through */ /* Fall through */
case OP_WORDCHAR: case OP_WORDCHAR:
set2 = (uint8_t *)(cd->cbits + cbit_word); set2 = (uint8_t *)(cb->cbits + cbit_word);
break; break;
default: default:
@ -963,27 +963,27 @@ for(;;)
set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
case OP_DIGIT: case OP_DIGIT:
if (chr < 256 && (cd->ctypes[chr] & ctype_digit) != 0) return FALSE; if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
break; break;
case OP_NOT_DIGIT: case OP_NOT_DIGIT:
if (chr > 255 || (cd->ctypes[chr] & ctype_digit) == 0) return FALSE; if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
break; break;
case OP_WHITESPACE: case OP_WHITESPACE:
if (chr < 256 && (cd->ctypes[chr] & ctype_space) != 0) return FALSE; if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
break; break;
case OP_NOT_WHITESPACE: case OP_NOT_WHITESPACE:
if (chr > 255 || (cd->ctypes[chr] & ctype_space) == 0) return FALSE; if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
break; break;
case OP_WORDCHAR: case OP_WORDCHAR:
if (chr < 255 && (cd->ctypes[chr] & ctype_word) != 0) return FALSE; if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
break; break;
case OP_NOT_WORDCHAR: case OP_NOT_WORDCHAR:
if (chr > 255 || (cd->ctypes[chr] & ctype_word) == 0) return FALSE; if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
break; break;
case OP_HSPACE: case OP_HSPACE:
@ -1095,13 +1095,13 @@ if appropriate. This function modifies the compiled opcode!
Arguments: Arguments:
code points to start of the byte code code points to start of the byte code
utf TRUE in UTF mode utf TRUE in UTF mode
cd compile data block cb compile data block
Returns: nothing Returns: nothing
*/ */
void void
PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_data *cd) PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb)
{ {
register PCRE2_UCHAR c; register PCRE2_UCHAR c;
PCRE2_SPTR end; PCRE2_SPTR end;
@ -1116,10 +1116,10 @@ for (;;)
{ {
c -= get_repeat_base(c) - OP_STAR; c -= get_repeat_base(c) - OP_STAR;
end = (c <= OP_MINUPTO) ? end = (c <= OP_MINUPTO) ?
get_chr_property_list(code, utf, cd->fcc, list) : NULL; get_chr_property_list(code, utf, cb->fcc, list) : NULL;
list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO; list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
if (end != NULL && compare_opcodes(end, utf, cd, list, end)) if (end != NULL && compare_opcodes(end, utf, cb, list, end))
{ {
switch(c) switch(c)
{ {
@ -1171,11 +1171,11 @@ for (;;)
if (c >= OP_CRSTAR && c <= OP_CRMINRANGE) if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
{ {
/* end must not be NULL. */ /* end must not be NULL. */
end = get_chr_property_list(code, utf, cd->fcc, list); end = get_chr_property_list(code, utf, cb->fcc, list);
list[1] = (c & 1) == 0; list[1] = (c & 1) == 0;
if (compare_opcodes(end, utf, cd, list, end)) if (compare_opcodes(end, utf, cb, list, end))
{ {
switch (c) switch (c)
{ {

File diff suppressed because it is too large Load Diff

View File

@ -172,6 +172,7 @@ mcontext->stack_malloc = mcontext->malloc;
mcontext->stack_free = mcontext->free; mcontext->stack_free = mcontext->free;
#endif #endif
mcontext->callout = NULL; mcontext->callout = NULL;
mcontext->callout_data = NULL;
mcontext->newline_convention = 0; mcontext->newline_convention = 0;
mcontext->bsr_convention = 0; mcontext->bsr_convention = 0;
mcontext->match_limit = MATCH_LIMIT; mcontext->match_limit = MATCH_LIMIT;
@ -368,9 +369,10 @@ switch(newline)
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_callout(pcre2_match_context *mcontext, pcre2_set_callout(pcre2_match_context *mcontext,
int (*callout)(pcre2_callout_block *, void *)) int (*callout)(pcre2_callout_block *), void *callout_data)
{ {
mcontext->callout = callout; mcontext->callout = callout;
mcontext->callout_data = callout_data;
return 1; return 1;
} }

View File

@ -228,8 +228,6 @@ static const char match_error_texts[] =
"NULL argument passed\0" "NULL argument passed\0"
"nested recursion at the same subject position\0" "nested recursion at the same subject position\0"
"recursion limit exceeded\0" "recursion limit exceeded\0"
"unknown opcode - pattern overwritten?\0"
"value unset\0" /* Used by pcre2_pattern_info() */
; ;

View File

@ -522,9 +522,7 @@ bytes in a code unit in that mode. */
#define PCRE2_JCHANGED 0x00000400 /* j option used in pattern */ #define PCRE2_JCHANGED 0x00000400 /* j option used in pattern */
#define PCRE2_HASCRORLF 0x00000800 /* explicit \r or \n in pattern */ #define PCRE2_HASCRORLF 0x00000800 /* explicit \r or \n in pattern */
#define PCRE2_HASTHEN 0x00001000 /* pattern contains (*THEN) */ #define PCRE2_HASTHEN 0x00001000 /* pattern contains (*THEN) */
#define PCRE2_MLSET 0x00002000 /* match limit set by pattern */ #define PCRE2_MATCH_EMPTY 0x00002000 /* pattern can match empty string */
#define PCRE2_RLSET 0x00004000 /* recursion limit set by pattern */
#define PCRE2_MATCH_EMPTY 0x00008000 /* pattern can match empty string */
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32) #define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
@ -540,7 +538,7 @@ endianness. */
/* The maximum remaining length of subject we are prepared to search for a /* The maximum remaining length of subject we are prepared to search for a
req_unit match. */ req_unit match. */
#define REQ_UNIT_MAX 1000 #define REQ_CU_MAX 1000
/* Bit definitions for entries in the pcre_ctypes table. */ /* Bit definitions for entries in the pcre_ctypes table. */
@ -1816,8 +1814,10 @@ compiling the library, PCRE2_CODE_UNIT_WIDTH will be defined, and we can
include them at the appropriate width, after setting up suffix macros for the include them at the appropriate width, after setting up suffix macros for the
private structures. */ private structures. */
#define compile_data PCRE2_SUFFIX(compile_data_)
#define branch_chain PCRE2_SUFFIX(branch_chain_) #define branch_chain PCRE2_SUFFIX(branch_chain_)
#define compile_block PCRE2_SUFFIX(compile_block_)
#define dfa_match_block PCRE2_SUFFIX(dfa_match_block_)
#define match_block PCRE2_SUFFIX(match_block_)
#define named_group PCRE2_SUFFIX(named_group_) #define named_group PCRE2_SUFFIX(named_group_)
#include "pcre2_intmodedep.h" #include "pcre2_intmodedep.h"
@ -1845,10 +1845,11 @@ is available. */
#define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_) #define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_)
#define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_) #define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_)
extern void _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL, const compile_data *); extern void _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL, const compile_block *);
extern void _pcre2_compile_context_init(pcre2_compile_context *, BOOL); extern void _pcre2_compile_context_init(pcre2_compile_context *, BOOL);
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int); extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
extern BOOL _pcre2_is_newline(PCRE2_SPTR, int, PCRE2_SPTR, int *, BOOL); extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
BOOL);
extern void _pcre2_match_context_init(pcre2_match_context *, BOOL); extern void _pcre2_match_context_init(pcre2_match_context *, BOOL);
extern void *_pcre2_memctl_malloc(size_t, size_t, pcre2_memctl *); extern void *_pcre2_memctl_malloc(size_t, size_t, pcre2_memctl *);
extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *); extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *);
@ -1859,7 +1860,8 @@ extern int _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t);
extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t); extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t);
extern int _pcre2_study(pcre2_real_code *); extern int _pcre2_study(pcre2_real_code *);
extern int _pcre2_valid_utf(PCRE2_SPTR, int, size_t *); extern int _pcre2_valid_utf(PCRE2_SPTR, int, size_t *);
extern BOOL _pcre2_was_newline(PCRE2_SPTR, int, PCRE2_SPTR, int *, BOOL); extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
BOOL);
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL); extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);
#endif /* PCRE2_CODE_UNIT_WIDTH */ #endif /* PCRE2_CODE_UNIT_WIDTH */

View File

@ -77,6 +77,7 @@ just to undefine them all. */
#undef PUT2INC #undef PUT2INC
#undef PUTCHAR #undef PUTCHAR
#undef PUTINC #undef PUTINC
#undef TABLE_GET
@ -197,8 +198,12 @@ arithmetic results in a signed value. Hence the cast. */
#define PUT2(a,n,d) a[n] = d #define PUT2(a,n,d) a[n] = d
#endif #endif
/* Other macros that are different for 8-bit mode. The maximum length of a MARK /* Other macros that are different for 8-bit mode. The MAX_255 macro checks
name must fit in one code unit; currently it is set to 255 or 65535. */ whether its argument is less than 256. The maximum length of a MARK name must
fit in one code unit; currently it is set to 255 or 65535. The TABLE_GET macro
is used to access elements of tables containing exactly 256 items. When code
points can be greater than 255, a check is needed before accessing these
tables. */
#if PCRE2_CODE_UNIT_WIDTH == 8 #if PCRE2_CODE_UNIT_WIDTH == 8
#define MAX_255(c) TRUE #define MAX_255(c) TRUE
@ -206,11 +211,13 @@ name must fit in one code unit; currently it is set to 255 or 65535. */
#ifdef SUPPORT_UTF #ifdef SUPPORT_UTF
#define SUPPORT_WIDE_CHARS #define SUPPORT_WIDE_CHARS
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UTF */
#define TABLE_GET(c, table, default) ((table)[c])
#else /* Code units are 16 or 32 bits */ #else /* Code units are 16 or 32 bits */
#define MAX_255(c) ((c) <= 255u) #define MAX_255(c) ((c) <= 255u)
#define MAX_MARK ((1u << 16) - 1) #define MAX_MARK ((1u << 16) - 1)
#define SUPPORT_WIDE_CHARS #define SUPPORT_WIDE_CHARS
#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
#endif #endif
@ -557,7 +564,8 @@ typedef struct pcre2_real_match_context {
void * (*stack_malloc)(size_t, void *); void * (*stack_malloc)(size_t, void *);
void (*stack_free)(void *, void *); void (*stack_free)(void *, void *);
#endif #endif
int (*callout)(pcre2_callout_block *, void *); int (*callout)(pcre2_callout_block *);
void *callout_data;
uint16_t bsr_convention; uint16_t bsr_convention;
uint16_t newline_convention; uint16_t newline_convention;
uint32_t match_limit; uint32_t match_limit;
@ -632,7 +640,7 @@ typedef struct named_group {
/* Structure for passing "static" information around between the functions /* Structure for passing "static" information around between the functions
doing the compiling, so that they are thread-safe. */ doing the compiling, so that they are thread-safe. */
typedef struct compile_data { typedef struct compile_block {
pcre2_real_compile_context *cx; /* Points to the compile context */ pcre2_real_compile_context *cx; /* Points to the compile context */
const uint8_t *lcc; /* Points to lower casing table */ const uint8_t *lcc; /* Points to lower casing table */
const uint8_t *fcc; /* Points to case-flipping table */ const uint8_t *fcc; /* Points to case-flipping table */
@ -643,32 +651,131 @@ typedef struct compile_data {
PCRE2_SPTR start_pattern; /* The start of the pattern */ PCRE2_SPTR start_pattern; /* The start of the pattern */
PCRE2_SPTR end_pattern; /* The end of the pattern */ PCRE2_SPTR end_pattern; /* The end of the pattern */
PCRE2_UCHAR *hwm; /* High watermark of workspace */ PCRE2_UCHAR *hwm; /* High watermark of workspace */
PCRE2_UCHAR *name_table; /* The name/number table */
size_t workspace_size; /* Size of workspace */
uint16_t names_found; /* Number of entries so far */
uint16_t name_entry_size; /* Size of each entry */
open_capitem *open_caps; /* Chain of open capture items */ open_capitem *open_caps; /* Chain of open capture items */
named_group *named_groups; /* Points to vector in pre-compile */ named_group *named_groups; /* Points to vector in pre-compile */
PCRE2_UCHAR *name_table; /* The name/number table */ uint32_t named_group_list_size; /* Number of entries in the list */
int names_found; /* Number of entries so far */
int name_entry_size; /* Size of each entry */
int named_group_list_size; /* Number of entries in the list */
int workspace_size; /* Size of workspace */
unsigned int bracount; /* Count of capturing parens as we compile */
int final_bracount; /* Saved value after first pass */
int max_lookbehind; /* Maximum lookbehind (characters) */
int top_backref; /* Maximum back reference */
unsigned int backref_map; /* Bitmap of low back refs */
unsigned int namedrefcount; /* Number of backreferences by name */
int parens_depth; /* Depth of nested parentheses */
int assert_depth; /* Depth of nested assertions */
uint32_t external_options; /* External (initial) options */ uint32_t external_options; /* External (initial) options */
uint32_t external_flags; /* External flag bits to be set */ uint32_t external_flags; /* External flag bits to be set */
uint32_t bracount; /* Count of capturing parens as we compile */
uint32_t final_bracount; /* Saved value after first pass */
uint32_t top_backref; /* Maximum back reference */
uint32_t backref_map; /* Bitmap of low back refs */
uint32_t namedrefcount; /* Number of backreferences by name */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
int max_lookbehind; /* Maximum lookbehind (characters) */
int parens_depth; /* Depth of nested parentheses */
int assert_depth; /* Depth of nested assertions */
int req_varyopt; /* "After variable item" flag for reqbyte */ int req_varyopt; /* "After variable item" flag for reqbyte */
BOOL had_accept; /* (*ACCEPT) encountered */ BOOL had_accept; /* (*ACCEPT) encountered */
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
BOOL check_lookbehind; /* Lookbehinds need later checking */ BOOL check_lookbehind; /* Lookbehinds need later checking */
BOOL dupnames; /* Duplicate names exist */ BOOL dupnames; /* Duplicate names exist */
} compile_block;
/* Structure for items in a linked list that represents an explicit recursive
call within the pattern; used by pcre2_match(). Each record points back to the
previous one through prevrec, and the "recursive" field of match_block points
at the list. */
typedef struct recursion_info {
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
/* NOTE(review): dfa_recursion_info declares its group_num as uint32_t; confirm
whether plain unsigned int is intended here. */
unsigned int group_num; /* Number of group that was called */
size_t *offset_save; /* Pointer to start of saved offsets */
uint32_t saved_max; /* Number of saved offsets */
uint32_t saved_capture_last; /* Last capture number */
PCRE2_SPTR subject_position; /* Position at start of recursion */
} recursion_info;
/* A similar structure for pcre2_dfa_match(): one record per recursion, linked
through prevrec. The "recursive" field of dfa_match_block points at the list. */
typedef struct dfa_recursion_info {
struct dfa_recursion_info *prevrec; /* Previous record in the list */
uint32_t group_num; /* Group number (cf. recursion_info.group_num) */
PCRE2_SPTR subject_position; /* Subject position; presumably the position at
                                the start of the recursion, as in
                                recursion_info - TODO confirm */
} dfa_recursion_info;
/* Structure for building a chain of data for holding the values of the subject
pointer at the start of each subpattern, so as to detect when an empty string
has been matched by a subpattern - to break infinite loops; used by
pcre2_match(). The head of the chain is kept in the "eptrchain" field of
match_block. */
typedef struct eptrblock {
struct eptrblock *epb_prev; /* Previous block in the chain */
PCRE2_SPTR epb_saved_eptr; /* Saved value of the subject pointer */
} eptrblock;
/* Structure for passing "static" information around between the functions
doing traditional NFA matching (pcre2_match() and friends). The field order is
left exactly as it was; only the comments have been expanded. */
typedef struct match_block {
pcre2_memctl memctl; /* NOTE(review): presumably the memory management data for
                        this block - confirm against pcre2_memctl */
unsigned long int match_call_count; /* Running call counter; presumably counts
                                       calls of match() - TODO confirm */
/* NOTE(review): pcre2_real_match_context declares match_limit as uint32_t,
whereas the limits here are unsigned long int; confirm the difference in width
is intended. */
unsigned long int match_limit; /* The match limit in force */
unsigned long int match_limit_recursion; /* The recursion limit in force */
BOOL hitend; /* Hit the end of the subject at some point */
BOOL hasthen; /* Pattern contains (*THEN) */
const uint8_t *lcc; /* Points to lower casing table */
const uint8_t *fcc; /* Points to case-flipping table */
const uint8_t *ctypes; /* Points to table of type maps */
size_t *ovector; /* Pointer to the offset vector */
size_t offset_end; /* One past the end */
size_t offset_max; /* The maximum usable for return data */
size_t start_offset; /* The start offset value */
size_t end_offset_top; /* Highwater mark at end of match */
uint16_t partial; /* PARTIAL options */
uint16_t bsr_convention; /* \R interpretation */
uint16_t name_count; /* Number of names in name table */
uint16_t name_entry_size; /* Size of entry in names table */
PCRE2_SPTR name_table; /* Table of group names */
PCRE2_SPTR start_code; /* For use when recursing */
PCRE2_SPTR start_subject; /* Start of the subject string */
PCRE2_SPTR end_subject; /* End of the subject string */
PCRE2_SPTR start_match_ptr; /* Start of matched string */
PCRE2_SPTR end_match_ptr; /* Subject position at end of match */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
PCRE2_SPTR mark; /* Mark pointer to pass back on success */
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
PCRE2_SPTR once_target; /* Where to back up to for atomic groups */
uint32_t moptions; /* Match options */
uint32_t poptions; /* Pattern options */
uint32_t capture_last; /* Most recent capture number + overflow flag */
uint32_t skip_arg_count; /* For counting SKIP_ARGs */
uint32_t ignore_skip_arg; /* For re-run when SKIP arg name not found */
uint32_t match_function_type; /* Set for certain special calls of match() */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
recursion_info *recursive; /* Linked list of recursion data */
/* The next two fields have the same names and types as the callout and
callout_data members of pcre2_real_match_context; presumably they are copied
from the match context - TODO confirm. */
void *callout_data; /* To pass back to callouts */
int (*callout)(pcre2_callout_block *); /* Callout function or NULL */
/* The following field exists only when NO_RECURSE is defined. */
#ifdef NO_RECURSE
void *match_frames_base; /* For remembering malloc'd frames */
#endif
} match_block;
/* A similar structure is used for the same purpose by the DFA matching
functions. */
typedef struct dfa_match_block {
PCRE2_SPTR start_code; /* Start of the compiled pattern */
PCRE2_SPTR start_subject ; /* Start of the subject string */
PCRE2_SPTR end_subject; /* End of subject string */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
const uint8_t *tables; /* Character tables */
int start_offset; /* The start offset value */
uint32_t moptions; /* Match options */
uint32_t poptions; /* Pattern options */
int nltype; /* Newline type */ int nltype; /* Newline type */
int nllen; /* Newline string length */ int nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */ PCRE2_UCHAR nl[4]; /* Newline string when fixed */
} compile_data; void *callout_data; /* To pass back to callouts */
dfa_recursion_info *recursive; /* Linked list of recursion data */
} dfa_match_block;
#endif /* PCRE2_PCRE2TEST */ #endif /* PCRE2_PCRE2TEST */

File diff suppressed because it is too large Load Diff

View File

@ -60,8 +60,10 @@ http://unicode.org/unicode/reports/tr18/. */
* Check for newline at given position * * Check for newline at given position *
*************************************************/ *************************************************/
/* It is guaranteed that the initial value of ptr is less than the end of the /* This function is called only via the IS_NEWLINE macro, which does so only
string that is being processed. when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
pointed to by ptr is less than the end of the string.
Arguments: Arguments:
ptr pointer to possible newline ptr pointer to possible newline
@ -74,27 +76,30 @@ Returns: TRUE or FALSE
*/ */
BOOL BOOL
PRIV(is_newline)(PCRE2_SPTR ptr, int type, PCRE2_SPTR endptr, int *lenptr, PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
BOOL utf) uint32_t *lenptr, BOOL utf)
{ {
uint32_t c; uint32_t c;
#ifdef SUPPORT_UTF #ifdef SUPPORT_UTF
if (utf) { GETCHAR(c, ptr); } else if (utf) { GETCHAR(c, ptr); } else c = *ptr;
#else #else
(void)utf; (void)utf;
c = *ptr;
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UTF */
c = *ptr;
/* Note that this function is called only for ANY or ANYCRLF. */
if (type == NLTYPE_ANYCRLF) switch(c) if (type == NLTYPE_ANYCRLF) switch(c)
{ {
case CHAR_LF: *lenptr = 1; return TRUE; case CHAR_LF:
case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; *lenptr = 1;
return TRUE; return TRUE;
default: return FALSE;
case CHAR_CR:
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
return TRUE;
default:
return FALSE;
} }
/* NLTYPE_ANY */ /* NLTYPE_ANY */
@ -106,7 +111,9 @@ else switch(c)
#endif #endif
case CHAR_LF: case CHAR_LF:
case CHAR_VT: case CHAR_VT:
case CHAR_FF: *lenptr = 1; return TRUE; case CHAR_FF:
*lenptr = 1;
return TRUE;
case CHAR_CR: case CHAR_CR:
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
@ -114,17 +121,26 @@ else switch(c)
#ifndef EBCDIC #ifndef EBCDIC
#if PCRE2_CODE_UNIT_WIDTH == 8 #if PCRE2_CODE_UNIT_WIDTH == 8
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE; case CHAR_NEL:
*lenptr = utf? 2 : 1;
return TRUE;
case 0x2028: /* LS */ case 0x2028: /* LS */
case 0x2029: *lenptr = 3; return TRUE; /* PS */ case 0x2029: /* PS */
*lenptr = 3;
return TRUE;
#else /* 16-bit or 32-bit code units */ #else /* 16-bit or 32-bit code units */
case CHAR_NEL: case CHAR_NEL:
case 0x2028: /* LS */ case 0x2028: /* LS */
case 0x2029: *lenptr = 1; return TRUE; /* PS */ case 0x2029: /* PS */
*lenptr = 1;
return TRUE;
#endif #endif
#endif /* Not EBCDIC */ #endif /* Not EBCDIC */
default: return FALSE; default:
return FALSE;
} }
} }
@ -134,8 +150,10 @@ else switch(c)
* Check for newline at previous position * * Check for newline at previous position *
*************************************************/ *************************************************/
/* It is guaranteed that the initial value of ptr is greater than the start of /* This function is called only via the WAS_NEWLINE macro, which does so only
the string that is being processed. when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial
value of ptr is greater than the start of the string that is being processed.
Arguments: Arguments:
ptr pointer to possible newline ptr pointer to possible newline
@ -148,8 +166,8 @@ Returns: TRUE or FALSE
*/ */
BOOL BOOL
PRIV(was_newline)(PCRE2_SPTR ptr, int type, PCRE2_SPTR startptr, int *lenptr, PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
BOOL utf) uint32_t *lenptr, BOOL utf)
{ {
uint32_t c; uint32_t c;
ptr--; ptr--;
@ -160,23 +178,24 @@ if (utf)
BACKCHAR(ptr); BACKCHAR(ptr);
GETCHAR(c, ptr); GETCHAR(c, ptr);
} }
else else c = *ptr;
#else #else
(void)utf; (void)utf;
c = *ptr;
#endif /* SUPPORT_UTF */ #endif /* SUPPORT_UTF */
c = *ptr;
/* Note that this function is called only for ANY or ANYCRLF. */
if (type == NLTYPE_ANYCRLF) switch(c) if (type == NLTYPE_ANYCRLF) switch(c)
{ {
case CHAR_LF: case CHAR_LF:
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
return TRUE; return TRUE;
case CHAR_CR: *lenptr = 1; return TRUE; case CHAR_CR:
default: return FALSE; *lenptr = 1;
return TRUE;
default:
return FALSE;
} }
/* NLTYPE_ANY */ /* NLTYPE_ANY */
@ -192,21 +211,32 @@ else switch(c)
#endif #endif
case CHAR_VT: case CHAR_VT:
case CHAR_FF: case CHAR_FF:
case CHAR_CR: *lenptr = 1; return TRUE; case CHAR_CR:
*lenptr = 1;
return TRUE;
#ifndef EBCDIC #ifndef EBCDIC
#if PCRE2_CODE_UNIT_WIDTH == 8 #if PCRE2_CODE_UNIT_WIDTH == 8
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE; case CHAR_NEL:
*lenptr = utf? 2 : 1;
return TRUE;
case 0x2028: /* LS */ case 0x2028: /* LS */
case 0x2029: *lenptr = 3; return TRUE; /* PS */ case 0x2029: /* PS */
*lenptr = 3;
return TRUE;
#else /* 16-bit or 32-bit code units */ #else /* 16-bit or 32-bit code units */
case CHAR_NEL: case CHAR_NEL:
case 0x2028: /* LS */ case 0x2028: /* LS */
case 0x2029: *lenptr = 1; return TRUE; /* PS */ case 0x2029: /* PS */
*lenptr = 1;
return TRUE;
#endif #endif
#endif /* Not EBCDIC */ #endif /* Not EBCDIC */
default: return FALSE; default:
return FALSE;
} }
} }

View File

@ -150,7 +150,6 @@ switch(what)
break; break;
case PCRE2_INFO_MATCHLIMIT: case PCRE2_INFO_MATCHLIMIT:
if ((re->flags & PCRE2_MLSET) == 0) return PCRE2_ERROR_UNSET;
*((uint32_t *)where) = re->limit_match; *((uint32_t *)where) = re->limit_match;
break; break;
@ -179,7 +178,6 @@ switch(what)
break; break;
case PCRE2_INFO_RECURSIONLIMIT: case PCRE2_INFO_RECURSIONLIMIT:
if ((re->flags & PCRE2_RLSET) == 0) return PCRE2_ERROR_UNSET;
*((uint32_t *)where) = re->limit_recursion; *((uint32_t *)where) = re->limit_recursion;
break; break;

View File

@ -2632,7 +2632,7 @@ pattern_info(int what, void *where)
{ {
int rc; int rc;
PCRE2_PATTERN_INFO(rc, compiled_code, what, where); PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
if (rc >= 0 || rc == PCRE2_ERROR_UNSET) return 0; if (rc >= 0) return 0;
fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode, fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
what); what);
if (rc == PCRE2_ERROR_BADMODE) if (rc == PCRE2_ERROR_BADMODE)
@ -2831,7 +2831,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
int nameentrysize, namecount; int nameentrysize, namecount;
uint32_t bsr_convention, newline_convention; uint32_t bsr_convention, newline_convention;
uint32_t first_cunit, last_cunit; uint32_t first_cunit, last_cunit;
uint32_t match_limit = 0, recursion_limit = 0; uint32_t match_limit, recursion_limit;
/* These info requests should always succeed. */ /* These info requests should always succeed. */
@ -2865,10 +2865,10 @@ if ((pat_patctl.control & CTL_INFO) != 0)
if (maxlookbehind > 0) if (maxlookbehind > 0)
fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind); fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
if (match_limit > 0) if (match_limit != UINT32_MAX)
fprintf(outfile, "Match limit = %u\n", match_limit); fprintf(outfile, "Match limit = %u\n", match_limit);
if (recursion_limit > 0) if (recursion_limit != UINT32_MAX)
fprintf(outfile, "Recursion limit = %u\n", recursion_limit); fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
if (namecount > 0) if (namecount > 0)