The pcre2_match() function's basic facilities are working (though hardly
tested).
This commit is contained in:
parent
06aa11b428
commit
38b570f99d
19
src/pcre2.h
19
src/pcre2.h
|
@ -215,7 +215,6 @@ must all be greater than zero. */
|
|||
#define PCRE2_ERROR_NULL (-50)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-51)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-52)
|
||||
#define PCRE2_ERROR_UNSET (-53)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
@ -309,17 +308,17 @@ typedef struct pcre2_callout_block { \
|
|||
int version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
int callout_number; /* Number compiled into pattern */ \
|
||||
int *offset_vector; /* The offset vector */ \
|
||||
size_t *offset_vector; /* The offset vector */ \
|
||||
PCRE2_SPTR subject; /* The subject being matched */ \
|
||||
int subject_length; /* The length of the subject */ \
|
||||
int start_match; /* Offset to start of this match attempt */ \
|
||||
int current_position; /* Where we currently are in the subject */ \
|
||||
int capture_top; /* Max current capture */ \
|
||||
int capture_last; /* Most recently closed capture */ \
|
||||
size_t subject_length; /* The length of the subject */ \
|
||||
size_t start_match; /* Offset to start of this match attempt */ \
|
||||
size_t current_position; /* Where we currently are in the subject */ \
|
||||
uint32_t capture_top; /* Max current capture */ \
|
||||
uint32_t capture_last; /* Most recently closed capture */ \
|
||||
void *callout_data; /* Data passed in with the call */ \
|
||||
/* ------------------- Added for Version 1 -------------------------- */ \
|
||||
int pattern_position; /* Offset to next item in the pattern */ \
|
||||
int next_item_length; /* Length of next item in the pattern */ \
|
||||
size_t pattern_position; /* Offset to next item in the pattern */ \
|
||||
size_t next_item_length; /* Length of next item in the pattern */ \
|
||||
/* ------------------- Added for Version 2 -------------------------- */ \
|
||||
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
|
@ -381,7 +380,7 @@ PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \
|
|||
PCRE2_EXP_DECL int pcre2_set_bsr_match(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
|
||||
int (*)(pcre2_callout_block *, void *)); \
|
||||
int (*)(pcre2_callout_block *), void *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline_match(pcre2_match_context *, \
|
||||
|
|
|
@ -215,7 +215,6 @@ must all be greater than zero. */
|
|||
#define PCRE2_ERROR_NULL (-50)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-51)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-52)
|
||||
#define PCRE2_ERROR_UNSET (-53)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
@ -309,17 +308,17 @@ typedef struct pcre2_callout_block { \
|
|||
int version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
int callout_number; /* Number compiled into pattern */ \
|
||||
int *offset_vector; /* The offset vector */ \
|
||||
size_t *offset_vector; /* The offset vector */ \
|
||||
PCRE2_SPTR subject; /* The subject being matched */ \
|
||||
int subject_length; /* The length of the subject */ \
|
||||
int start_match; /* Offset to start of this match attempt */ \
|
||||
int current_position; /* Where we currently are in the subject */ \
|
||||
int capture_top; /* Max current capture */ \
|
||||
int capture_last; /* Most recently closed capture */ \
|
||||
size_t subject_length; /* The length of the subject */ \
|
||||
size_t start_match; /* Offset to start of this match attempt */ \
|
||||
size_t current_position; /* Where we currently are in the subject */ \
|
||||
uint32_t capture_top; /* Max current capture */ \
|
||||
uint32_t capture_last; /* Most recently closed capture */ \
|
||||
void *callout_data; /* Data passed in with the call */ \
|
||||
/* ------------------- Added for Version 1 -------------------------- */ \
|
||||
int pattern_position; /* Offset to next item in the pattern */ \
|
||||
int next_item_length; /* Length of next item in the pattern */ \
|
||||
size_t pattern_position; /* Offset to next item in the pattern */ \
|
||||
size_t next_item_length; /* Length of next item in the pattern */ \
|
||||
/* ------------------- Added for Version 2 -------------------------- */ \
|
||||
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
|
@ -381,7 +380,7 @@ PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \
|
|||
PCRE2_EXP_DECL int pcre2_set_bsr_match(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
|
||||
int (*)(pcre2_callout_block *, void *)); \
|
||||
int (*)(pcre2_callout_block *), void *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline_match(pcre2_match_context *, \
|
||||
|
|
|
@ -559,14 +559,14 @@ which case the base cannot be possessified.
|
|||
Arguments:
|
||||
code points to the byte code
|
||||
utf TRUE in UTF mode
|
||||
cd compile data block
|
||||
cb compile data block
|
||||
base_list the data list of the base opcode
|
||||
|
||||
Returns: TRUE if the auto-possessification is possible
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_data *cd,
|
||||
compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_block *cb,
|
||||
const uint32_t *base_list, PCRE2_SPTR base_end)
|
||||
{
|
||||
PCRE2_UCHAR c;
|
||||
|
@ -654,7 +654,7 @@ for(;;)
|
|||
|
||||
while (*next_code == OP_ALT)
|
||||
{
|
||||
if (!compare_opcodes(code, utf, cd, base_list, base_end)) return FALSE;
|
||||
if (!compare_opcodes(code, utf, cb, base_list, base_end)) return FALSE;
|
||||
code = next_code + 1 + LINK_SIZE;
|
||||
next_code += GET(next_code, 1);
|
||||
}
|
||||
|
@ -674,7 +674,7 @@ for(;;)
|
|||
/* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
|
||||
|
||||
next_code += 1 + LINK_SIZE;
|
||||
if (!compare_opcodes(next_code, utf, cd, base_list, base_end))
|
||||
if (!compare_opcodes(next_code, utf, cb, base_list, base_end))
|
||||
return FALSE;
|
||||
|
||||
code += PRIV(OP_lengths)[c];
|
||||
|
@ -686,7 +686,7 @@ for(;;)
|
|||
|
||||
/* Check for a supported opcode, and load its properties. */
|
||||
|
||||
code = get_chr_property_list(code, utf, cd->fcc, list);
|
||||
code = get_chr_property_list(code, utf, cb->fcc, list);
|
||||
if (code == NULL) return FALSE; /* Unsupported */
|
||||
|
||||
/* If either opcode is a small character list, set pointers for comparing
|
||||
|
@ -755,21 +755,21 @@ for(;;)
|
|||
invert_bits = TRUE;
|
||||
/* Fall through */
|
||||
case OP_DIGIT:
|
||||
set2 = (uint8_t *)(cd->cbits + cbit_digit);
|
||||
set2 = (uint8_t *)(cb->cbits + cbit_digit);
|
||||
break;
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
invert_bits = TRUE;
|
||||
/* Fall through */
|
||||
case OP_WHITESPACE:
|
||||
set2 = (uint8_t *)(cd->cbits + cbit_space);
|
||||
set2 = (uint8_t *)(cb->cbits + cbit_space);
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
invert_bits = TRUE;
|
||||
/* Fall through */
|
||||
case OP_WORDCHAR:
|
||||
set2 = (uint8_t *)(cd->cbits + cbit_word);
|
||||
set2 = (uint8_t *)(cb->cbits + cbit_word);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -963,27 +963,27 @@ for(;;)
|
|||
set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
|
||||
|
||||
case OP_DIGIT:
|
||||
if (chr < 256 && (cd->ctypes[chr] & ctype_digit) != 0) return FALSE;
|
||||
if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
if (chr > 255 || (cd->ctypes[chr] & ctype_digit) == 0) return FALSE;
|
||||
if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
|
||||
break;
|
||||
|
||||
case OP_WHITESPACE:
|
||||
if (chr < 256 && (cd->ctypes[chr] & ctype_space) != 0) return FALSE;
|
||||
if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
if (chr > 255 || (cd->ctypes[chr] & ctype_space) == 0) return FALSE;
|
||||
if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
|
||||
break;
|
||||
|
||||
case OP_WORDCHAR:
|
||||
if (chr < 255 && (cd->ctypes[chr] & ctype_word) != 0) return FALSE;
|
||||
if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
if (chr > 255 || (cd->ctypes[chr] & ctype_word) == 0) return FALSE;
|
||||
if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
|
||||
break;
|
||||
|
||||
case OP_HSPACE:
|
||||
|
@ -1095,13 +1095,13 @@ if appropriate. This function modifies the compiled opcode!
|
|||
Arguments:
|
||||
code points to start of the byte code
|
||||
utf TRUE in UTF mode
|
||||
cd compile data block
|
||||
cb compile data block
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
void
|
||||
PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_data *cd)
|
||||
PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb)
|
||||
{
|
||||
register PCRE2_UCHAR c;
|
||||
PCRE2_SPTR end;
|
||||
|
@ -1116,10 +1116,10 @@ for (;;)
|
|||
{
|
||||
c -= get_repeat_base(c) - OP_STAR;
|
||||
end = (c <= OP_MINUPTO) ?
|
||||
get_chr_property_list(code, utf, cd->fcc, list) : NULL;
|
||||
get_chr_property_list(code, utf, cb->fcc, list) : NULL;
|
||||
list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
|
||||
|
||||
if (end != NULL && compare_opcodes(end, utf, cd, list, end))
|
||||
if (end != NULL && compare_opcodes(end, utf, cb, list, end))
|
||||
{
|
||||
switch(c)
|
||||
{
|
||||
|
@ -1171,11 +1171,11 @@ for (;;)
|
|||
if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
|
||||
{
|
||||
/* end must not be NULL. */
|
||||
end = get_chr_property_list(code, utf, cd->fcc, list);
|
||||
end = get_chr_property_list(code, utf, cb->fcc, list);
|
||||
|
||||
list[1] = (c & 1) == 0;
|
||||
|
||||
if (compare_opcodes(end, utf, cd, list, end))
|
||||
if (compare_opcodes(end, utf, cb, list, end))
|
||||
{
|
||||
switch (c)
|
||||
{
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -172,6 +172,7 @@ mcontext->stack_malloc = mcontext->malloc;
|
|||
mcontext->stack_free = mcontext->free;
|
||||
#endif
|
||||
mcontext->callout = NULL;
|
||||
mcontext->callout_data = NULL;
|
||||
mcontext->newline_convention = 0;
|
||||
mcontext->bsr_convention = 0;
|
||||
mcontext->match_limit = MATCH_LIMIT;
|
||||
|
@ -368,9 +369,10 @@ switch(newline)
|
|||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_callout(pcre2_match_context *mcontext,
|
||||
int (*callout)(pcre2_callout_block *, void *))
|
||||
int (*callout)(pcre2_callout_block *), void *callout_data)
|
||||
{
|
||||
mcontext->callout = callout;
|
||||
mcontext->callout_data = callout_data;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -228,8 +228,6 @@ static const char match_error_texts[] =
|
|||
"NULL argument passed\0"
|
||||
"nested recursion at the same subject position\0"
|
||||
"recursion limit exceeded\0"
|
||||
"unknown opcode - pattern overwritten?\0"
|
||||
"value unset\0" /* Used by pcre2_pattern_info() */
|
||||
;
|
||||
|
||||
|
||||
|
|
|
@ -522,9 +522,7 @@ bytes in a code unit in that mode. */
|
|||
#define PCRE2_JCHANGED 0x00000400 /* j option used in pattern */
|
||||
#define PCRE2_HASCRORLF 0x00000800 /* explicit \r or \n in pattern */
|
||||
#define PCRE2_HASTHEN 0x00001000 /* pattern contains (*THEN) */
|
||||
#define PCRE2_MLSET 0x00002000 /* match limit set by pattern */
|
||||
#define PCRE2_RLSET 0x00004000 /* recursion limit set by pattern */
|
||||
#define PCRE2_MATCH_EMPTY 0x00008000 /* pattern can match empty string */
|
||||
#define PCRE2_MATCH_EMPTY 0x00002000 /* pattern can match empty string */
|
||||
|
||||
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
|
||||
|
||||
|
@ -540,7 +538,7 @@ endianness. */
|
|||
/* The maximum remaining length of subject we are prepared to search for a
|
||||
req_unit match. */
|
||||
|
||||
#define REQ_UNIT_MAX 1000
|
||||
#define REQ_CU_MAX 1000
|
||||
|
||||
/* Bit definitions for entries in the pcre_ctypes table. */
|
||||
|
||||
|
@ -1816,8 +1814,10 @@ compiling the library, PCRE2_CODE_UNIT_WIDTH will be defined, and we can
|
|||
include them at the appropriate width, after setting up suffix macros for the
|
||||
private structures. */
|
||||
|
||||
#define compile_data PCRE2_SUFFIX(compile_data_)
|
||||
#define branch_chain PCRE2_SUFFIX(branch_chain_)
|
||||
#define compile_block PCRE2_SUFFIX(compile_block_)
|
||||
#define dfa_match_block PCRE2_SUFFIX(dfa_match_block_)
|
||||
#define match_block PCRE2_SUFFIX(match_block_)
|
||||
#define named_group PCRE2_SUFFIX(named_group_)
|
||||
|
||||
#include "pcre2_intmodedep.h"
|
||||
|
@ -1845,10 +1845,11 @@ is available. */
|
|||
#define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_)
|
||||
#define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_)
|
||||
|
||||
extern void _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL, const compile_data *);
|
||||
extern void _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL, const compile_block *);
|
||||
extern void _pcre2_compile_context_init(pcre2_compile_context *, BOOL);
|
||||
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
||||
extern BOOL _pcre2_is_newline(PCRE2_SPTR, int, PCRE2_SPTR, int *, BOOL);
|
||||
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
|
||||
BOOL);
|
||||
extern void _pcre2_match_context_init(pcre2_match_context *, BOOL);
|
||||
extern void *_pcre2_memctl_malloc(size_t, size_t, pcre2_memctl *);
|
||||
extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *);
|
||||
|
@ -1859,7 +1860,8 @@ extern int _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t);
|
|||
extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t);
|
||||
extern int _pcre2_study(pcre2_real_code *);
|
||||
extern int _pcre2_valid_utf(PCRE2_SPTR, int, size_t *);
|
||||
extern BOOL _pcre2_was_newline(PCRE2_SPTR, int, PCRE2_SPTR, int *, BOOL);
|
||||
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
|
||||
BOOL);
|
||||
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH */
|
||||
|
||||
|
|
|
@ -77,6 +77,7 @@ just to undefine them all. */
|
|||
#undef PUT2INC
|
||||
#undef PUTCHAR
|
||||
#undef PUTINC
|
||||
#undef TABLE_GET
|
||||
|
||||
|
||||
|
||||
|
@ -197,8 +198,12 @@ arithmetic results in a signed value. Hence the cast. */
|
|||
#define PUT2(a,n,d) a[n] = d
|
||||
#endif
|
||||
|
||||
/* Other macros that are different for 8-bit mode. The maximum length of a MARK
|
||||
name must fit in one code unit; currently it is set to 255 or 65535. */
|
||||
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
|
||||
whether its argument is less than 256. The maximum length of a MARK name must
|
||||
fit in one code unit; currently it is set to 255 or 65535. The TABLE_GET macro
|
||||
is used to access elements of tables containing exactly 256 items. When code
|
||||
points can be greater than 255, a check is needed before accessing these
|
||||
tables. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define MAX_255(c) TRUE
|
||||
|
@ -206,11 +211,13 @@ name must fit in one code unit; currently it is set to 255 or 65535. */
|
|||
#ifdef SUPPORT_UTF
|
||||
#define SUPPORT_WIDE_CHARS
|
||||
#endif /* SUPPORT_UTF */
|
||||
#define TABLE_GET(c, table, default) ((table)[c])
|
||||
|
||||
#else /* Code units are 16 or 32 bits */
|
||||
#define MAX_255(c) ((c) <= 255u)
|
||||
#define MAX_MARK ((1u << 16) - 1)
|
||||
#define SUPPORT_WIDE_CHARS
|
||||
#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -557,7 +564,8 @@ typedef struct pcre2_real_match_context {
|
|||
void * (*stack_malloc)(size_t, void *);
|
||||
void (*stack_free)(void *, void *);
|
||||
#endif
|
||||
int (*callout)(pcre2_callout_block *, void *);
|
||||
int (*callout)(pcre2_callout_block *);
|
||||
void *callout_data;
|
||||
uint16_t bsr_convention;
|
||||
uint16_t newline_convention;
|
||||
uint32_t match_limit;
|
||||
|
@ -632,7 +640,7 @@ typedef struct named_group {
|
|||
/* Structure for passing "static" information around between the functions
|
||||
doing the compiling, so that they are thread-safe. */
|
||||
|
||||
typedef struct compile_data {
|
||||
typedef struct compile_block {
|
||||
pcre2_real_compile_context *cx; /* Points to the compile context */
|
||||
const uint8_t *lcc; /* Points to lower casing table */
|
||||
const uint8_t *fcc; /* Points to case-flipping table */
|
||||
|
@ -643,32 +651,131 @@ typedef struct compile_data {
|
|||
PCRE2_SPTR start_pattern; /* The start of the pattern */
|
||||
PCRE2_SPTR end_pattern; /* The end of the pattern */
|
||||
PCRE2_UCHAR *hwm; /* High watermark of workspace */
|
||||
PCRE2_UCHAR *name_table; /* The name/number table */
|
||||
size_t workspace_size; /* Size of workspace */
|
||||
uint16_t names_found; /* Number of entries so far */
|
||||
uint16_t name_entry_size; /* Size of each entry */
|
||||
open_capitem *open_caps; /* Chain of open capture items */
|
||||
named_group *named_groups; /* Points to vector in pre-compile */
|
||||
PCRE2_UCHAR *name_table; /* The name/number table */
|
||||
int names_found; /* Number of entries so far */
|
||||
int name_entry_size; /* Size of each entry */
|
||||
int named_group_list_size; /* Number of entries in the list */
|
||||
int workspace_size; /* Size of workspace */
|
||||
unsigned int bracount; /* Count of capturing parens as we compile */
|
||||
int final_bracount; /* Saved value after first pass */
|
||||
int max_lookbehind; /* Maximum lookbehind (characters) */
|
||||
int top_backref; /* Maximum back reference */
|
||||
unsigned int backref_map; /* Bitmap of low back refs */
|
||||
unsigned int namedrefcount; /* Number of backreferences by name */
|
||||
int parens_depth; /* Depth of nested parentheses */
|
||||
int assert_depth; /* Depth of nested assertions */
|
||||
uint32_t named_group_list_size; /* Number of entries in the list */
|
||||
uint32_t external_options; /* External (initial) options */
|
||||
uint32_t external_flags; /* External flag bits to be set */
|
||||
uint32_t bracount; /* Count of capturing parens as we compile */
|
||||
uint32_t final_bracount; /* Saved value after first pass */
|
||||
uint32_t top_backref; /* Maximum back reference */
|
||||
uint32_t backref_map; /* Bitmap of low back refs */
|
||||
uint32_t namedrefcount; /* Number of backreferences by name */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
|
||||
int max_lookbehind; /* Maximum lookbehind (characters) */
|
||||
int parens_depth; /* Depth of nested parentheses */
|
||||
int assert_depth; /* Depth of nested assertions */
|
||||
int req_varyopt; /* "After variable item" flag for reqbyte */
|
||||
BOOL had_accept; /* (*ACCEPT) encountered */
|
||||
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
|
||||
BOOL check_lookbehind; /* Lookbehinds need later checking */
|
||||
BOOL dupnames; /* Duplicate names exist */
|
||||
int nltype; /* Newline type */
|
||||
int nllen; /* Newline string length */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
|
||||
} compile_data;
|
||||
} compile_block;
|
||||
|
||||
/* Structure for items in a linked list that represents an explicit recursive
|
||||
call within the pattern; used by pcre2_match(). */
|
||||
|
||||
typedef struct recursion_info {
|
||||
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
|
||||
unsigned int group_num; /* Number of group that was called */
|
||||
size_t *offset_save; /* Pointer to start of saved offsets */
|
||||
uint32_t saved_max; /* Number of saved offsets */
|
||||
uint32_t saved_capture_last; /* Last capture number */
|
||||
PCRE2_SPTR subject_position; /* Position at start of recursion */
|
||||
} recursion_info;
|
||||
|
||||
/* A similar structure for pcre2_dfa_match(). */
|
||||
|
||||
typedef struct dfa_recursion_info {
|
||||
struct dfa_recursion_info *prevrec;
|
||||
uint32_t group_num;
|
||||
PCRE2_SPTR subject_position;
|
||||
} dfa_recursion_info;
|
||||
|
||||
/* Structure for building a chain of data for holding the values of the subject
|
||||
pointer at the start of each subpattern, so as to detect when an empty string
|
||||
has been matched by a subpattern - to break infinite loops; used by
|
||||
pcre2_match(). */
|
||||
|
||||
typedef struct eptrblock {
|
||||
struct eptrblock *epb_prev;
|
||||
PCRE2_SPTR epb_saved_eptr;
|
||||
} eptrblock;
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
doing traditional NFA matching (pcre2_match() and friends). */
|
||||
|
||||
typedef struct match_block {
|
||||
pcre2_memctl memctl;
|
||||
unsigned long int match_call_count; /* As it says */
|
||||
unsigned long int match_limit; /* As it says */
|
||||
unsigned long int match_limit_recursion; /* As it says */
|
||||
BOOL hitend; /* Hit the end of the subject at some point */
|
||||
BOOL hasthen; /* Pattern contains (*THEN) */
|
||||
const uint8_t *lcc; /* Points to lower casing table */
|
||||
const uint8_t *fcc; /* Points to case-flipping table */
|
||||
const uint8_t *ctypes; /* Points to table of type maps */
|
||||
size_t *ovector; /* Pointer to the offset vector */
|
||||
size_t offset_end; /* One past the end */
|
||||
size_t offset_max; /* The maximum usable for return data */
|
||||
size_t start_offset; /* The start offset value */
|
||||
size_t end_offset_top; /* Highwater mark at end of match */
|
||||
uint16_t partial; /* PARTIAL options */
|
||||
uint16_t bsr_convention; /* \R interpretation */
|
||||
uint16_t name_count; /* Number of names in name table */
|
||||
uint16_t name_entry_size; /* Size of entry in names table */
|
||||
PCRE2_SPTR name_table; /* Table of group names */
|
||||
PCRE2_SPTR start_code; /* For use when recursing */
|
||||
PCRE2_SPTR start_subject; /* Start of the subject string */
|
||||
PCRE2_SPTR end_subject; /* End of the subject string */
|
||||
PCRE2_SPTR start_match_ptr; /* Start of matched string */
|
||||
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
|
||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||
PCRE2_SPTR mark; /* Mark pointer to pass back on success */
|
||||
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
|
||||
PCRE2_SPTR once_target; /* Where to back up to for atomic groups */
|
||||
uint32_t moptions; /* Match options */
|
||||
uint32_t poptions; /* Pattern options */
|
||||
uint32_t capture_last; /* Most recent capture number + overflow flag */
|
||||
uint32_t skip_arg_count; /* For counting SKIP_ARGs */
|
||||
uint32_t ignore_skip_arg; /* For re-run when SKIP arg name not found */
|
||||
uint32_t match_function_type; /* Set for certain special calls of match() */
|
||||
uint32_t nltype; /* Newline type */
|
||||
uint32_t nllen; /* Newline string length */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
|
||||
recursion_info *recursive; /* Linked list of recursion data */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
int (*callout)(pcre2_callout_block *); /* Callout function or NULL */
|
||||
#ifdef NO_RECURSE
|
||||
void *match_frames_base; /* For remembering malloc'd frames */
|
||||
#endif
|
||||
} match_block;
|
||||
|
||||
/* A similar structure is used for the same purpose by the DFA matching
|
||||
functions. */
|
||||
|
||||
typedef struct dfa_match_block {
|
||||
PCRE2_SPTR start_code; /* Start of the compiled pattern */
|
||||
PCRE2_SPTR start_subject ; /* Start of the subject string */
|
||||
PCRE2_SPTR end_subject; /* End of subject string */
|
||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||
const uint8_t *tables; /* Character tables */
|
||||
int start_offset; /* The start offset value */
|
||||
uint32_t moptions; /* Match options */
|
||||
uint32_t poptions; /* Pattern options */
|
||||
int nltype; /* Newline type */
|
||||
int nllen; /* Newline string length */
|
||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
dfa_recursion_info *recursive; /* Linked list of recursion data */
|
||||
} dfa_match_block;
|
||||
|
||||
#endif /* PCRE2_PCRE2TEST */
|
||||
|
||||
|
|
6901
src/pcre2_match.c
6901
src/pcre2_match.c
File diff suppressed because it is too large
Load Diff
|
@ -60,8 +60,10 @@ http://unicode.org/unicode/reports/tr18/. */
|
|||
* Check for newline at given position *
|
||||
*************************************************/
|
||||
|
||||
/* It is guaranteed that the initial value of ptr is less than the end of the
|
||||
string that is being processed.
|
||||
/* This function is called only via the IS_NEWLINE macro, which does so only
|
||||
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
|
||||
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
|
||||
pointed to by ptr is less than the end of the string.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
|
@ -74,27 +76,30 @@ Returns: TRUE or FALSE
|
|||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(is_newline)(PCRE2_SPTR ptr, int type, PCRE2_SPTR endptr, int *lenptr,
|
||||
BOOL utf)
|
||||
PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
|
||||
uint32_t *lenptr, BOOL utf)
|
||||
{
|
||||
uint32_t c;
|
||||
|
||||
#ifdef SUPPORT_UTF
|
||||
if (utf) { GETCHAR(c, ptr); } else
|
||||
if (utf) { GETCHAR(c, ptr); } else c = *ptr;
|
||||
#else
|
||||
(void)utf;
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UTF */
|
||||
|
||||
c = *ptr;
|
||||
|
||||
/* Note that this function is called only for ANY or ANYCRLF. */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case CHAR_LF: *lenptr = 1; return TRUE;
|
||||
case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||
return TRUE;
|
||||
default: return FALSE;
|
||||
case CHAR_LF:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
|
@ -106,7 +111,9 @@ else switch(c)
|
|||
#endif
|
||||
case CHAR_LF:
|
||||
case CHAR_VT:
|
||||
case CHAR_FF: *lenptr = 1; return TRUE;
|
||||
case CHAR_FF:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||
|
@ -114,17 +121,26 @@ else switch(c)
|
|||
|
||||
#ifndef EBCDIC
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||
case CHAR_NEL:
|
||||
*lenptr = utf? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 3;
|
||||
return TRUE;
|
||||
|
||||
#else /* 16-bit or 32-bit code units */
|
||||
case CHAR_NEL:
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 1; return TRUE; /* PS */
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
#endif
|
||||
#endif /* Not EBCDIC */
|
||||
|
||||
default: return FALSE;
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -134,8 +150,10 @@ else switch(c)
|
|||
* Check for newline at previous position *
|
||||
*************************************************/
|
||||
|
||||
/* It is guaranteed that the initial value of ptr is greater than the start of
|
||||
the string that is being processed.
|
||||
/* This function is called only via the WAS_NEWLINE macro, which does so only
|
||||
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
|
||||
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial
|
||||
value of ptr is greater than the start of the string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
|
@ -148,8 +166,8 @@ Returns: TRUE or FALSE
|
|||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(was_newline)(PCRE2_SPTR ptr, int type, PCRE2_SPTR startptr, int *lenptr,
|
||||
BOOL utf)
|
||||
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
|
||||
uint32_t *lenptr, BOOL utf)
|
||||
{
|
||||
uint32_t c;
|
||||
ptr--;
|
||||
|
@ -160,23 +178,24 @@ if (utf)
|
|||
BACKCHAR(ptr);
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else
|
||||
else c = *ptr;
|
||||
#else
|
||||
(void)utf;
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UTF */
|
||||
|
||||
c = *ptr;
|
||||
|
||||
/* Note that this function is called only for ANY or ANYCRLF. */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case CHAR_LF:
|
||||
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR: *lenptr = 1; return TRUE;
|
||||
default: return FALSE;
|
||||
case CHAR_CR:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
|
@ -192,21 +211,32 @@ else switch(c)
|
|||
#endif
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
case CHAR_CR: *lenptr = 1; return TRUE;
|
||||
case CHAR_CR:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
#ifndef EBCDIC
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||
case CHAR_NEL:
|
||||
*lenptr = utf? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 3;
|
||||
return TRUE;
|
||||
|
||||
#else /* 16-bit or 32-bit code units */
|
||||
case CHAR_NEL:
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 1; return TRUE; /* PS */
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
#endif
|
||||
#endif /* Not EBCDIC */
|
||||
|
||||
default: return FALSE;
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -150,7 +150,6 @@ switch(what)
|
|||
break;
|
||||
|
||||
case PCRE2_INFO_MATCHLIMIT:
|
||||
if ((re->flags & PCRE2_MLSET) == 0) return PCRE2_ERROR_UNSET;
|
||||
*((uint32_t *)where) = re->limit_match;
|
||||
break;
|
||||
|
||||
|
@ -179,7 +178,6 @@ switch(what)
|
|||
break;
|
||||
|
||||
case PCRE2_INFO_RECURSIONLIMIT:
|
||||
if ((re->flags & PCRE2_RLSET) == 0) return PCRE2_ERROR_UNSET;
|
||||
*((uint32_t *)where) = re->limit_recursion;
|
||||
break;
|
||||
|
||||
|
|
|
@ -2632,7 +2632,7 @@ pattern_info(int what, void *where)
|
|||
{
|
||||
int rc;
|
||||
PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
|
||||
if (rc >= 0 || rc == PCRE2_ERROR_UNSET) return 0;
|
||||
if (rc >= 0) return 0;
|
||||
fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
|
||||
what);
|
||||
if (rc == PCRE2_ERROR_BADMODE)
|
||||
|
@ -2831,7 +2831,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
int nameentrysize, namecount;
|
||||
uint32_t bsr_convention, newline_convention;
|
||||
uint32_t first_cunit, last_cunit;
|
||||
uint32_t match_limit = 0, recursion_limit = 0;
|
||||
uint32_t match_limit, recursion_limit;
|
||||
|
||||
/* These info requests should always succeed. */
|
||||
|
||||
|
@ -2865,10 +2865,10 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
if (maxlookbehind > 0)
|
||||
fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
|
||||
|
||||
if (match_limit > 0)
|
||||
if (match_limit != UINT32_MAX)
|
||||
fprintf(outfile, "Match limit = %u\n", match_limit);
|
||||
|
||||
if (recursion_limit > 0)
|
||||
if (recursion_limit != UINT32_MAX)
|
||||
fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
|
||||
|
||||
if (namecount > 0)
|
||||
|
|
Loading…
Reference in New Issue