The pcre2_match() function's basic facilities are working (though hardly
tested).
This commit is contained in:
Philip.Hazel 2014-06-28 14:23:18 +00:00
parent 06aa11b428
commit 38b570f99d
12 changed files with 7458 additions and 561 deletions

View File

@ -215,7 +215,6 @@ must all be greater than zero. */
#define PCRE2_ERROR_NULL (-50)
#define PCRE2_ERROR_RECURSELOOP (-51)
#define PCRE2_ERROR_RECURSIONLIMIT (-52)
#define PCRE2_ERROR_UNSET (-53)
/* Request types for pcre2_pattern_info() */
@ -309,17 +308,17 @@ typedef struct pcre2_callout_block { \
int version; /* Identifies version of block */ \
/* ------------------------ Version 0 ------------------------------- */ \
int callout_number; /* Number compiled into pattern */ \
int *offset_vector; /* The offset vector */ \
size_t *offset_vector; /* The offset vector */ \
PCRE2_SPTR subject; /* The subject being matched */ \
int subject_length; /* The length of the subject */ \
int start_match; /* Offset to start of this match attempt */ \
int current_position; /* Where we currently are in the subject */ \
int capture_top; /* Max current capture */ \
int capture_last; /* Most recently closed capture */ \
size_t subject_length; /* The length of the subject */ \
size_t start_match; /* Offset to start of this match attempt */ \
size_t current_position; /* Where we currently are in the subject */ \
uint32_t capture_top; /* Max current capture */ \
uint32_t capture_last; /* Most recently closed capture */ \
void *callout_data; /* Data passed in with the call */ \
/* ------------------- Added for Version 1 -------------------------- */ \
int pattern_position; /* Offset to next item in the pattern */ \
int next_item_length; /* Length of next item in the pattern */ \
size_t pattern_position; /* Offset to next item in the pattern */ \
size_t next_item_length; /* Length of next item in the pattern */ \
/* ------------------- Added for Version 2 -------------------------- */ \
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
/* ------------------------------------------------------------------ */ \
@ -381,7 +380,7 @@ PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \
PCRE2_EXP_DECL int pcre2_set_bsr_match(pcre2_match_context *, \
uint32_t); \
PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
int (*)(pcre2_callout_block *, void *)); \
int (*)(pcre2_callout_block *), void *); \
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
uint32_t); \
PCRE2_EXP_DECL int pcre2_set_newline_match(pcre2_match_context *, \

View File

@ -215,7 +215,6 @@ must all be greater than zero. */
#define PCRE2_ERROR_NULL (-50)
#define PCRE2_ERROR_RECURSELOOP (-51)
#define PCRE2_ERROR_RECURSIONLIMIT (-52)
#define PCRE2_ERROR_UNSET (-53)
/* Request types for pcre2_pattern_info() */
@ -309,17 +308,17 @@ typedef struct pcre2_callout_block { \
int version; /* Identifies version of block */ \
/* ------------------------ Version 0 ------------------------------- */ \
int callout_number; /* Number compiled into pattern */ \
int *offset_vector; /* The offset vector */ \
size_t *offset_vector; /* The offset vector */ \
PCRE2_SPTR subject; /* The subject being matched */ \
int subject_length; /* The length of the subject */ \
int start_match; /* Offset to start of this match attempt */ \
int current_position; /* Where we currently are in the subject */ \
int capture_top; /* Max current capture */ \
int capture_last; /* Most recently closed capture */ \
size_t subject_length; /* The length of the subject */ \
size_t start_match; /* Offset to start of this match attempt */ \
size_t current_position; /* Where we currently are in the subject */ \
uint32_t capture_top; /* Max current capture */ \
uint32_t capture_last; /* Most recently closed capture */ \
void *callout_data; /* Data passed in with the call */ \
/* ------------------- Added for Version 1 -------------------------- */ \
int pattern_position; /* Offset to next item in the pattern */ \
int next_item_length; /* Length of next item in the pattern */ \
size_t pattern_position; /* Offset to next item in the pattern */ \
size_t next_item_length; /* Length of next item in the pattern */ \
/* ------------------- Added for Version 2 -------------------------- */ \
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
/* ------------------------------------------------------------------ */ \
@ -381,7 +380,7 @@ PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \
PCRE2_EXP_DECL int pcre2_set_bsr_match(pcre2_match_context *, \
uint32_t); \
PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
int (*)(pcre2_callout_block *, void *)); \
int (*)(pcre2_callout_block *), void *); \
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
uint32_t); \
PCRE2_EXP_DECL int pcre2_set_newline_match(pcre2_match_context *, \

View File

@ -559,14 +559,14 @@ which case the base cannot be possessified.
Arguments:
code points to the byte code
utf TRUE in UTF mode
cd compile data block
cb compile data block
base_list the data list of the base opcode
Returns: TRUE if the auto-possessification is possible
*/
static BOOL
compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_data *cd,
compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_block *cb,
const uint32_t *base_list, PCRE2_SPTR base_end)
{
PCRE2_UCHAR c;
@ -654,7 +654,7 @@ for(;;)
while (*next_code == OP_ALT)
{
if (!compare_opcodes(code, utf, cd, base_list, base_end)) return FALSE;
if (!compare_opcodes(code, utf, cb, base_list, base_end)) return FALSE;
code = next_code + 1 + LINK_SIZE;
next_code += GET(next_code, 1);
}
@ -674,7 +674,7 @@ for(;;)
/* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
next_code += 1 + LINK_SIZE;
if (!compare_opcodes(next_code, utf, cd, base_list, base_end))
if (!compare_opcodes(next_code, utf, cb, base_list, base_end))
return FALSE;
code += PRIV(OP_lengths)[c];
@ -686,7 +686,7 @@ for(;;)
/* Check for a supported opcode, and load its properties. */
code = get_chr_property_list(code, utf, cd->fcc, list);
code = get_chr_property_list(code, utf, cb->fcc, list);
if (code == NULL) return FALSE; /* Unsupported */
/* If either opcode is a small character list, set pointers for comparing
@ -755,21 +755,21 @@ for(;;)
invert_bits = TRUE;
/* Fall through */
case OP_DIGIT:
set2 = (uint8_t *)(cd->cbits + cbit_digit);
set2 = (uint8_t *)(cb->cbits + cbit_digit);
break;
case OP_NOT_WHITESPACE:
invert_bits = TRUE;
/* Fall through */
case OP_WHITESPACE:
set2 = (uint8_t *)(cd->cbits + cbit_space);
set2 = (uint8_t *)(cb->cbits + cbit_space);
break;
case OP_NOT_WORDCHAR:
invert_bits = TRUE;
/* Fall through */
case OP_WORDCHAR:
set2 = (uint8_t *)(cd->cbits + cbit_word);
set2 = (uint8_t *)(cb->cbits + cbit_word);
break;
default:
@ -963,27 +963,27 @@ for(;;)
set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
case OP_DIGIT:
if (chr < 256 && (cd->ctypes[chr] & ctype_digit) != 0) return FALSE;
if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
break;
case OP_NOT_DIGIT:
if (chr > 255 || (cd->ctypes[chr] & ctype_digit) == 0) return FALSE;
if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
break;
case OP_WHITESPACE:
if (chr < 256 && (cd->ctypes[chr] & ctype_space) != 0) return FALSE;
if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
break;
case OP_NOT_WHITESPACE:
if (chr > 255 || (cd->ctypes[chr] & ctype_space) == 0) return FALSE;
if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
break;
case OP_WORDCHAR:
if (chr < 255 && (cd->ctypes[chr] & ctype_word) != 0) return FALSE;
if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
break;
case OP_NOT_WORDCHAR:
if (chr > 255 || (cd->ctypes[chr] & ctype_word) == 0) return FALSE;
if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
break;
case OP_HSPACE:
@ -1095,13 +1095,13 @@ if appropriate. This function modifies the compiled opcode!
Arguments:
code points to start of the byte code
utf TRUE in UTF mode
cd compile data block
cb compile data block
Returns: nothing
*/
void
PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_data *cd)
PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb)
{
register PCRE2_UCHAR c;
PCRE2_SPTR end;
@ -1116,10 +1116,10 @@ for (;;)
{
c -= get_repeat_base(c) - OP_STAR;
end = (c <= OP_MINUPTO) ?
get_chr_property_list(code, utf, cd->fcc, list) : NULL;
get_chr_property_list(code, utf, cb->fcc, list) : NULL;
list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
if (end != NULL && compare_opcodes(end, utf, cd, list, end))
if (end != NULL && compare_opcodes(end, utf, cb, list, end))
{
switch(c)
{
@ -1171,11 +1171,11 @@ for (;;)
if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
{
/* end must not be NULL. */
end = get_chr_property_list(code, utf, cd->fcc, list);
end = get_chr_property_list(code, utf, cb->fcc, list);
list[1] = (c & 1) == 0;
if (compare_opcodes(end, utf, cd, list, end))
if (compare_opcodes(end, utf, cb, list, end))
{
switch (c)
{

File diff suppressed because it is too large Load Diff

View File

@ -172,6 +172,7 @@ mcontext->stack_malloc = mcontext->malloc;
mcontext->stack_free = mcontext->free;
#endif
mcontext->callout = NULL;
mcontext->callout_data = NULL;
mcontext->newline_convention = 0;
mcontext->bsr_convention = 0;
mcontext->match_limit = MATCH_LIMIT;
@ -368,9 +369,10 @@ switch(newline)
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_callout(pcre2_match_context *mcontext,
int (*callout)(pcre2_callout_block *, void *))
int (*callout)(pcre2_callout_block *), void *callout_data)
{
mcontext->callout = callout;
mcontext->callout_data = callout_data;
return 1;
}

View File

@ -228,8 +228,6 @@ static const char match_error_texts[] =
"NULL argument passed\0"
"nested recursion at the same subject position\0"
"recursion limit exceeded\0"
"unknown opcode - pattern overwritten?\0"
"value unset\0" /* Used by pcre2_pattern_info() */
;

View File

@ -522,9 +522,7 @@ bytes in a code unit in that mode. */
#define PCRE2_JCHANGED 0x00000400 /* j option used in pattern */
#define PCRE2_HASCRORLF 0x00000800 /* explicit \r or \n in pattern */
#define PCRE2_HASTHEN 0x00001000 /* pattern contains (*THEN) */
#define PCRE2_MLSET 0x00002000 /* match limit set by pattern */
#define PCRE2_RLSET 0x00004000 /* recursion limit set by pattern */
#define PCRE2_MATCH_EMPTY 0x00008000 /* pattern can match empty string */
#define PCRE2_MATCH_EMPTY 0x00002000 /* pattern can match empty string */
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
@ -540,7 +538,7 @@ endianness. */
/* The maximum remaining length of subject we are prepared to search for a
req_unit match. */
#define REQ_UNIT_MAX 1000
#define REQ_CU_MAX 1000
/* Bit definitions for entries in the pcre_ctypes table. */
@ -1816,8 +1814,10 @@ compiling the library, PCRE2_CODE_UNIT_WIDTH will be defined, and we can
include them at the appropriate width, after setting up suffix macros for the
private structures. */
#define compile_data PCRE2_SUFFIX(compile_data_)
#define branch_chain PCRE2_SUFFIX(branch_chain_)
#define compile_block PCRE2_SUFFIX(compile_block_)
#define dfa_match_block PCRE2_SUFFIX(dfa_match_block_)
#define match_block PCRE2_SUFFIX(match_block_)
#define named_group PCRE2_SUFFIX(named_group_)
#include "pcre2_intmodedep.h"
@ -1845,10 +1845,11 @@ is available. */
#define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_)
#define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_)
extern void _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL, const compile_data *);
extern void _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL, const compile_block *);
extern void _pcre2_compile_context_init(pcre2_compile_context *, BOOL);
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
extern BOOL _pcre2_is_newline(PCRE2_SPTR, int, PCRE2_SPTR, int *, BOOL);
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
BOOL);
extern void _pcre2_match_context_init(pcre2_match_context *, BOOL);
extern void *_pcre2_memctl_malloc(size_t, size_t, pcre2_memctl *);
extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *);
@ -1859,7 +1860,8 @@ extern int _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t);
extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t);
extern int _pcre2_study(pcre2_real_code *);
extern int _pcre2_valid_utf(PCRE2_SPTR, int, size_t *);
extern BOOL _pcre2_was_newline(PCRE2_SPTR, int, PCRE2_SPTR, int *, BOOL);
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR, uint32_t *,
BOOL);
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);
#endif /* PCRE2_CODE_UNIT_WIDTH */

View File

@ -77,6 +77,7 @@ just to undefine them all. */
#undef PUT2INC
#undef PUTCHAR
#undef PUTINC
#undef TABLE_GET
@ -197,8 +198,12 @@ arithmetic results in a signed value. Hence the cast. */
#define PUT2(a,n,d) a[n] = d
#endif
/* Other macros that are different for 8-bit mode. The maximum length of a MARK
name must fit in one code unit; currently it is set to 255 or 65535. */
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
whether its argument is less than 256. The maximum length of a MARK name must
fit in one code unit; currently it is set to 255 or 65535. The TABLE_GET macro
is used to access elements of tables containing exactly 256 items. When code
points can be greater than 255, a check is needed before accessing these
tables. */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MAX_255(c) TRUE
@ -206,11 +211,13 @@ name must fit in one code unit; currently it is set to 255 or 65535. */
#ifdef SUPPORT_UTF
#define SUPPORT_WIDE_CHARS
#endif /* SUPPORT_UTF */
#define TABLE_GET(c, table, default) ((table)[c])
#else /* Code units are 16 or 32 bits */
#define MAX_255(c) ((c) <= 255u)
#define MAX_MARK ((1u << 16) - 1)
#define SUPPORT_WIDE_CHARS
#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
#endif
@ -557,7 +564,8 @@ typedef struct pcre2_real_match_context {
void * (*stack_malloc)(size_t, void *);
void (*stack_free)(void *, void *);
#endif
int (*callout)(pcre2_callout_block *, void *);
int (*callout)(pcre2_callout_block *);
void *callout_data;
uint16_t bsr_convention;
uint16_t newline_convention;
uint32_t match_limit;
@ -632,7 +640,7 @@ typedef struct named_group {
/* Structure for passing "static" information around between the functions
doing the compiling, so that they are thread-safe. */
typedef struct compile_data {
typedef struct compile_block {
pcre2_real_compile_context *cx; /* Points to the compile context */
const uint8_t *lcc; /* Points to lower casing table */
const uint8_t *fcc; /* Points to case-flipping table */
@ -643,32 +651,131 @@ typedef struct compile_data {
PCRE2_SPTR start_pattern; /* The start of the pattern */
PCRE2_SPTR end_pattern; /* The end of the pattern */
PCRE2_UCHAR *hwm; /* High watermark of workspace */
PCRE2_UCHAR *name_table; /* The name/number table */
size_t workspace_size; /* Size of workspace */
uint16_t names_found; /* Number of entries so far */
uint16_t name_entry_size; /* Size of each entry */
open_capitem *open_caps; /* Chain of open capture items */
named_group *named_groups; /* Points to vector in pre-compile */
PCRE2_UCHAR *name_table; /* The name/number table */
int names_found; /* Number of entries so far */
int name_entry_size; /* Size of each entry */
int named_group_list_size; /* Number of entries in the list */
int workspace_size; /* Size of workspace */
unsigned int bracount; /* Count of capturing parens as we compile */
int final_bracount; /* Saved value after first pass */
int max_lookbehind; /* Maximum lookbehind (characters) */
int top_backref; /* Maximum back reference */
unsigned int backref_map; /* Bitmap of low back refs */
unsigned int namedrefcount; /* Number of backreferences by name */
int parens_depth; /* Depth of nested parentheses */
int assert_depth; /* Depth of nested assertions */
uint32_t named_group_list_size; /* Number of entries in the list */
uint32_t external_options; /* External (initial) options */
uint32_t external_flags; /* External flag bits to be set */
uint32_t bracount; /* Count of capturing parens as we compile */
uint32_t final_bracount; /* Saved value after first pass */
uint32_t top_backref; /* Maximum back reference */
uint32_t backref_map; /* Bitmap of low back refs */
uint32_t namedrefcount; /* Number of backreferences by name */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
int max_lookbehind; /* Maximum lookbehind (characters) */
int parens_depth; /* Depth of nested parentheses */
int assert_depth; /* Depth of nested assertions */
int req_varyopt; /* "After variable item" flag for reqbyte */
BOOL had_accept; /* (*ACCEPT) encountered */
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
BOOL check_lookbehind; /* Lookbehinds need later checking */
BOOL dupnames; /* Duplicate names exist */
int nltype; /* Newline type */
int nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
} compile_data;
} compile_block;
/* Structure for items in a linked list that represents an explicit recursive
call within the pattern; used by pcre2_match(). Each record points back at the
previous one through prevrec, which may be NULL.

NOTE(review): group_num is unsigned int here but uint32_t in
dfa_recursion_info - confirm whether the two should agree. */
typedef struct recursion_info {
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
unsigned int group_num; /* Number of group that was called */
size_t *offset_save; /* Pointer to start of saved offsets */
uint32_t saved_max; /* Number of saved offsets */
uint32_t saved_capture_last; /* Last capture number */
PCRE2_SPTR subject_position; /* Position at start of recursion */
} recursion_info;
/* A similar structure for pcre2_dfa_match(). It keeps only the link to the
previous record, the group number and the subject position; the fields carry
the same names as their counterparts in recursion_info and presumably the same
meanings. */
typedef struct dfa_recursion_info {
struct dfa_recursion_info *prevrec; /* Previous recursion record */
uint32_t group_num; /* Number of group that was called */
PCRE2_SPTR subject_position; /* Position at start of recursion */
} dfa_recursion_info;
/* Structure for building a chain of data for holding the values of the subject
pointer at the start of each subpattern, so as to detect when an empty string
has been matched by a subpattern - to break infinite loops; used by
pcre2_match(). */
typedef struct eptrblock {
struct eptrblock *epb_prev; /* Previous block in the chain */
PCRE2_SPTR epb_saved_eptr; /* Subject pointer saved at subpattern start */
} eptrblock;
/* Structure for passing "static" information around between the functions
doing traditional NFA matching (pcre2_match() and friends). Keeping this state
in a block that is handed from function to function, rather than in file-scope
variables, is what lets the matching code avoid shared static data. */
typedef struct match_block {
pcre2_memctl memctl; /* Memory control data (type pcre2_memctl) */
/* NOTE(review): the three counters below are documented only as "As it says".
From the names, match_call_count presumably counts calls of match() and is
compared with match_limit, while match_limit_recursion presumably bounds the
recursion depth - confirm against the matching code. They are unsigned long int
here, whereas the match context stores match_limit as uint32_t. */
unsigned long int match_call_count; /* As it says */
unsigned long int match_limit; /* As it says */
unsigned long int match_limit_recursion; /* As it says */
BOOL hitend; /* Hit the end of the subject at some point */
BOOL hasthen; /* Pattern contains (*THEN) */
const uint8_t *lcc; /* Points to lower casing table */
const uint8_t *fcc; /* Points to case-flipping table */
const uint8_t *ctypes; /* Points to table of type maps */
size_t *ovector; /* Pointer to the offset vector */
size_t offset_end; /* One past the end */
size_t offset_max; /* The maximum usable for return data */
size_t start_offset; /* The start offset value */
size_t end_offset_top; /* Highwater mark at end of match */
uint16_t partial; /* PARTIAL options */
uint16_t bsr_convention; /* \R interpretation */
uint16_t name_count; /* Number of names in name table */
uint16_t name_entry_size; /* Size of entry in names table */
PCRE2_SPTR name_table; /* Table of group names */
PCRE2_SPTR start_code; /* For use when recursing */
PCRE2_SPTR start_subject; /* Start of the subject string */
PCRE2_SPTR end_subject; /* End of the subject string */
PCRE2_SPTR start_match_ptr; /* Start of matched string */
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
PCRE2_SPTR mark; /* Mark pointer to pass back on success */
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
PCRE2_SPTR once_target; /* Where to back up to for atomic groups */
uint32_t moptions; /* Match options */
uint32_t poptions; /* Pattern options */
uint32_t capture_last; /* Most recent capture number + overflow flag */
uint32_t skip_arg_count; /* For counting SKIP_ARGs */
uint32_t ignore_skip_arg; /* For re-run when SKIP arg name not found */
uint32_t match_function_type; /* Set for certain special calls of match() */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
recursion_info *recursive; /* Linked list of recursion data */
void *callout_data; /* To pass back to callouts */
int (*callout)(pcre2_callout_block *); /* Callout function or NULL */
#ifdef NO_RECURSE
void *match_frames_base; /* For remembering malloc'd frames */
#endif
} match_block;
/* A similar structure is used for the same purpose by the DFA matching
functions. The newline type and length are uint32_t, as they are in match_block
and compile_block, because PRIV(is_newline)() and PRIV(was_newline)() take the
type as uint32_t and write the length through a uint32_t pointer; an int field
could not have its address passed to them.

NOTE(review): start_offset is int here but size_t in match_block - confirm
whether it should follow once the DFA matcher is converted. */
typedef struct dfa_match_block {
PCRE2_SPTR start_code; /* Start of the compiled pattern */
PCRE2_SPTR start_subject ; /* Start of the subject string */
PCRE2_SPTR end_subject; /* End of subject string */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
const uint8_t *tables; /* Character tables */
int start_offset; /* The start offset value */
uint32_t moptions; /* Match options */
uint32_t poptions; /* Pattern options */
uint32_t nltype; /* Newline type */
uint32_t nllen; /* Newline string length */
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
void *callout_data; /* To pass back to callouts */
dfa_recursion_info *recursive; /* Linked list of recursion data */
} dfa_match_block;
#endif /* PCRE2_PCRE2TEST */

File diff suppressed because it is too large Load Diff

View File

@ -60,8 +60,10 @@ http://unicode.org/unicode/reports/tr18/. */
* Check for newline at given position *
*************************************************/
/* It is guaranteed that the initial value of ptr is less than the end of the
string that is being processed.
/* This function is called only via the IS_NEWLINE macro, which does so only
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that ptr points to a
code unit that lies before the end of the string.
Arguments:
ptr pointer to possible newline
@ -74,27 +76,30 @@ Returns: TRUE or FALSE
*/
BOOL
PRIV(is_newline)(PCRE2_SPTR ptr, int type, PCRE2_SPTR endptr, int *lenptr,
BOOL utf)
PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
uint32_t *lenptr, BOOL utf)
{
uint32_t c;
#ifdef SUPPORT_UTF
if (utf) { GETCHAR(c, ptr); } else
if (utf) { GETCHAR(c, ptr); } else c = *ptr;
#else
(void)utf;
c = *ptr;
#endif /* SUPPORT_UTF */
c = *ptr;
/* Note that this function is called only for ANY or ANYCRLF. */
if (type == NLTYPE_ANYCRLF) switch(c)
{
case CHAR_LF: *lenptr = 1; return TRUE;
case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
return TRUE;
default: return FALSE;
case CHAR_LF:
*lenptr = 1;
return TRUE;
case CHAR_CR:
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
return TRUE;
default:
return FALSE;
}
/* NLTYPE_ANY */
@ -106,7 +111,9 @@ else switch(c)
#endif
case CHAR_LF:
case CHAR_VT:
case CHAR_FF: *lenptr = 1; return TRUE;
case CHAR_FF:
*lenptr = 1;
return TRUE;
case CHAR_CR:
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
@ -114,17 +121,26 @@ else switch(c)
#ifndef EBCDIC
#if PCRE2_CODE_UNIT_WIDTH == 8
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
case 0x2028: /* LS */
case 0x2029: *lenptr = 3; return TRUE; /* PS */
case CHAR_NEL:
*lenptr = utf? 2 : 1;
return TRUE;
case 0x2028: /* LS */
case 0x2029: /* PS */
*lenptr = 3;
return TRUE;
#else /* 16-bit or 32-bit code units */
case CHAR_NEL:
case 0x2028: /* LS */
case 0x2029: *lenptr = 1; return TRUE; /* PS */
case 0x2028: /* LS */
case 0x2029: /* PS */
*lenptr = 1;
return TRUE;
#endif
#endif /* Not EBCDIC */
default: return FALSE;
default:
return FALSE;
}
}
@ -134,8 +150,10 @@ else switch(c)
* Check for newline at previous position *
*************************************************/
/* It is guaranteed that the initial value of ptr is greater than the start of
the string that is being processed.
/* This function is called only via the WAS_NEWLINE macro, which does so only
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the initial
value of ptr is greater than the start of the string that is being processed.
Arguments:
ptr pointer to possible newline
@ -148,8 +166,8 @@ Returns: TRUE or FALSE
*/
BOOL
PRIV(was_newline)(PCRE2_SPTR ptr, int type, PCRE2_SPTR startptr, int *lenptr,
BOOL utf)
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
uint32_t *lenptr, BOOL utf)
{
uint32_t c;
ptr--;
@ -160,23 +178,24 @@ if (utf)
BACKCHAR(ptr);
GETCHAR(c, ptr);
}
else
else c = *ptr;
#else
(void)utf;
c = *ptr;
#endif /* SUPPORT_UTF */
c = *ptr;
/* Note that this function is called only for ANY or ANYCRLF. */
if (type == NLTYPE_ANYCRLF) switch(c)
{
case CHAR_LF:
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
return TRUE;
case CHAR_CR: *lenptr = 1; return TRUE;
default: return FALSE;
case CHAR_CR:
*lenptr = 1;
return TRUE;
default:
return FALSE;
}
/* NLTYPE_ANY */
@ -192,21 +211,32 @@ else switch(c)
#endif
case CHAR_VT:
case CHAR_FF:
case CHAR_CR: *lenptr = 1; return TRUE;
case CHAR_CR:
*lenptr = 1;
return TRUE;
#ifndef EBCDIC
#if PCRE2_CODE_UNIT_WIDTH == 8
case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
case 0x2028: /* LS */
case 0x2029: *lenptr = 3; return TRUE; /* PS */
case CHAR_NEL:
*lenptr = utf? 2 : 1;
return TRUE;
case 0x2028: /* LS */
case 0x2029: /* PS */
*lenptr = 3;
return TRUE;
#else /* 16-bit or 32-bit code units */
case CHAR_NEL:
case 0x2028: /* LS */
case 0x2029: *lenptr = 1; return TRUE; /* PS */
case 0x2028: /* LS */
case 0x2029: /* PS */
*lenptr = 1;
return TRUE;
#endif
#endif /* Not EBCDIC */
default: return FALSE;
default:
return FALSE;
}
}

View File

@ -150,7 +150,6 @@ switch(what)
break;
case PCRE2_INFO_MATCHLIMIT:
if ((re->flags & PCRE2_MLSET) == 0) return PCRE2_ERROR_UNSET;
*((uint32_t *)where) = re->limit_match;
break;
@ -179,7 +178,6 @@ switch(what)
break;
case PCRE2_INFO_RECURSIONLIMIT:
if ((re->flags & PCRE2_RLSET) == 0) return PCRE2_ERROR_UNSET;
*((uint32_t *)where) = re->limit_recursion;
break;

View File

@ -2632,7 +2632,7 @@ pattern_info(int what, void *where)
{
int rc;
PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
if (rc >= 0 || rc == PCRE2_ERROR_UNSET) return 0;
if (rc >= 0) return 0;
fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
what);
if (rc == PCRE2_ERROR_BADMODE)
@ -2831,7 +2831,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
int nameentrysize, namecount;
uint32_t bsr_convention, newline_convention;
uint32_t first_cunit, last_cunit;
uint32_t match_limit = 0, recursion_limit = 0;
uint32_t match_limit, recursion_limit;
/* These info requests should always succeed. */
@ -2865,10 +2865,10 @@ if ((pat_patctl.control & CTL_INFO) != 0)
if (maxlookbehind > 0)
fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
if (match_limit > 0)
if (match_limit != UINT32_MAX)
fprintf(outfile, "Match limit = %u\n", match_limit);
if (recursion_limit > 0)
if (recursion_limit != UINT32_MAX)
fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
if (namecount > 0)