More experimental convert code evolution.

This commit is contained in:
Philip.Hazel 2017-05-23 16:08:48 +00:00
parent 51df11a591
commit 231a800557
8 changed files with 169 additions and 98 deletions

View File

@ -187,7 +187,7 @@ ignored for pcre2_jit_match(). */
#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u #define PCRE2_CONVERT_POSIX_BASIC 0x00000004u
#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u #define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u
#define PCRE2_CONVERT_GLOB 0x00000010u #define PCRE2_CONVERT_GLOB 0x00000010u
#define PCRE2_CONVERT_GLOB_NO_BACKSLASH 0x00000030u #define PCRE2_CONVERT_GLOB_NO_DOT_SPECIAL 0x00000030u
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000050u #define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000050u
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000090u #define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000090u
#define PCRE2_CONVERT_GLOB_BASIC 0x000000f0u #define PCRE2_CONVERT_GLOB_BASIC 0x000000f0u
@ -496,6 +496,8 @@ PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
*pcre2_convert_context_create(pcre2_general_context *); \ *pcre2_convert_context_create(pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_convert_context_free(pcre2_convert_context *); \ pcre2_convert_context_free(pcre2_convert_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_glob_separator(pcre2_convert_context *, uint32_t); pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
@ -733,6 +735,7 @@ pcre2_compile are called by application code. */
#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_) #define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_) #define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_) #define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_)
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_) #define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_) #define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)

View File

@ -187,7 +187,7 @@ ignored for pcre2_jit_match(). */
#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u #define PCRE2_CONVERT_POSIX_BASIC 0x00000004u
#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u #define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u
#define PCRE2_CONVERT_GLOB 0x00000010u #define PCRE2_CONVERT_GLOB 0x00000010u
#define PCRE2_CONVERT_GLOB_NO_BACKSLASH 0x00000030u #define PCRE2_CONVERT_GLOB_NO_DOT_SPECIAL 0x00000030u
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000050u #define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000050u
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000090u #define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000090u
#define PCRE2_CONVERT_GLOB_BASIC 0x000000f0u #define PCRE2_CONVERT_GLOB_BASIC 0x000000f0u
@ -496,6 +496,8 @@ PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
*pcre2_convert_context_create(pcre2_general_context *); \ *pcre2_convert_context_create(pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_convert_context_free(pcre2_convert_context *); \ pcre2_convert_context_free(pcre2_convert_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_glob_separator(pcre2_convert_context *, uint32_t); pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
@ -733,6 +735,7 @@ pcre2_compile are called by application code. */
#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_) #define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_) #define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_) #define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_)
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_) #define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_) #define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)

View File

@ -189,15 +189,17 @@ return mcontext;
} }
/* A default covert context is set up to save having to initialize at run time /* A default convert context is set up to save having to initialize at run time
when no context is supplied to the convert function. */ when no context is supplied to the convert function. */
const pcre2_convert_context PRIV(default_convert_context) = { const pcre2_convert_context PRIV(default_convert_context) = {
{ default_malloc, default_free, NULL }, /* Default memory handling */ { default_malloc, default_free, NULL }, /* Default memory handling */
#ifdef _WIN32 #ifdef _WIN32
CHAR_BACKSLASH /* Default path separator */ CHAR_BACKSLASH, /* Default path separator */
#else /* is OS dependent */ CHAR_GRAVE_ACCENT /* Default escape character */
CHAR_SLASH /* Not Windows */ #else /* Not Windows */
CHAR_SLASH, /* Default path separator */
CHAR_BACKSLASH /* Default escape character */
#endif #endif
}; };
@ -454,6 +456,14 @@ ccontext->glob_separator = separator;
return 0; return 0;
} }
/* Set the escape character that is stored in a convert context for use when
converting glob patterns.

Arguments:
  ccontext   the convert context to update
  escape     the escape character; zero is accepted as is (the glob
             conversion code treats a zero escape as "no escape character"),
             any other value must not exceed 255 and must be classified as
             punctuation by ispunct()

Returns:     0 on success, or PCRE2_ERROR_BADDATA if the value is rejected,
             in which case the context is left unchanged
*/

PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_glob_escape(pcre2_convert_context *ccontext, uint32_t escape)
{
if (escape != 0)
  {
  /* The range test comes first so that ispunct() is only ever handed a value
  that fits in an unsigned char. */
  if (escape > 255 || !ispunct(escape)) return PCRE2_ERROR_BADDATA;
  }

ccontext->glob_escape = escape;
return 0;
}
/* End of pcre2_context.c */ /* End of pcre2_context.c */

View File

@ -49,8 +49,10 @@ POSSIBILITY OF SUCH DAMAGE.
PCRE2_CONVERT_POSIX_BASIC|PCRE2_CONVERT_POSIX_EXTENDED) PCRE2_CONVERT_POSIX_BASIC|PCRE2_CONVERT_POSIX_EXTENDED)
#define ALL_OPTIONS (PCRE2_CONVERT_UTF|PCRE2_CONVERT_NO_UTF_CHECK| \ #define ALL_OPTIONS (PCRE2_CONVERT_UTF|PCRE2_CONVERT_NO_UTF_CHECK| \
PCRE2_CONVERT_GLOB_NO_BACKSLASH|PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \ PCRE2_CONVERT_GLOB_NO_DOT_SPECIAL| \
PCRE2_CONVERT_GLOB_NO_STARSTAR|TYPE_OPTIONS) PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \
PCRE2_CONVERT_GLOB_NO_STARSTAR| \
TYPE_OPTIONS)
#define DUMMY_BUFFER_SIZE 100 #define DUMMY_BUFFER_SIZE 100
@ -76,7 +78,7 @@ POSSIBILITY OF SUCH DAMAGE.
/* States for range and POSIX processing */ /* States for range and POSIX processing */
enum { RANGE_NOT_STARTED, RANGE_STARTING, RANGE_STARTED }; enum { RANGE_NOT_STARTED, RANGE_STARTING, RANGE_STARTED };
enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET, enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET,
POSIX_CLASS_NOT_STARTED, POSIX_CLASS_STARTING, POSIX_CLASS_STARTED }; POSIX_CLASS_NOT_STARTED, POSIX_CLASS_STARTING, POSIX_CLASS_STARTED };
/* Macro to add a character string to the output buffer, checking for overflow. */ /* Macro to add a character string to the output buffer, checking for overflow. */
@ -89,23 +91,23 @@ enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET,
*p++ = *s; \ *p++ = *s; \
} \ } \
} }
/* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( ) */ /* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( ) */
static const char *pcre2_escaped_literals = static const char *pcre2_escaped_literals =
STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS
STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN
STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS; STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS;
/* Recognized escapes in POSIX basic patterns. */ /* Recognized escapes in POSIX basic patterns. */
static const char *posix_basic_escapes = static const char *posix_basic_escapes =
STR_QUESTION_MARK STR_PLUS STR_VERTICAL_LINE STR_QUESTION_MARK STR_PLUS STR_VERTICAL_LINE
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS
STR_0 STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9; STR_0 STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
/************************************************* /*************************************************
@ -186,46 +188,46 @@ while (plength > 0)
if (posix_state >= POSIX_CLASS_NOT_STARTED) if (posix_state >= POSIX_CLASS_NOT_STARTED)
{ {
if (c == CHAR_RIGHT_SQUARE_BRACKET) if (c == CHAR_RIGHT_SQUARE_BRACKET)
{ {
PUTCHARS(STR_RIGHT_SQUARE_BRACKET); PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
posix_state = POSIX_NOT_BRACKET; posix_state = POSIX_NOT_BRACKET;
} }
/* Not the end of the class */ /* Not the end of the class */
else else
{ {
switch (posix_state) switch (posix_state)
{ {
case POSIX_CLASS_STARTED: case POSIX_CLASS_STARTED:
if (c <= 127 && islower(c)) break; /* Remain in started state */ if (c <= 127 && islower(c)) break; /* Remain in started state */
posix_state = POSIX_CLASS_NOT_STARTED; posix_state = POSIX_CLASS_NOT_STARTED;
if (c == CHAR_COLON && plength > 0 && if (c == CHAR_COLON && plength > 0 &&
*posix == CHAR_RIGHT_SQUARE_BRACKET) *posix == CHAR_RIGHT_SQUARE_BRACKET)
{ {
PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET); PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET);
plength--; plength--;
posix++; posix++;
continue; /* With next character after :] */ continue; /* With next character after :] */
} }
/* Fall through */ /* Fall through */
case POSIX_CLASS_NOT_STARTED: case POSIX_CLASS_NOT_STARTED:
if (c == CHAR_LEFT_SQUARE_BRACKET) if (c == CHAR_LEFT_SQUARE_BRACKET)
posix_state = POSIX_CLASS_STARTING; posix_state = POSIX_CLASS_STARTING;
break; break;
case POSIX_CLASS_STARTING: case POSIX_CLASS_STARTING:
if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED; if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED;
break; break;
} }
if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH); if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH);
if (p + clength > endp) return PCRE2_ERROR_NOMEMORY; if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
memcpy(p, posix - clength, CU2BYTES(clength)); memcpy(p, posix - clength, CU2BYTES(clength));
p += clength; p += clength;
} }
} }
/* Handle a character not within a class. */ /* Handle a character not within a class. */
@ -234,31 +236,31 @@ while (plength > 0)
{ {
case CHAR_LEFT_SQUARE_BRACKET: case CHAR_LEFT_SQUARE_BRACKET:
PUTCHARS(STR_LEFT_SQUARE_BRACKET); PUTCHARS(STR_LEFT_SQUARE_BRACKET);
/* Handle special cases [[:<:]] and [[:>:]] (which PCRE does support) */ /* Handle special cases [[:<:]] and [[:>:]] (which PCRE does support) */
if (plength >= 6) if (plength >= 6)
{ {
if (posix[0] == CHAR_LEFT_SQUARE_BRACKET && if (posix[0] == CHAR_LEFT_SQUARE_BRACKET &&
posix[1] == CHAR_COLON && posix[1] == CHAR_COLON &&
(posix[2] == CHAR_LESS_THAN_SIGN || (posix[2] == CHAR_LESS_THAN_SIGN ||
posix[2] == CHAR_GREATER_THAN_SIGN) && posix[2] == CHAR_GREATER_THAN_SIGN) &&
posix[3] == CHAR_COLON && posix[3] == CHAR_COLON &&
posix[4] == CHAR_RIGHT_SQUARE_BRACKET && posix[4] == CHAR_RIGHT_SQUARE_BRACKET &&
posix[5] == CHAR_RIGHT_SQUARE_BRACKET) posix[5] == CHAR_RIGHT_SQUARE_BRACKET)
{ {
if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY; if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY;
memcpy(p, posix, CU2BYTES(6)); memcpy(p, posix, CU2BYTES(6));
p += 6; p += 6;
posix += 6; posix += 6;
plength -= 6; plength -= 6;
continue; /* With next character */ continue; /* With next character */
} }
} }
/* Handle "normal" character classes */ /* Handle "normal" character classes */
posix_state = POSIX_CLASS_NOT_STARTED; posix_state = POSIX_CLASS_NOT_STARTED;
/* Handle ^ and ] as first characters */ /* Handle ^ and ] as first characters */
@ -275,23 +277,23 @@ while (plength > 0)
posix++; posix++;
plength--; plength--;
PUTCHARS(STR_RIGHT_SQUARE_BRACKET); PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
} }
} }
break; break;
case CHAR_BACKSLASH: case CHAR_BACKSLASH:
if (plength <= 0) return ERROR_END_BACKSLASH; if (plength <= 0) return ERROR_END_BACKSLASH;
if (!extended && *posix < 127 && if (!extended && *posix < 127 &&
strchr(posix_basic_escapes, *posix) != NULL) strchr(posix_basic_escapes, *posix) != NULL)
{ {
if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH); if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY; if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
lastspecial = *p++ = *posix++; lastspecial = *p++ = *posix++;
plength--; plength--;
} }
else nextisliteral = TRUE; else nextisliteral = TRUE;
break; break;
case CHAR_RIGHT_PARENTHESIS: case CHAR_RIGHT_PARENTHESIS:
if (!extended || bracount == 0) goto ESCAPE_LITERAL; if (!extended || bracount == 0) goto ESCAPE_LITERAL;
bracount--; bracount--;
@ -299,60 +301,60 @@ while (plength > 0)
case CHAR_LEFT_PARENTHESIS: case CHAR_LEFT_PARENTHESIS:
bracount++; bracount++;
/* Fall through */ /* Fall through */
case CHAR_QUESTION_MARK: case CHAR_QUESTION_MARK:
case CHAR_PLUS: case CHAR_PLUS:
case CHAR_LEFT_CURLY_BRACKET: case CHAR_LEFT_CURLY_BRACKET:
case CHAR_RIGHT_CURLY_BRACKET: case CHAR_RIGHT_CURLY_BRACKET:
case CHAR_VERTICAL_LINE: case CHAR_VERTICAL_LINE:
if (!extended) goto ESCAPE_LITERAL; if (!extended) goto ESCAPE_LITERAL;
/* Fall through */ /* Fall through */
case CHAR_DOT: case CHAR_DOT:
case CHAR_DOLLAR_SIGN: case CHAR_DOLLAR_SIGN:
posix_state = POSIX_NOT_BRACKET; posix_state = POSIX_NOT_BRACKET;
COPY_SPECIAL: COPY_SPECIAL:
lastspecial = c; lastspecial = c;
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY; if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
*p++ = c; *p++ = c;
break; break;
case CHAR_ASTERISK: case CHAR_ASTERISK:
if (lastspecial != CHAR_ASTERISK) if (lastspecial != CHAR_ASTERISK)
{ {
if (!extended && posix_state < POSIX_NOT_BRACKET) if (!extended && posix_state < POSIX_NOT_BRACKET)
goto ESCAPE_LITERAL; goto ESCAPE_LITERAL;
goto COPY_SPECIAL; goto COPY_SPECIAL;
} }
break; /* Ignore second and subsequent asterisks */ break; /* Ignore second and subsequent asterisks */
case CHAR_CIRCUMFLEX_ACCENT: case CHAR_CIRCUMFLEX_ACCENT:
if (extended) goto COPY_SPECIAL; if (extended) goto COPY_SPECIAL;
if (posix_state == POSIX_START_REGEX || if (posix_state == POSIX_START_REGEX ||
lastspecial == CHAR_LEFT_PARENTHESIS) lastspecial == CHAR_LEFT_PARENTHESIS)
{ {
posix_state = POSIX_ANCHORED; posix_state = POSIX_ANCHORED;
goto COPY_SPECIAL; goto COPY_SPECIAL;
} }
/* Fall through */ /* Fall through */
default: default:
if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL) if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
{ {
ESCAPE_LITERAL: ESCAPE_LITERAL:
PUTCHARS(STR_BACKSLASH); PUTCHARS(STR_BACKSLASH);
} }
lastspecial = 0xff; /* Indicates nothing special */ lastspecial = 0xff; /* Indicates nothing special */
if (p + clength > endp) return PCRE2_ERROR_NOMEMORY; if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
memcpy(p, posix - clength, CU2BYTES(clength)); memcpy(p, posix - clength, CU2BYTES(clength));
p += clength; p += clength;
posix_state = POSIX_NOT_BRACKET; posix_state = POSIX_NOT_BRACKET;
break; break;
} }
} }
if (posix_state >= POSIX_CLASS_NOT_STARTED) if (posix_state >= POSIX_CLASS_NOT_STARTED)
return ERROR_MISSING_SQUARE_BRACKET; return ERROR_MISSING_SQUARE_BRACKET;
convlength += p - pp; /* Final segment */ convlength += p - pp; /* Final segment */
*bufflenptr = convlength; *bufflenptr = convlength;
@ -726,7 +728,7 @@ PCRE2_SPTR pattern_start = pattern;
PCRE2_SPTR pattern_end = pattern + plength; PCRE2_SPTR pattern_end = pattern + plength;
PCRE2_UCHAR separator = ccontext->glob_separator; PCRE2_UCHAR separator = ccontext->glob_separator;
PCRE2_UCHAR c; PCRE2_UCHAR c;
BOOL no_backslash = (options & PCRE2_CONVERT_GLOB_NO_BACKSLASH) != 0; BOOL no_escape = ccontext->glob_escape == 0;
BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0; BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0;
BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0; BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0;
BOOL in_atomic = FALSE; BOOL in_atomic = FALSE;
@ -734,6 +736,8 @@ BOOL after_starstar = FALSE;
BOOL with_escape, is_start; BOOL with_escape, is_start;
int result, len; int result, len;
(void)utf; /* Avoid compiler warning */
if (separator >= 128) if (separator >= 128)
{ {
/* Currently only ASCII separators are supported. */ /* Currently only ASCII separators are supported. */
@ -805,7 +809,7 @@ while (pattern < pattern_end)
break; break;
} }
if (!no_backslash && *pattern == CHAR_BACKSLASH) if (!no_escape && *pattern == ccontext->glob_escape)
{ {
pattern++; pattern++;
if (pattern >= pattern_end) if (pattern >= pattern_end)
@ -925,11 +929,11 @@ while (pattern < pattern_end)
continue; continue;
} }
if (!no_backslash && c == CHAR_BACKSLASH) if (!no_escape && c == ccontext->glob_escape)
{ {
if (pattern >= pattern_end) if (pattern >= pattern_end)
{ {
result = ERROR_END_BACKSLASH; result = PCRE2_ERROR_CONVERT_SYNTAX;
break; break;
} }
c = *pattern++; c = *pattern++;

View File

@ -572,7 +572,7 @@ typedef struct pcre2_real_compile_context {
uint16_t bsr_convention; uint16_t bsr_convention;
uint16_t newline_convention; uint16_t newline_convention;
uint32_t parens_nest_limit; uint32_t parens_nest_limit;
uint32_t extra_options; uint32_t extra_options;
} pcre2_real_compile_context; } pcre2_real_compile_context;
/* The real match context structure. */ /* The real match context structure. */
@ -586,7 +586,7 @@ typedef struct pcre2_real_match_context {
int (*callout)(pcre2_callout_block *, void *); int (*callout)(pcre2_callout_block *, void *);
void *callout_data; void *callout_data;
PCRE2_SIZE offset_limit; PCRE2_SIZE offset_limit;
uint32_t heap_limit; uint32_t heap_limit;
uint32_t match_limit; uint32_t match_limit;
uint32_t depth_limit; uint32_t depth_limit;
} pcre2_real_match_context; } pcre2_real_match_context;
@ -595,7 +595,8 @@ typedef struct pcre2_real_match_context {
typedef struct pcre2_real_convert_context { typedef struct pcre2_real_convert_context {
pcre2_memctl memctl; pcre2_memctl memctl;
uint32_t glob_separator; uint32_t glob_separator;
uint32_t glob_escape;
} pcre2_real_convert_context; } pcre2_real_convert_context;
/* The real compiled code structure. The type for the blocksize field is /* The real compiled code structure. The type for the blocksize field is
@ -623,7 +624,7 @@ typedef struct pcre2_real_code {
uint32_t compile_options; /* Options passed to pcre2_compile() */ uint32_t compile_options; /* Options passed to pcre2_compile() */
uint32_t overall_options; /* Options after processing the pattern */ uint32_t overall_options; /* Options after processing the pattern */
uint32_t flags; /* Various state flags */ uint32_t flags; /* Various state flags */
uint32_t limit_heap; /* Limit set in the pattern */ uint32_t limit_heap; /* Limit set in the pattern */
uint32_t limit_match; /* Limit set in the pattern */ uint32_t limit_match; /* Limit set in the pattern */
uint32_t limit_depth; /* Limit set in the pattern */ uint32_t limit_depth; /* Limit set in the pattern */
uint32_t first_codeunit; /* Starting code unit */ uint32_t first_codeunit; /* Starting code unit */
@ -638,9 +639,9 @@ typedef struct pcre2_real_code {
uint16_t name_count; /* Number of name entries in the table */ uint16_t name_count; /* Number of name entries in the table */
} pcre2_real_code; } pcre2_real_code;
/* The real match data structure. Define ovector large so that array bound /* The real match data structure. Define ovector large so that array bound
checkers don't grumble. Memory for this structure is obtained by calling checkers don't grumble. Memory for this structure is obtained by calling
pcre2_match_data_create(), which sets the size as the offset of ovector plus pcre2_match_data_create(), which sets the size as the offset of ovector plus
pairs of elements for each capturing group. (See also the heapframe structure pairs of elements for each capturing group. (See also the heapframe structure
below.) */ below.) */
@ -781,7 +782,7 @@ typedef struct heapframe {
PCRE2_SPTR ecode; /* The current position in the pattern */ PCRE2_SPTR ecode; /* The current position in the pattern */
PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE_SPTR values */ PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE_SPTR values */
PCRE2_SIZE length; /* Used for character, string, or code lengths */ PCRE2_SIZE length; /* Used for character, string, or code lengths */
PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */ PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */
PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */ PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */
uint32_t rdepth; /* "Recursion" depth */ uint32_t rdepth; /* "Recursion" depth */
uint32_t group_frame_type; /* Type information for group frames */ uint32_t group_frame_type; /* Type information for group frames */
@ -798,15 +799,15 @@ typedef struct heapframe {
#endif #endif
/* The rest have to be copied from the previous frame whenever a new frame /* The rest have to be copied from the previous frame whenever a new frame
becomes current. The final field is specified as a large vector so that becomes current. The final field is specified as a large vector so that
runtime array bound checks don't catch references to it. However, for any runtime array bound checks don't catch references to it. However, for any
specific call to pcre2_match() the memory allocated for each frame structure specific call to pcre2_match() the memory allocated for each frame structure
allows for exactly the right size ovector for the number of capturing allows for exactly the right size ovector for the number of capturing
parentheses. */ parentheses. */
PCRE2_SPTR eptr; /* MUST BE FIRST */ PCRE2_SPTR eptr; /* MUST BE FIRST */
PCRE2_SPTR start_match; /* Can be adjusted by \K */ PCRE2_SPTR start_match; /* Can be adjusted by \K */
PCRE2_SPTR mark; /* Most recent mark on the success path */ PCRE2_SPTR mark; /* Most recent mark on the success path */
uint32_t current_recurse; /* Current (deepest) recursion number */ uint32_t current_recurse; /* Current (deepest) recursion number */
uint32_t capture_last; /* Most recent capture */ uint32_t capture_last; /* Most recent capture */
PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */ PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */
@ -825,7 +826,7 @@ typedef struct match_block {
PCRE2_SIZE frame_vector_size; /* Size of a backtracking frame */ PCRE2_SIZE frame_vector_size; /* Size of a backtracking frame */
heapframe *match_frames; /* Points to vector of frames */ heapframe *match_frames; /* Points to vector of frames */
heapframe *match_frames_top; /* Points after the end of the vector */ heapframe *match_frames_top; /* Points after the end of the vector */
heapframe *stack_frames; /* The original vector on the stack */ heapframe *stack_frames; /* The original vector on the stack */
PCRE2_SIZE heap_limit; /* As it says */ PCRE2_SIZE heap_limit; /* As it says */
uint32_t match_limit; /* As it says */ uint32_t match_limit; /* As it says */
uint32_t match_limit_depth; /* As it says */ uint32_t match_limit_depth; /* As it says */
@ -852,7 +853,7 @@ typedef struct match_block {
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */ PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
PCRE2_SPTR verb_ecode_ptr; /* For passing back info */ PCRE2_SPTR verb_ecode_ptr; /* For passing back info */
PCRE2_SPTR verb_skip_ptr; /* For passing back a (*SKIP) name */ PCRE2_SPTR verb_skip_ptr; /* For passing back a (*SKIP) name */
uint32_t verb_current_recurse; /* Current recurse when (*VERB) happens */ uint32_t verb_current_recurse; /* Current recurse when (*VERB) happens */
uint32_t moptions; /* Match options */ uint32_t moptions; /* Match options */
uint32_t poptions; /* Pattern options */ uint32_t poptions; /* Pattern options */
uint32_t skip_arg_count; /* For counting SKIP_ARGs */ uint32_t skip_arg_count; /* For counting SKIP_ARGs */

View File

@ -400,14 +400,14 @@ typedef struct convertstruct {
} convertstruct; } convertstruct;
static convertstruct convertlist[] = { static convertstruct convertlist[] = {
{ "glob", PCRE2_CONVERT_GLOB }, { "glob", PCRE2_CONVERT_GLOB },
{ "glob_basic", PCRE2_CONVERT_GLOB_BASIC }, { "glob_basic", PCRE2_CONVERT_GLOB_BASIC },
{ "glob_no_backslash", PCRE2_CONVERT_GLOB_NO_BACKSLASH }, { "glob_no_dot_special", PCRE2_CONVERT_GLOB_NO_DOT_SPECIAL },
{ "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR }, { "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR },
{ "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR }, { "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
{ "posix_basic", PCRE2_CONVERT_POSIX_BASIC }, { "posix_basic", PCRE2_CONVERT_POSIX_BASIC },
{ "posix_extended", PCRE2_CONVERT_POSIX_EXTENDED }, { "posix_extended", PCRE2_CONVERT_POSIX_EXTENDED },
{ "unset", CONVERT_UNSET }}; { "unset", CONVERT_UNSET }};
#define convertlistcount (sizeof(convertlist)/sizeof(convertstruct)) #define convertlistcount (sizeof(convertlist)/sizeof(convertstruct))
@ -524,6 +524,7 @@ typedef struct patctl { /* Structure for pattern modifiers. */
uint32_t tables_id; uint32_t tables_id;
uint32_t convert_type; uint32_t convert_type;
uint32_t convert_length; uint32_t convert_length;
uint32_t convert_glob_escape;
uint32_t convert_glob_separator; uint32_t convert_glob_separator;
uint32_t regerror_buffsize; uint32_t regerror_buffsize;
uint8_t locale[LOCALESIZE]; uint8_t locale[LOCALESIZE];
@ -599,6 +600,7 @@ static modstruct modlist[] = {
{ "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) }, { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
{ "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) }, { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
{ "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) }, { "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) },
{ "convert_glob_escape", MOD_PAT, MOD_CHR, 0, PO(convert_glob_escape) },
{ "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) }, { "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) },
{ "convert_length", MOD_PAT, MOD_INT, 0, PO(convert_length) }, { "convert_length", MOD_PAT, MOD_INT, 0, PO(convert_length) },
{ "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) }, { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
@ -1286,6 +1288,14 @@ are supported. */
else \ else \
r = pcre2_set_glob_separator_32(G(a,32),b) r = pcre2_set_glob_separator_32(G(a,32),b)
/* Set the glob escape character in a convert context, choosing the library
function by the current test_mode: pcre2_set_glob_escape_8() for PCRE8_MODE,
pcre2_set_glob_escape_16() for PCRE16_MODE, and pcre2_set_glob_escape_32()
otherwise. The function's return value is assigned to r; b is the escape
value. G(a,N) presumably selects the N-bit variant of the context a (its
definition is not shown here). The expansion is an unbraced if/else chain
without a final semicolon, so it has to be used as a complete statement. */
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
if (test_mode == PCRE8_MODE) \
r = pcre2_set_glob_escape_8(G(a,8),b); \
else if (test_mode == PCRE16_MODE) \
r = pcre2_set_glob_escape_16(G(a,16),b); \
else \
r = pcre2_set_glob_escape_32(G(a,32),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) \ #define PCRE2_SET_HEAP_LIMIT(a,b) \
if (test_mode == PCRE8_MODE) \ if (test_mode == PCRE8_MODE) \
pcre2_set_heap_limit_8(G(a,8),b); \ pcre2_set_heap_limit_8(G(a,8),b); \
@ -1753,6 +1763,12 @@ the three different cases. */
else \ else \
G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b) G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b)
/* Two-width form of the glob escape setter: when test_mode equals the mode
constant built from BITONE, the BITONE variant of pcre2_set_glob_escape is
called, otherwise the BITTWO variant. The return value is assigned to r; b is
the escape value. G() is presumably a token-pasting helper (its definition is
not shown here). The expansion is an unbraced if/else without a final
semicolon, so it has to be used as a complete statement. */
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
r = G(pcre2_set_glob_escape_,BITONE)(G(a,BITONE),b); \
else \
r = G(pcre2_set_glob_escape_,BITTWO)(G(a,BITTWO),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \ #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \
r = G(pcre2_set_glob_separator_,BITONE)(G(a,BITONE),b); \ r = G(pcre2_set_glob_separator_,BITONE)(G(a,BITONE),b); \
@ -1983,6 +1999,7 @@ the three different cases. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_8(G(a,8),b,c) pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b) #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_8(G(a,8),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_8(G(a,8),b) #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_8(G(a,8),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a,8),b) #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a,8),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
@ -2086,6 +2103,7 @@ the three different cases. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_16(G(a,16),b,c) pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b) #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_16(G(a,16),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_16(G(a,16),b) #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_16(G(a,16),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b) #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
@ -2189,6 +2207,7 @@ the three different cases. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_32(G(a,32),b,c) pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b) #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b) #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b) #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
@ -2903,13 +2922,14 @@ return yield;
*************************************************/ *************************************************/
/* Must handle UTF-32 strings in utf mode. Yields number of characters printed. /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
For printing *MARK strings, a negative length is given.If handed a NULL file, For printing *MARK strings, a negative length is given. If handed a NULL file,
just counts chars without printing. */ just counts chars without printing. */
static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f) static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
{ {
int yield = 0; int yield = 0;
(void)(utf); /* Avoid compiler warning */ (void)(utf); /* Avoid compiler warning */
if (length < 0) length = p[-1]; if (length < 0) length = p[-1];
while (length-- > 0) while (length-- > 0)
{ {
@ -5385,6 +5405,21 @@ if (pat_patctl.convert_type != CONVERT_UNSET)
convert_options |= PCRE2_CONVERT_NO_UTF_CHECK; convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;
CONCTXCPY(con_context, default_con_context); CONCTXCPY(con_context, default_con_context);
if (pat_patctl.convert_glob_escape != 0)
{
uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 :
pat_patctl.convert_glob_escape;
PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape);
if (rc != 0)
{
fprintf(outfile, "** Invalid glob escape '%c'\n",
pat_patctl.convert_glob_escape);
convert_return = PR_SKIP;
goto CONVERT_FINISH;
}
}
if (pat_patctl.convert_glob_separator != 0) if (pat_patctl.convert_glob_separator != 0)
{ {
PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator); PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator);

View File

@ -281,12 +281,18 @@
/??a??/ /??a??/
#pattern convert=unset #pattern convert=unset
#pattern convert=glob:glob_no_backslash #pattern convert=glob,convert_glob_escape=0
/a\b\cd/ /a\b\cd/
/**\/a/ /**\/a/
/a`*b/convert_glob_escape=`
/a`*b/convert_glob_escape=0
/a`*b/convert_glob_escape=x
#pattern convert=unset:posix_extended #pattern convert=unset:posix_extended
/a[[:>:]z/ /a[[:>:]z/

11
testdata/testoutput24 vendored
View File

@ -429,7 +429,7 @@ No match
(?s)\A..a..\z (?s)\A..a..\z
#pattern convert=unset #pattern convert=unset
#pattern convert=glob:glob_no_backslash #pattern convert=glob,convert_glob_escape=0
/a\b\cd/ /a\b\cd/
(?s)\Aa\\b\\cd\z (?s)\Aa\\b\\cd\z
@ -437,6 +437,15 @@ No match
/**\/a/ /**\/a/
** Pattern conversion error at offset 2: invalid syntax ** Pattern conversion error at offset 2: invalid syntax
/a`*b/convert_glob_escape=`
(?s)\Aa\*b\z
/a`*b/convert_glob_escape=0
(?s)\Aa`(*COMMIT)[^/]*?b\z
/a`*b/convert_glob_escape=x
** Invalid glob escape 'x'
#pattern convert=unset:posix_extended #pattern convert=unset:posix_extended
/a[[:>:]z/ /a[[:>:]z/