diff --git a/src/pcre2.h b/src/pcre2.h index e4c867b..40cf0d6 100644 --- a/src/pcre2.h +++ b/src/pcre2.h @@ -187,7 +187,7 @@ ignored for pcre2_jit_match(). */ #define PCRE2_CONVERT_POSIX_BASIC 0x00000004u #define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u #define PCRE2_CONVERT_GLOB 0x00000010u -#define PCRE2_CONVERT_GLOB_NO_BACKSLASH 0x00000030u +#define PCRE2_CONVERT_GLOB_NO_DOT_SPECIAL 0x00000030u #define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000050u #define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000090u #define PCRE2_CONVERT_GLOB_BASIC 0x000000f0u @@ -496,6 +496,8 @@ PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \ *pcre2_convert_context_create(pcre2_general_context *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_convert_context_free(pcre2_convert_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_glob_separator(pcre2_convert_context *, uint32_t); @@ -733,6 +735,7 @@ pcre2_compile are called by application code. */ #define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_) #define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_) #define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_) +#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_) #define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_) #define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_) #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) diff --git a/src/pcre2.h.in b/src/pcre2.h.in index 392dbe8..12e8b1e 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ -187,7 +187,7 @@ ignored for pcre2_jit_match(). */ #define PCRE2_CONVERT_POSIX_BASIC 0x00000004u #define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u #define PCRE2_CONVERT_GLOB 0x00000010u -#define PCRE2_CONVERT_GLOB_NO_BACKSLASH 0x00000030u +#define PCRE2_CONVERT_GLOB_NO_DOT_SPECIAL 0x00000030u #define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000050u #define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000090u #define PCRE2_CONVERT_GLOB_BASIC 0x000000f0u @@ -496,6 +496,8 @@ PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \ *pcre2_convert_context_create(pcre2_general_context *); \ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \ pcre2_convert_context_free(pcre2_convert_context *); \ +PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ + pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ pcre2_set_glob_separator(pcre2_convert_context *, uint32_t); @@ -733,6 +735,7 @@ pcre2_compile are called by application code. */ #define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_) #define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_) #define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_) +#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_) #define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_) #define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_) #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) diff --git a/src/pcre2_context.c b/src/pcre2_context.c index 965c509..65e59fa 100644 --- a/src/pcre2_context.c +++ b/src/pcre2_context.c @@ -189,15 +189,17 @@ return mcontext; } -/* A default covert context is set up to save having to initialize at run time +/* A default convert context is set up to save having to initialize at run time when no context is supplied to the convert function. */ const pcre2_convert_context PRIV(default_convert_context) = { { default_malloc, default_free, NULL }, /* Default memory handling */ #ifdef _WIN32 - CHAR_BACKSLASH /* Default path separator */ -#else /* is OS dependent */ - CHAR_SLASH /* Not Windows */ + CHAR_BACKSLASH, /* Default path separator */ + CHAR_GRAVE_ACCENT /* Default escape character */ +#else /* Not Windows */ + CHAR_SLASH, /* Default path separator */ + CHAR_BACKSLASH /* Default escape character */ #endif }; @@ -454,6 +456,14 @@ ccontext->glob_separator = separator; return 0; } +PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION +pcre2_set_glob_escape(pcre2_convert_context *ccontext, uint32_t escape) +{ +if (escape > 255 || (escape != 0 && !ispunct(escape))) + return PCRE2_ERROR_BADDATA; +ccontext->glob_escape = escape; +return 0; +} /* End of pcre2_context.c */ diff --git a/src/pcre2_convert.c b/src/pcre2_convert.c index a58a480..0cf74a8 100644 --- a/src/pcre2_convert.c +++ b/src/pcre2_convert.c @@ -49,8 +49,10 @@ POSSIBILITY OF SUCH DAMAGE. PCRE2_CONVERT_POSIX_BASIC|PCRE2_CONVERT_POSIX_EXTENDED) #define ALL_OPTIONS (PCRE2_CONVERT_UTF|PCRE2_CONVERT_NO_UTF_CHECK| \ - PCRE2_CONVERT_GLOB_NO_BACKSLASH|PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \ - PCRE2_CONVERT_GLOB_NO_STARSTAR|TYPE_OPTIONS) + PCRE2_CONVERT_GLOB_NO_DOT_SPECIAL| \ + PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \ + PCRE2_CONVERT_GLOB_NO_STARSTAR| \ + TYPE_OPTIONS) #define DUMMY_BUFFER_SIZE 100 @@ -76,7 +78,7 @@ POSSIBILITY OF SUCH DAMAGE. /* States for range and POSIX processing */ enum { RANGE_NOT_STARTED, RANGE_STARTING, RANGE_STARTED }; -enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET, +enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET, POSIX_CLASS_NOT_STARTED, POSIX_CLASS_STARTING, POSIX_CLASS_STARTED }; /* Macro to add a character string to the output buffer, checking for overflow. */ @@ -89,23 +91,23 @@ enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET, *p++ = *s; \ } \ } - + /* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( ) */ static const char *pcre2_escaped_literals = - STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS - STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN + STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS + STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS; - + /* Recognized escapes in POSIX basic patterns. */ static const char *posix_basic_escapes = STR_QUESTION_MARK STR_PLUS STR_VERTICAL_LINE STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS - STR_0 STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9; - + STR_0 STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9; + /************************************************* @@ -186,46 +188,46 @@ while (plength > 0) if (posix_state >= POSIX_CLASS_NOT_STARTED) { - if (c == CHAR_RIGHT_SQUARE_BRACKET) + if (c == CHAR_RIGHT_SQUARE_BRACKET) { - PUTCHARS(STR_RIGHT_SQUARE_BRACKET); + PUTCHARS(STR_RIGHT_SQUARE_BRACKET); posix_state = POSIX_NOT_BRACKET; } - + /* Not the end of the class */ - - else + + else { switch (posix_state) { case POSIX_CLASS_STARTED: if (c <= 127 && islower(c)) break; /* Remain in started state */ - posix_state = POSIX_CLASS_NOT_STARTED; - if (c == CHAR_COLON && plength > 0 && + posix_state = POSIX_CLASS_NOT_STARTED; + if (c == CHAR_COLON && plength > 0 && *posix == CHAR_RIGHT_SQUARE_BRACKET) { PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET); - plength--; + plength--; posix++; - continue; /* With next character after :] */ + continue; /* With next character after :] */ } - /* Fall through */ - - case POSIX_CLASS_NOT_STARTED: - if (c == CHAR_LEFT_SQUARE_BRACKET) + /* Fall through */ + + case POSIX_CLASS_NOT_STARTED: + if (c == CHAR_LEFT_SQUARE_BRACKET) posix_state = POSIX_CLASS_STARTING; break; - + case POSIX_CLASS_STARTING: if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED; break; - } - + } + if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH); if (p + clength > endp) return PCRE2_ERROR_NOMEMORY; memcpy(p, posix - clength, CU2BYTES(clength)); p += clength; - } + } } /* Handle a character not within a class. */ @@ -234,31 +236,31 @@ while (plength > 0) { case CHAR_LEFT_SQUARE_BRACKET: PUTCHARS(STR_LEFT_SQUARE_BRACKET); - + /* Handle special cases [[:<:]] and [[:>:]] (which PCRE does support) */ - + if (plength >= 6) { if (posix[0] == CHAR_LEFT_SQUARE_BRACKET && posix[1] == CHAR_COLON && - (posix[2] == CHAR_LESS_THAN_SIGN || + (posix[2] == CHAR_LESS_THAN_SIGN || posix[2] == CHAR_GREATER_THAN_SIGN) && posix[3] == CHAR_COLON && - posix[4] == CHAR_RIGHT_SQUARE_BRACKET && + posix[4] == CHAR_RIGHT_SQUARE_BRACKET && posix[5] == CHAR_RIGHT_SQUARE_BRACKET) { if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY; memcpy(p, posix, CU2BYTES(6)); p += 6; posix += 6; - plength -= 6; - continue; /* With next character */ + plength -= 6; + continue; /* With next character */ } - } - + } + /* Handle "normal" character classes */ - - posix_state = POSIX_CLASS_NOT_STARTED; + + posix_state = POSIX_CLASS_NOT_STARTED; /* Handle ^ and ] as first characters */ @@ -275,23 +277,23 @@ while (plength > 0) posix++; plength--; PUTCHARS(STR_RIGHT_SQUARE_BRACKET); - } + } } break; case CHAR_BACKSLASH: if (plength <= 0) return ERROR_END_BACKSLASH; - if (!extended && *posix < 127 && - strchr(posix_basic_escapes, *posix) != NULL) + if (!extended && *posix < 127 && + strchr(posix_basic_escapes, *posix) != NULL) { - if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH); + if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH); if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY; lastspecial = *p++ = *posix++; - plength--; + plength--; } else nextisliteral = TRUE; break; - + case CHAR_RIGHT_PARENTHESIS: if (!extended || bracount == 0) goto ESCAPE_LITERAL; bracount--; @@ -299,60 +301,60 @@ while (plength > 0) case CHAR_LEFT_PARENTHESIS: bracount++; - /* Fall through */ + /* Fall through */ case CHAR_QUESTION_MARK: case CHAR_PLUS: - case CHAR_LEFT_CURLY_BRACKET: - case CHAR_RIGHT_CURLY_BRACKET: + case CHAR_LEFT_CURLY_BRACKET: + case CHAR_RIGHT_CURLY_BRACKET: case CHAR_VERTICAL_LINE: if (!extended) goto ESCAPE_LITERAL; - /* Fall through */ - + /* Fall through */ + case CHAR_DOT: - case CHAR_DOLLAR_SIGN: - posix_state = POSIX_NOT_BRACKET; + case CHAR_DOLLAR_SIGN: + posix_state = POSIX_NOT_BRACKET; COPY_SPECIAL: - lastspecial = c; + lastspecial = c; if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY; *p++ = c; - break; + break; case CHAR_ASTERISK: - if (lastspecial != CHAR_ASTERISK) + if (lastspecial != CHAR_ASTERISK) { if (!extended && posix_state < POSIX_NOT_BRACKET) - goto ESCAPE_LITERAL; + goto ESCAPE_LITERAL; goto COPY_SPECIAL; - } - break; /* Ignore second and subsequent asterisks */ + } + break; /* Ignore second and subsequent asterisks */ case CHAR_CIRCUMFLEX_ACCENT: if (extended) goto COPY_SPECIAL; - if (posix_state == POSIX_START_REGEX || - lastspecial == CHAR_LEFT_PARENTHESIS) + if (posix_state == POSIX_START_REGEX || + lastspecial == CHAR_LEFT_PARENTHESIS) { posix_state = POSIX_ANCHORED; goto COPY_SPECIAL; - } - /* Fall through */ + } + /* Fall through */ default: if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL) { - ESCAPE_LITERAL: + ESCAPE_LITERAL: PUTCHARS(STR_BACKSLASH); } - lastspecial = 0xff; /* Indicates nothing special */ + lastspecial = 0xff; /* Indicates nothing special */ if (p + clength > endp) return PCRE2_ERROR_NOMEMORY; memcpy(p, posix - clength, CU2BYTES(clength)); p += clength; - posix_state = POSIX_NOT_BRACKET; + posix_state = POSIX_NOT_BRACKET; break; } } -if (posix_state >= POSIX_CLASS_NOT_STARTED) +if (posix_state >= POSIX_CLASS_NOT_STARTED) return ERROR_MISSING_SQUARE_BRACKET; convlength += p - pp; /* Final segment */ *bufflenptr = convlength; @@ -726,7 +728,7 @@ PCRE2_SPTR pattern_start = pattern; PCRE2_SPTR pattern_end = pattern + plength; PCRE2_UCHAR separator = ccontext->glob_separator; PCRE2_UCHAR c; -BOOL no_backslash = (options & PCRE2_CONVERT_GLOB_NO_BACKSLASH) != 0; +BOOL no_escape = ccontext->glob_escape == 0; BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0; BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0; BOOL in_atomic = FALSE; @@ -734,6 +736,8 @@ BOOL after_starstar = FALSE; BOOL with_escape, is_start; int result, len; +(void)utf; /* Avoid compiler warning */ + if (separator >= 128) { /* Currently only ASCII separators are supported. */ @@ -805,7 +809,7 @@ while (pattern < pattern_end) break; } - if (!no_backslash && *pattern == CHAR_BACKSLASH) + if (!no_escape && *pattern == ccontext->glob_escape) { pattern++; if (pattern >= pattern_end) @@ -925,11 +929,11 @@ while (pattern < pattern_end) continue; } - if (!no_backslash && c == CHAR_BACKSLASH) + if (!no_escape && c == ccontext->glob_escape) { if (pattern >= pattern_end) { - result = ERROR_END_BACKSLASH; + result = PCRE2_ERROR_CONVERT_SYNTAX; break; } c = *pattern++; diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h index c1eea08..c5af7df 100644 --- a/src/pcre2_intmodedep.h +++ b/src/pcre2_intmodedep.h @@ -572,7 +572,7 @@ typedef struct pcre2_real_compile_context { uint16_t bsr_convention; uint16_t newline_convention; uint32_t parens_nest_limit; - uint32_t extra_options; + uint32_t extra_options; } pcre2_real_compile_context; /* The real match context structure. */ @@ -586,7 +586,7 @@ typedef struct pcre2_real_match_context { int (*callout)(pcre2_callout_block *, void *); void *callout_data; PCRE2_SIZE offset_limit; - uint32_t heap_limit; + uint32_t heap_limit; uint32_t match_limit; uint32_t depth_limit; } pcre2_real_match_context; @@ -595,7 +595,8 @@ typedef struct pcre2_real_match_context { typedef struct pcre2_real_convert_context { pcre2_memctl memctl; - uint32_t glob_separator; + uint32_t glob_separator; + uint32_t glob_escape; } pcre2_real_convert_context; /* The real compiled code structure. The type for the blocksize field is @@ -623,7 +624,7 @@ typedef struct pcre2_real_code { uint32_t compile_options; /* Options passed to pcre2_compile() */ uint32_t overall_options; /* Options after processing the pattern */ uint32_t flags; /* Various state flags */ - uint32_t limit_heap; /* Limit set in the pattern */ + uint32_t limit_heap; /* Limit set in the pattern */ uint32_t limit_match; /* Limit set in the pattern */ uint32_t limit_depth; /* Limit set in the pattern */ uint32_t first_codeunit; /* Starting code unit */ @@ -638,9 +639,9 @@ typedef struct pcre2_real_code { uint16_t name_count; /* Number of name entries in the table */ } pcre2_real_code; -/* The real match data structure. Define ovector large so that array bound -checkers don't grumble. Memory for this structure is obtained by calling -pcre2_match_data_create(), which sets the size as the offset of ovector plus +/* The real match data structure. Define ovector large so that array bound +checkers don't grumble. Memory for this structure is obtained by calling +pcre2_match_data_create(), which sets the size as the offset of ovector plus pairs of elements for each capturing group. (See also the heapframe structure below.) */ @@ -781,7 +782,7 @@ typedef struct heapframe { PCRE2_SPTR ecode; /* The current position in the pattern */ PCRE2_SPTR temp_sptr[2]; /* Used for short-term PCRE_SPTR values */ PCRE2_SIZE length; /* Used for character, string, or code lengths */ - PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */ + PCRE2_SIZE back_frame; /* Amount to subtract on RRETURN */ PCRE2_SIZE temp_size; /* Used for short-term PCRE2_SIZE values */ uint32_t rdepth; /* "Recursion" depth */ uint32_t group_frame_type; /* Type information for group frames */ @@ -798,15 +799,15 @@ typedef struct heapframe { #endif /* The rest have to be copied from the previous frame whenever a new frame - becomes current. The final field is specified as a large vector so that - runtime array bound checks don't catch references to it. However, for any - specific call to pcre2_match() the memory allocated for each frame structure - allows for exactly the right size ovector for the number of capturing + becomes current. The final field is specified as a large vector so that + runtime array bound checks don't catch references to it. However, for any + specific call to pcre2_match() the memory allocated for each frame structure + allows for exactly the right size ovector for the number of capturing parentheses. */ PCRE2_SPTR eptr; /* MUST BE FIRST */ PCRE2_SPTR start_match; /* Can be adjusted by \K */ - PCRE2_SPTR mark; /* Most recent mark on the success path */ + PCRE2_SPTR mark; /* Most recent mark on the success path */ uint32_t current_recurse; /* Current (deepest) recursion number */ uint32_t capture_last; /* Most recent capture */ PCRE2_SIZE last_group_offset; /* Saved offset to most recent group frame */ @@ -825,7 +826,7 @@ typedef struct match_block { PCRE2_SIZE frame_vector_size; /* Size of a backtracking frame */ heapframe *match_frames; /* Points to vector of frames */ heapframe *match_frames_top; /* Points after the end of the vector */ - heapframe *stack_frames; /* The original vector on the stack */ + heapframe *stack_frames; /* The original vector on the stack */ PCRE2_SIZE heap_limit; /* As it says */ uint32_t match_limit; /* As it says */ uint32_t match_limit_depth; /* As it says */ @@ -852,7 +853,7 @@ typedef struct match_block { PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */ PCRE2_SPTR verb_ecode_ptr; /* For passing back info */ PCRE2_SPTR verb_skip_ptr; /* For passing back a (*SKIP) name */ - uint32_t verb_current_recurse; /* Current recurse when (*VERB) happens */ + uint32_t verb_current_recurse; /* Current recurse when (*VERB) happens */ uint32_t moptions; /* Match options */ uint32_t poptions; /* Pattern options */ uint32_t skip_arg_count; /* For counting SKIP_ARGs */ diff --git a/src/pcre2test.c b/src/pcre2test.c index 4d6eec8..e2af2c1 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -400,14 +400,14 @@ typedef struct convertstruct { } convertstruct; static convertstruct convertlist[] = { - { "glob", PCRE2_CONVERT_GLOB }, - { "glob_basic", PCRE2_CONVERT_GLOB_BASIC }, - { "glob_no_backslash", PCRE2_CONVERT_GLOB_NO_BACKSLASH }, - { "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR }, + { "glob", PCRE2_CONVERT_GLOB }, + { "glob_basic", PCRE2_CONVERT_GLOB_BASIC }, + { "glob_no_dot_special", PCRE2_CONVERT_GLOB_NO_DOT_SPECIAL }, + { "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR }, { "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR }, - { "posix_basic", PCRE2_CONVERT_POSIX_BASIC }, - { "posix_extended", PCRE2_CONVERT_POSIX_EXTENDED }, - { "unset", CONVERT_UNSET }}; + { "posix_basic", PCRE2_CONVERT_POSIX_BASIC }, + { "posix_extended", PCRE2_CONVERT_POSIX_EXTENDED }, + { "unset", CONVERT_UNSET }}; #define convertlistcount (sizeof(convertlist)/sizeof(convertstruct)) @@ -524,6 +524,7 @@ typedef struct patctl { /* Structure for pattern modifiers. */ uint32_t tables_id; uint32_t convert_type; uint32_t convert_length; + uint32_t convert_glob_escape; uint32_t convert_glob_separator; uint32_t regerror_buffsize; uint8_t locale[LOCALESIZE]; @@ -599,6 +600,7 @@ static modstruct modlist[] = { { "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) }, { "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) }, { "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) }, + { "convert_glob_escape", MOD_PAT, MOD_CHR, 0, PO(convert_glob_escape) }, { "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) }, { "convert_length", MOD_PAT, MOD_INT, 0, PO(convert_length) }, { "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) }, @@ -1286,6 +1288,14 @@ are supported. */ else \ r = pcre2_set_glob_separator_32(G(a,32),b) +#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \ + if (test_mode == PCRE8_MODE) \ + r = pcre2_set_glob_escape_8(G(a,8),b); \ + else if (test_mode == PCRE16_MODE) \ + r = pcre2_set_glob_escape_16(G(a,16),b); \ + else \ + r = pcre2_set_glob_escape_32(G(a,32),b) + #define PCRE2_SET_HEAP_LIMIT(a,b) \ if (test_mode == PCRE8_MODE) \ pcre2_set_heap_limit_8(G(a,8),b); \ @@ -1753,6 +1763,12 @@ the three different cases. */ else \ G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b) +#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \ + if (test_mode == G(G(PCRE,BITONE),_MODE)) \ + r = G(pcre2_set_glob_escape_,BITONE)(G(a,BITONE),b); \ + else \ + r = G(pcre2_set_glob_escape_,BITTWO)(G(a,BITTWO),b) + #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ r = G(pcre2_set_glob_separator_,BITONE)(G(a,BITONE),b); \ @@ -1983,6 +1999,7 @@ the three different cases. */ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ pcre2_set_compile_recursion_guard_8(G(a,8),b,c) #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b) +#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_8(G(a,8),b) #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_8(G(a,8),b) #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a,8),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b) @@ -2086,6 +2103,7 @@ the three different cases. */ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ pcre2_set_compile_recursion_guard_16(G(a,16),b,c) #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b) +#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_16(G(a,16),b) #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_16(G(a,16),b) #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b) @@ -2189,6 +2207,7 @@ the three different cases. */ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ pcre2_set_compile_recursion_guard_32(G(a,32),b,c) #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b) +#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b) #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b) #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b) @@ -2903,13 +2922,14 @@ return yield; *************************************************/ /* Must handle UTF-32 strings in utf mode. Yields number of characters printed. -For printing *MARK strings, a negative length is given.If handed a NULL file, +For printing *MARK strings, a negative length is given. If handed a NULL file, just counts chars without printing. */ static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f) { int yield = 0; (void)(utf); /* Avoid compiler warning */ + if (length < 0) length = p[-1]; while (length-- > 0) { @@ -5385,6 +5405,21 @@ if (pat_patctl.convert_type != CONVERT_UNSET) convert_options |= PCRE2_CONVERT_NO_UTF_CHECK; CONCTXCPY(con_context, default_con_context); + + if (pat_patctl.convert_glob_escape != 0) + { + uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 : + pat_patctl.convert_glob_escape; + PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape); + if (rc != 0) + { + fprintf(outfile, "** Invalid glob escape '%c'\n", + pat_patctl.convert_glob_escape); + convert_return = PR_SKIP; + goto CONVERT_FINISH; + } + } + if (pat_patctl.convert_glob_separator != 0) { PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator); diff --git a/testdata/testinput24 b/testdata/testinput24 index 282b697..20d4d5f 100644 --- a/testdata/testinput24 +++ b/testdata/testinput24 @@ -281,12 +281,18 @@ /??a??/ #pattern convert=unset -#pattern convert=glob:glob_no_backslash +#pattern convert=glob,convert_glob_escape=0 /a\b\cd/ /**\/a/ +/a`*b/convert_glob_escape=` + +/a`*b/convert_glob_escape=0 + +/a`*b/convert_glob_escape=x + #pattern convert=unset:posix_extended /a[[:>:]z/ diff --git a/testdata/testoutput24 b/testdata/testoutput24 index 9c5466d..dbb248e 100644 --- a/testdata/testoutput24 +++ b/testdata/testoutput24 @@ -429,7 +429,7 @@ No match (?s)\A..a..\z #pattern convert=unset -#pattern convert=glob:glob_no_backslash +#pattern convert=glob,convert_glob_escape=0 /a\b\cd/ (?s)\Aa\\b\\cd\z @@ -437,6 +437,15 @@ No match /**\/a/ ** Pattern conversion error at offset 2: invalid syntax +/a`*b/convert_glob_escape=` +(?s)\Aa\*b\z + +/a`*b/convert_glob_escape=0 +(?s)\Aa`(*COMMIT)[^/]*?b\z + +/a`*b/convert_glob_escape=x +** Invalid glob escape 'x' + #pattern convert=unset:posix_extended /a[[:>:]z/