More experimental convert code evolution.

This commit is contained in:
Philip.Hazel 2017-05-23 16:08:48 +00:00
parent 51df11a591
commit 231a800557
8 changed files with 169 additions and 98 deletions

View File

@ -187,7 +187,7 @@ ignored for pcre2_jit_match(). */
#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u
#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u
#define PCRE2_CONVERT_GLOB 0x00000010u
#define PCRE2_CONVERT_GLOB_NO_BACKSLASH 0x00000030u
#define PCRE2_CONVERT_GLOB_NO_DOT_SPECIAL 0x00000030u
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000050u
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000090u
#define PCRE2_CONVERT_GLOB_BASIC 0x000000f0u
@ -496,6 +496,8 @@ PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
*pcre2_convert_context_create(pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_convert_context_free(pcre2_convert_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
@ -733,6 +735,7 @@ pcre2_compile are called by application code. */
#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_)
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)

View File

@ -187,7 +187,7 @@ ignored for pcre2_jit_match(). */
#define PCRE2_CONVERT_POSIX_BASIC 0x00000004u
#define PCRE2_CONVERT_POSIX_EXTENDED 0x00000008u
#define PCRE2_CONVERT_GLOB 0x00000010u
#define PCRE2_CONVERT_GLOB_NO_BACKSLASH 0x00000030u
#define PCRE2_CONVERT_GLOB_NO_DOT_SPECIAL 0x00000030u
#define PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR 0x00000050u
#define PCRE2_CONVERT_GLOB_NO_STARSTAR 0x00000090u
#define PCRE2_CONVERT_GLOB_BASIC 0x000000f0u
@ -496,6 +496,8 @@ PCRE2_EXP_DECL pcre2_convert_context PCRE2_CALL_CONVENTION \
*pcre2_convert_context_create(pcre2_general_context *); \
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
pcre2_convert_context_free(pcre2_convert_context *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_glob_escape(pcre2_convert_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_glob_separator(pcre2_convert_context *, uint32_t);
@ -733,6 +735,7 @@ pcre2_compile are called by application code. */
#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
#define pcre2_set_glob_escape PCRE2_SUFFIX(pcre2_set_glob_escape_)
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)

View File

@ -189,15 +189,17 @@ return mcontext;
}
/* A default covert context is set up to save having to initialize at run time
/* A default convert context is set up to save having to initialize at run time
when no context is supplied to the convert function. */
const pcre2_convert_context PRIV(default_convert_context) = {
{ default_malloc, default_free, NULL }, /* Default memory handling */
#ifdef _WIN32
CHAR_BACKSLASH /* Default path separator */
#else /* is OS dependent */
CHAR_SLASH /* Not Windows */
CHAR_BACKSLASH, /* Default path separator */
CHAR_GRAVE_ACCENT /* Default escape character */
#else /* Not Windows */
CHAR_SLASH, /* Default path separator */
CHAR_BACKSLASH /* Default escape character */
#endif
};
@ -454,6 +456,14 @@ ccontext->glob_separator = separator;
return 0;
}
/* Set the escape character stored in a convert context.

Arguments:
  ccontext   the convert context to update
  escape     the new escape value; zero is accepted as-is, any other value
               must be no greater than 255 and must satisfy ispunct()

Returns:     0 if the value was stored
             PCRE2_ERROR_BADDATA if the value was rejected, in which case
               the context is left unchanged
*/

PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_glob_escape(pcre2_convert_context *ccontext, uint32_t escape)
{
if (escape != 0)
  {
  /* The range test comes first so that ispunct() is never handed a value
  above 255. */
  if (escape > 255 || !ispunct(escape)) return PCRE2_ERROR_BADDATA;
  }
ccontext->glob_escape = escape;
return 0;
}
/* End of pcre2_context.c */

View File

@ -49,8 +49,10 @@ POSSIBILITY OF SUCH DAMAGE.
PCRE2_CONVERT_POSIX_BASIC|PCRE2_CONVERT_POSIX_EXTENDED)
#define ALL_OPTIONS (PCRE2_CONVERT_UTF|PCRE2_CONVERT_NO_UTF_CHECK| \
PCRE2_CONVERT_GLOB_NO_BACKSLASH|PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \
PCRE2_CONVERT_GLOB_NO_STARSTAR|TYPE_OPTIONS)
PCRE2_CONVERT_GLOB_NO_DOT_SPECIAL| \
PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \
PCRE2_CONVERT_GLOB_NO_STARSTAR| \
TYPE_OPTIONS)
#define DUMMY_BUFFER_SIZE 100
@ -726,7 +728,7 @@ PCRE2_SPTR pattern_start = pattern;
PCRE2_SPTR pattern_end = pattern + plength;
PCRE2_UCHAR separator = ccontext->glob_separator;
PCRE2_UCHAR c;
BOOL no_backslash = (options & PCRE2_CONVERT_GLOB_NO_BACKSLASH) != 0;
BOOL no_escape = ccontext->glob_escape == 0;
BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0;
BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0;
BOOL in_atomic = FALSE;
@ -734,6 +736,8 @@ BOOL after_starstar = FALSE;
BOOL with_escape, is_start;
int result, len;
(void)utf; /* Avoid compiler warning */
if (separator >= 128)
{
/* Currently only ASCII separators are supported. */
@ -805,7 +809,7 @@ while (pattern < pattern_end)
break;
}
if (!no_backslash && *pattern == CHAR_BACKSLASH)
if (!no_escape && *pattern == ccontext->glob_escape)
{
pattern++;
if (pattern >= pattern_end)
@ -925,11 +929,11 @@ while (pattern < pattern_end)
continue;
}
if (!no_backslash && c == CHAR_BACKSLASH)
if (!no_escape && c == ccontext->glob_escape)
{
if (pattern >= pattern_end)
{
result = ERROR_END_BACKSLASH;
result = PCRE2_ERROR_CONVERT_SYNTAX;
break;
}
c = *pattern++;

View File

@ -596,6 +596,7 @@ typedef struct pcre2_real_match_context {
/* The real convert context structure. It carries the memory control block
together with the two characters that parameterize glob conversion. The field
order matters: the default convert context is set up with a positional
initializer that lists memory handling, then the separator, then the escape
character. */
typedef struct pcre2_real_convert_context {
pcre2_memctl memctl;       /* Memory handling */
uint32_t glob_separator;   /* Path separator used by glob conversion */
uint32_t glob_escape;      /* Glob escape character; zero means no escaping */
} pcre2_real_convert_context;
/* The real compiled code structure. The type for the blocksize field is

View File

@ -402,7 +402,7 @@ typedef struct convertstruct {
static convertstruct convertlist[] = {
{ "glob", PCRE2_CONVERT_GLOB },
{ "glob_basic", PCRE2_CONVERT_GLOB_BASIC },
{ "glob_no_backslash", PCRE2_CONVERT_GLOB_NO_BACKSLASH },
{ "glob_no_dot_special", PCRE2_CONVERT_GLOB_NO_DOT_SPECIAL },
{ "glob_no_starstar", PCRE2_CONVERT_GLOB_NO_STARSTAR },
{ "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
{ "posix_basic", PCRE2_CONVERT_POSIX_BASIC },
@ -524,6 +524,7 @@ typedef struct patctl { /* Structure for pattern modifiers. */
uint32_t tables_id;
uint32_t convert_type;
uint32_t convert_length;
uint32_t convert_glob_escape;
uint32_t convert_glob_separator;
uint32_t regerror_buffsize;
uint8_t locale[LOCALESIZE];
@ -599,6 +600,7 @@ static modstruct modlist[] = {
{ "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
{ "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
{ "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) },
{ "convert_glob_escape", MOD_PAT, MOD_CHR, 0, PO(convert_glob_escape) },
{ "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) },
{ "convert_length", MOD_PAT, MOD_INT, 0, PO(convert_length) },
{ "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
@ -1286,6 +1288,14 @@ are supported. */
else \
r = pcre2_set_glob_separator_32(G(a,32),b)
/* PCRE2_SET_GLOB_ESCAPE(r,a,b): call the pcre2_set_glob_escape function of
the code unit width selected by test_mode (8, 16, or otherwise 32), passing
the width-specific variant of context a (formed with G(a,8) etc.) and the
escape value b, and assign the function's return code to r. The expansion is
an if/else chain with no trailing semicolon; the caller supplies it. */
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
if (test_mode == PCRE8_MODE) \
r = pcre2_set_glob_escape_8(G(a,8),b); \
else if (test_mode == PCRE16_MODE) \
r = pcre2_set_glob_escape_16(G(a,16),b); \
else \
r = pcre2_set_glob_escape_32(G(a,32),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) \
if (test_mode == PCRE8_MODE) \
pcre2_set_heap_limit_8(G(a,8),b); \
@ -1753,6 +1763,12 @@ the three different cases. */
else \
G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b)
/* PCRE2_SET_GLOB_ESCAPE(r,a,b), two-width variant: when test_mode equals the
mode constant built from BITONE, call the BITONE pcre2_set_glob_escape
function; otherwise call the BITTWO one. In both cases the width-specific
variant of context a and the escape value b are passed, and the return code is
assigned to r. The expansion has no trailing semicolon; the caller supplies
it. */
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
r = G(pcre2_set_glob_escape_,BITONE)(G(a,BITONE),b); \
else \
r = G(pcre2_set_glob_escape_,BITTWO)(G(a,BITTWO),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
r = G(pcre2_set_glob_separator_,BITONE)(G(a,BITONE),b); \
@ -1983,6 +1999,7 @@ the three different cases. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_8(G(a,8),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_8(G(a,8),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a,8),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
@ -2086,6 +2103,7 @@ the three different cases. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_16(G(a,16),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_16(G(a,16),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
@ -2189,6 +2207,7 @@ the three different cases. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
@ -2903,13 +2922,14 @@ return yield;
*************************************************/
/* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
For printing *MARK strings, a negative length is given.If handed a NULL file,
For printing *MARK strings, a negative length is given. If handed a NULL file,
just counts chars without printing. */
static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
{
int yield = 0;
(void)(utf); /* Avoid compiler warning */
if (length < 0) length = p[-1];
while (length-- > 0)
{
@ -5385,6 +5405,21 @@ if (pat_patctl.convert_type != CONVERT_UNSET)
convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;
CONCTXCPY(con_context, default_con_context);
if (pat_patctl.convert_glob_escape != 0)
{
uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 :
pat_patctl.convert_glob_escape;
PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape);
if (rc != 0)
{
fprintf(outfile, "** Invalid glob escape '%c'\n",
pat_patctl.convert_glob_escape);
convert_return = PR_SKIP;
goto CONVERT_FINISH;
}
}
if (pat_patctl.convert_glob_separator != 0)
{
PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator);

View File

@ -281,12 +281,18 @@
/??a??/
#pattern convert=unset
#pattern convert=glob:glob_no_backslash
#pattern convert=glob,convert_glob_escape=0
/a\b\cd/
/**\/a/
/a`*b/convert_glob_escape=`
/a`*b/convert_glob_escape=0
/a`*b/convert_glob_escape=x
#pattern convert=unset:posix_extended
/a[[:>:]z/

11
testdata/testoutput24 vendored
View File

@ -429,7 +429,7 @@ No match
(?s)\A..a..\z
#pattern convert=unset
#pattern convert=glob:glob_no_backslash
#pattern convert=glob,convert_glob_escape=0
/a\b\cd/
(?s)\Aa\\b\\cd\z
@ -437,6 +437,15 @@ No match
/**\/a/
** Pattern conversion error at offset 2: invalid syntax
/a`*b/convert_glob_escape=`
(?s)\Aa\*b\z
/a`*b/convert_glob_escape=0
(?s)\Aa`(*COMMIT)[^/]*?b\z
/a`*b/convert_glob_escape=x
** Invalid glob escape 'x'
#pattern convert=unset:posix_extended
/a[[:>:]z/