Reworking bash glob conversion.

This commit is contained in:
Zoltán Herczeg 2017-05-17 13:44:24 +00:00
parent 223104001c
commit d9c33d0708
3 changed files with 77 additions and 312 deletions

View File

@ -693,109 +693,6 @@ context->output = output;
context->output_size = output_size; context->output_size = output_size;
} }
/* Bash glob reading modes. These are the values stored in the read_mode
field of pcre2_bash_glob_context by convert_glob_bash_read(). */
/* Outside any quoting; characters may be control characters. */
#define PCRE2_BASH_GLOB_NORMAL 0
/* Entered on an apostrophe; left again on the next apostrophe. */
#define PCRE2_BASH_GLOB_QUOTED 1
/* Entered on a quotation mark; left again on the next quotation mark that
is not preceded by a backslash. */
#define PCRE2_BASH_GLOB_DOUBLE_QUOTED 2
/* Set when the input ends with a backslash that has nothing to escape. */
#define PCRE2_BASH_GLOB_BACKSLASH 3
/* Maximum nesting level of enclosed groups. This is the size of the
group_types stack used by convert_glob_bash(). */
#define PCRE2_BASH_GLOB_MAX_NESTING 16
/* State shared between convert_glob_bash() and convert_glob_bash_read()
while a bash glob is being scanned and converted. */
typedef struct pcre2_bash_glob_context {
/* Read position; convert_glob_bash_read() advances it past each code unit
it consumes, so the unit just read is pattern[-1]. */
PCRE2_SPTR pattern;
/* One past the last code unit of the glob. */
PCRE2_SPTR pattern_end;
/* Output buffer state for the converted pattern. */
pcre2_output_context out;
/* Current quoting state, one of the PCRE2_BASH_GLOB_* reading modes. */
int read_mode;
/* Set by convert_glob_bash_read(): TRUE when the character just returned
was read unquoted and unescaped. */
BOOL is_control_char;
} pcre2_bash_glob_context;
/* Fetch the next character of a bash glob, swallowing quoting syntax.

Apostrophes, quotation marks and escaping backslashes are consumed here and
never handed back to the caller; they only change context->read_mode or
select the character that follows them. On a TRUE return the character to
process is context->pattern[-1].

context->is_control_char is set to TRUE only for a character read in normal
mode that is not itself quoting syntax. It is FALSE for an escaped character
and for a UTF continuation code unit. The quoted modes do not write the flag;
within this function they are entered only after it has been cleared.

Arguments:
  context    the bash glob context
  utf        TRUE if UTF

Returns:     TRUE if a character was read
             FALSE at the end of the input; if the input ended with a
               dangling backslash, context->read_mode is left as
               PCRE2_BASH_GLOB_BACKSLASH
*/

static BOOL
convert_glob_bash_read(pcre2_bash_glob_context *context, BOOL utf)
{
for (;;)
  {
  /* Nothing left to read. */
  if (context->pattern >= context->pattern_end) return FALSE;

  /* Consume one code unit; from here on it is context->pattern[-1]. */
  context->pattern++;

#ifdef SUPPORT_UNICODE
  /* A non-leading code unit of a UTF character is always a literal. */
  if (utf && NOT_FIRSTCU(context->pattern[-1]))
    {
    context->is_control_char = FALSE;
    return TRUE;
    }
#endif

  switch (context->read_mode)
    {
    case PCRE2_BASH_GLOB_QUOTED:
    /* Everything up to the closing apostrophe is literal. */
    if (context->pattern[-1] == CHAR_APOSTROPHE)
      {
      context->read_mode = PCRE2_BASH_GLOB_NORMAL;
      continue;
      }
    return TRUE;

    case PCRE2_BASH_GLOB_DOUBLE_QUOTED:
    /* Inside quotation marks a backslash escapes only a quotation mark or
    another backslash; before anything else it is an ordinary character. */
    if (context->pattern[-1] == CHAR_BACKSLASH &&
        context->pattern < context->pattern_end &&
        (context->pattern[0] == CHAR_QUOTATION_MARK ||
         context->pattern[0] == CHAR_BACKSLASH))
      {
      context->pattern++;
      return TRUE;
      }
    if (context->pattern[-1] == CHAR_QUOTATION_MARK)
      {
      context->read_mode = PCRE2_BASH_GLOB_NORMAL;
      continue;
      }
    return TRUE;

    default:
    /* Any other mode is handled as unquoted input below. */
    break;
    }

  /* Unquoted input: assume a literal until proven otherwise. */
  context->is_control_char = FALSE;

  switch (context->pattern[-1])
    {
    case CHAR_APOSTROPHE:
    context->read_mode = PCRE2_BASH_GLOB_QUOTED;
    continue;

    case CHAR_QUOTATION_MARK:
    context->read_mode = PCRE2_BASH_GLOB_DOUBLE_QUOTED;
    continue;

    case CHAR_BACKSLASH:
    /* A trailing backslash has nothing to escape; record that in the mode
    so that the caller can tell it apart from a normal end of input. */
    if (context->pattern >= context->pattern_end)
      {
      context->read_mode = PCRE2_BASH_GLOB_BACKSLASH;
      return FALSE;
      }
    context->pattern++;
    return TRUE;

    default:
    context->is_control_char = TRUE;
    return TRUE;
    }
  }
}
/* Prints a wildcard into the output. /* Prints a wildcard into the output.
@ -806,30 +703,24 @@ Arguments:
*/ */
static void static void
convert_glob_bash_wildcard(pcre2_bash_glob_context *context, convert_glob_bash_wildcard(pcre2_output_context *out,
PCRE2_UCHAR separator, BOOL after_sep) PCRE2_UCHAR separator)
{ {
int len = 2; int len = 2;
context->out.out_str[0] = CHAR_LEFT_SQUARE_BRACKET; out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
context->out.out_str[1] = CHAR_CIRCUMFLEX_ACCENT; out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
if (separator == CHAR_BACKSLASH) if (separator == CHAR_BACKSLASH)
{ {
context->out.out_str[2] = CHAR_BACKSLASH; out->out_str[2] = CHAR_BACKSLASH;
len = 3; len = 3;
} }
if (after_sep) convert_glob_bash_write_str(out, len);
{
context->out.out_str[len] = CHAR_DOT;
len++;
}
convert_glob_bash_write_str(&context->out, len); convert_glob_bash_write(out, separator);
convert_glob_bash_write(out, CHAR_RIGHT_SQUARE_BRACKET);
convert_glob_bash_write(&context->out, separator);
convert_glob_bash_write(&context->out, CHAR_RIGHT_SQUARE_BRACKET);
} }
@ -851,204 +742,98 @@ Returns: 0 => success
*/ */
static int static int
convert_glob_bash(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength, convert_glob_bash(uint32_t options, PCRE2_SPTR pattern, PCRE2_SIZE plength,
BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length, BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext) PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
{ {
pcre2_bash_glob_context context; pcre2_output_context out;
uint8_t group_types[PCRE2_BASH_GLOB_MAX_NESTING]; PCRE2_SPTR pattern_start = pattern;
int nesting_level, result; PCRE2_SPTR pattern_end = pattern + plength;
BOOL after_sep = TRUE; int result;
PCRE2_UCHAR c; PCRE2_UCHAR c;
/* Initialize default for error offset as end of input. */ /* Initialize default for error offset as end of input. */
context.pattern = pattern; out.output = use_buffer;
context.pattern_end = pattern + plength; out.output_end = use_buffer + use_length;
context.read_mode = PCRE2_BASH_GLOB_NORMAL; out.output_size = 0;
context.out.output = use_buffer;
context.out.output_end = use_buffer + use_length;
context.out.output_size = 0;
context.out.out_str[0] = CHAR_BACKSLASH; out.out_str[0] = CHAR_LEFT_PARENTHESIS;
context.out.out_str[1] = CHAR_A; out.out_str[1] = CHAR_QUESTION_MARK;
convert_glob_bash_write_str(&context.out, 2); out.out_str[2] = CHAR_s;
out.out_str[3] = CHAR_RIGHT_PARENTHESIS;
out.out_str[4] = CHAR_BACKSLASH;
out.out_str[5] = CHAR_A;
convert_glob_bash_write_str(&out, 6);
nesting_level = 0;
result = 0; result = 0;
while (convert_glob_bash_read(&context, utf)) while (pattern < pattern_end)
{ {
c = context.pattern[-1]; c = *pattern++;
if (context.is_control_char)
{
if (c == CHAR_LEFT_PARENTHESIS)
{
/* ! Unexpected open parenthesis ! */
result = ERROR_END_BACKSLASH;
break;
}
if (c == CHAR_RIGHT_PARENTHESIS)
{
if (nesting_level == 0)
{
/* ! Unexpected close parenthesis ! */
result = ERROR_END_BACKSLASH;
break;
}
c = group_types[--nesting_level];
convert_glob_bash_write(&context.out, CHAR_RIGHT_PARENTHESIS);
if (c != CHAR_COMMERCIAL_AT)
{
convert_glob_bash_write(&context.out, c);
convert_glob_bash_write(&context.out, CHAR_QUESTION_MARK);
}
after_sep = FALSE;
continue;
}
if (c == CHAR_VERTICAL_LINE && nesting_level > 0)
{
convert_glob_bash_write(&context.out, CHAR_VERTICAL_LINE);
after_sep = FALSE;
continue;
}
if ((c == CHAR_QUESTION_MARK || c == CHAR_ASTERISK ||
c == CHAR_PLUS || c == CHAR_COMMERCIAL_AT) &&
context.pattern < context.pattern_end &&
context.pattern[0] == CHAR_LEFT_PARENTHESIS)
{
if (nesting_level >= PCRE2_BASH_GLOB_MAX_NESTING)
{
result = ERROR_TOO_DEEP_NESTING;
break;
}
if (after_sep)
{
context.out.out_str[0] = CHAR_LEFT_PARENTHESIS;
context.out.out_str[1] = CHAR_QUESTION_MARK;
context.out.out_str[2] = CHAR_EXCLAMATION_MARK;
context.out.out_str[3] = CHAR_BACKSLASH;
context.out.out_str[4] = CHAR_DOT;
context.out.out_str[5] = CHAR_RIGHT_PARENTHESIS;
convert_glob_bash_write_str(&context.out, 6);
}
context.pattern++;
group_types[nesting_level++] = (uint8_t) c;
context.out.out_str[0] = CHAR_LEFT_PARENTHESIS;
context.out.out_str[1] = CHAR_QUESTION_MARK;
context.out.out_str[2] = CHAR_COLON;
convert_glob_bash_write_str(&context.out, 3);
after_sep = FALSE;
continue;
}
if (c == CHAR_ASTERISK) if (c == CHAR_ASTERISK)
{ {
if (nesting_level == 0 && context.pattern != pattern + 1) if (pattern != pattern_start + 1)
{ {
context.out.out_str[0] = CHAR_LEFT_PARENTHESIS; out.out_str[0] = CHAR_LEFT_PARENTHESIS;
context.out.out_str[1] = CHAR_ASTERISK; out.out_str[1] = CHAR_ASTERISK;
context.out.out_str[2] = CHAR_C; out.out_str[2] = CHAR_C;
context.out.out_str[3] = CHAR_O; out.out_str[3] = CHAR_O;
context.out.out_str[4] = CHAR_M; out.out_str[4] = CHAR_M;
context.out.out_str[5] = CHAR_M; out.out_str[5] = CHAR_M;
context.out.out_str[6] = CHAR_I; out.out_str[6] = CHAR_I;
context.out.out_str[7] = CHAR_T; out.out_str[7] = CHAR_T;
convert_glob_bash_write_str(&context.out, 8); convert_glob_bash_write_str(&out, 8);
convert_glob_bash_write(&context.out, CHAR_RIGHT_PARENTHESIS); convert_glob_bash_write(&out, CHAR_RIGHT_PARENTHESIS);
} }
if (after_sep) convert_glob_bash_wildcard(&out, ccontext->glob_separator);
{ out.out_str[0] = CHAR_ASTERISK;
context.out.out_str[0] = CHAR_LEFT_PARENTHESIS; out.out_str[1] = CHAR_QUESTION_MARK;
context.out.out_str[1] = CHAR_QUESTION_MARK; convert_glob_bash_write_str(&out, 2);
context.out.out_str[2] = CHAR_COLON;
convert_glob_bash_write_str(&context.out, 3);
convert_glob_bash_wildcard(&context, ccontext->glob_separator, TRUE);
convert_glob_bash_wildcard(&context, ccontext->glob_separator, FALSE);
context.out.out_str[0] = CHAR_ASTERISK;
context.out.out_str[1] = CHAR_QUESTION_MARK;
context.out.out_str[2] = CHAR_RIGHT_PARENTHESIS;
context.out.out_str[3] = CHAR_QUESTION_MARK;
context.out.out_str[4] = CHAR_QUESTION_MARK;
convert_glob_bash_write_str(&context.out, 5);
}
else
{
convert_glob_bash_wildcard(&context, ccontext->glob_separator, FALSE);
context.out.out_str[0] = CHAR_ASTERISK;
context.out.out_str[1] = CHAR_QUESTION_MARK;
convert_glob_bash_write_str(&context.out, 2);
}
after_sep = FALSE;
continue; continue;
} }
if (c == CHAR_QUESTION_MARK) if (c == CHAR_QUESTION_MARK)
{ {
convert_glob_bash_wildcard(&context, convert_glob_bash_wildcard(&out, ccontext->glob_separator);
ccontext->glob_separator, after_sep);
after_sep = FALSE;
continue; continue;
} }
}
after_sep = (c == ccontext->glob_separator); if (c == CHAR_BACKSLASH)
if (after_sep && nesting_level > 0)
{ {
context.out.out_str[0] = CHAR_LEFT_PARENTHESIS; if (pattern >= pattern_end)
context.out.out_str[1] = CHAR_ASTERISK; {
context.out.out_str[2] = CHAR_F; result = ERROR_END_BACKSLASH;
context.out.out_str[3] = CHAR_RIGHT_PARENTHESIS; break;
convert_glob_bash_write_str(&context.out, 4); }
c = *pattern++;
after_sep = FALSE;
continue;
} }
if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL) if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
convert_glob_bash_write(&context.out, CHAR_BACKSLASH); convert_glob_bash_write(&out, CHAR_BACKSLASH);
convert_glob_bash_write(&context.out, c); convert_glob_bash_write(&out, c);
} }
if (result == 0) if (result == 0)
{ {
/* ! Unexpected end of input ! */ out.out_str[0] = CHAR_BACKSLASH;
if (nesting_level > 0 || context.read_mode != PCRE2_BASH_GLOB_NORMAL) out.out_str[1] = CHAR_z;
result = ERROR_MISSING_CLOSING_PARENTHESIS; out.out_str[2] = CHAR_NULL;
else convert_glob_bash_write_str(&out, 3);
{
context.out.out_str[0] = CHAR_BACKSLASH; if (!dummyrun && out.output_size != (out.output - use_buffer))
context.out.out_str[1] = CHAR_z; result = PCRE2_ERROR_NOMEMORY;
context.out.out_str[2] = CHAR_NULL;
convert_glob_bash_write_str(&context.out, 3);
}
} }
if (result != 0) if (result != 0)
{ {
*bufflenptr = context.out.output - use_buffer; *bufflenptr = pattern - pattern_start;
return result; return result;
} }
*bufflenptr = context.out.output_size - 1; *bufflenptr = out.output_size - 1;
return 0; return 0;
} }
@ -1139,7 +924,7 @@ for (i = 0; i < 2; i++)
break; break;
case PCRE2_CONVERT_GLOB_BASH: case PCRE2_CONVERT_GLOB_BASH:
rc = convert_glob_bash(pattype, pattern, plength, utf, use_buffer, use_length, rc = convert_glob_bash(options, pattern, plength, utf, use_buffer, use_length,
bufflenptr, dummyrun, ccontext); bufflenptr, dummyrun, ccontext);
break; break;

10
testdata/testinput24 vendored
View File

@ -222,20 +222,12 @@
/A\B\\C\D/ /A\B\\C\D/
/A'B'C'''\'''D''/ /\\{}\?\*+\[\]()|.^$/
/A""B"\\\"\C"''""/
/'\{}?*+[]()|.^$'/
/*a*\/*b*/ /*a*\/*b*/
/?a?\/?b?/ /?a?\/?b?/
/a|b@(a|b)*\/@(a|b)/
/\/@(a\/|b\/)\//
#pattern convert=unset #pattern convert=unset
#pattern convert=posix_extended #pattern convert=posix_extended

24
testdata/testoutput24 vendored
View File

@ -345,31 +345,19 @@ No match
# Non control character checking # Non control character checking
// //
\A\z (?s)\A\z
/A\B\\C\D/ /A\B\\C\D/
\AAB\\CD\z (?s)\AAB\\CD\z
/A'B'C'''\'''D''/ /\\{}\?\*+\[\]()|.^$/
\AABC\\D\z (?s)\A\\\{\}\?\*\+\[\]\(\)\|\.\^\$\z
/A""B"\\\"\C"''""/
\AAB\\"\\C\z
/'\{}?*+[]()|.^$'/
\A\\\{\}\?\*\+\[\]\(\)\|\.\^\$\z
/*a*\/*b*/ /*a*\/*b*/
\A(?:[^./][^/]*?)??a(*COMMIT)[^/]*?/(*COMMIT)(?:[^./][^/]*?)??b(*COMMIT)[^/]*?\z (?s)\A[^/]*?a(*COMMIT)[^/]*?/(*COMMIT)[^/]*?b(*COMMIT)[^/]*?\z
/?a?\/?b?/ /?a?\/?b?/
\A[^./]a[^/]/[^./]b[^/]\z (?s)\A[^/]a[^/]/[^/]b[^/]\z
/a|b@(a|b)*\/@(a|b)/
\Aa\|b(?:a|b)(*COMMIT)[^/]*?/(?!\.)(?:a|b)\z
/\/@(a\/|b\/)\//
\A/(?!\.)(?:a(*F)|b(*F))/\z
#pattern convert=unset #pattern convert=unset