Support invalid character classes in conversion.

This commit is contained in:
Zoltán Herczeg 2017-05-25 13:19:42 +00:00
parent 0b2052f714
commit bf6f53b089
3 changed files with 90 additions and 54 deletions

View File

@ -471,8 +471,8 @@ Arguments:
pattern_end end of pattern pattern_end end of pattern
out output context out output context
Returns: TRUE => success Returns: >0 => class index
FALSE => malformed class 0 => malformed class
*/ */
static int static int
@ -481,48 +481,31 @@ convert_glob_parse_class(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
{ {
static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:" static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:"
"graph:lower:print:punct:space:upper:word:xdigit:"; "graph:lower:print:punct:space:upper:word:xdigit:";
PCRE2_SPTR pattern = *from; PCRE2_SPTR start = *from + 1;
PCRE2_SPTR start; PCRE2_SPTR pattern = start;
const char *class_ptr; const char *class_ptr;
PCRE2_UCHAR c; PCRE2_UCHAR c;
int class_index;
out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
out->out_str[1] = CHAR_COLON;
convert_glob_write_str(out, 2);
while (TRUE) while (TRUE)
{ {
if (pattern >= pattern_end) if (pattern >= pattern_end) return 0;
{
*from = pattern;
return ERROR_MISSING_SQUARE_BRACKET;
}
c = *pattern++; c = *pattern++;
if (c == CHAR_COLON && pattern < pattern_end && if (c < CHAR_a || c > CHAR_z) break;
*pattern == CHAR_RIGHT_SQUARE_BRACKET)
{
break;
}
if (c < CHAR_a || c > CHAR_z)
{
/* All POSIX class is composed of lowercase characters */
*from = pattern;
return ERROR_MISSING_SQUARE_BRACKET;
}
convert_glob_write(out, c);
} }
start = *from; if (c != CHAR_COLON || pattern >= pattern_end ||
*from = pattern + 1; *pattern != CHAR_RIGHT_SQUARE_BRACKET)
return 0;
class_ptr = posix_classes; class_ptr = posix_classes;
class_index = 0;
while (TRUE) while (TRUE)
{ {
if (*class_ptr == CHAR_NULL) return ERROR_UNKNOWN_POSIX_CLASS; if (*class_ptr == CHAR_NULL) return 0;
pattern = start; pattern = start;
@ -530,10 +513,13 @@ while (TRUE)
{ {
if (*pattern == CHAR_COLON) if (*pattern == CHAR_COLON)
{ {
out->out_str[0] = CHAR_COLON; pattern += 2;
out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET; start -= 2;
convert_glob_write_str(out, 2);
return 0; do convert_glob_write(out, *start++); while (start < pattern);
*from = pattern;
return class_index;
} }
pattern++; pattern++;
class_ptr++; class_ptr++;
@ -541,9 +527,41 @@ while (TRUE)
while (*class_ptr != CHAR_COLON) class_ptr++; while (*class_ptr != CHAR_COLON) class_ptr++;
class_ptr++; class_ptr++;
class_index++;
} }
} }
/* Checks whether the character is in the class.

Arguments:
  class_index    class index; the switch below maps it as follows, which is
                 the order of the names in the posix_classes string used by
                 convert_glob_parse_class:
                   0 alnum   1 alpha   2 ascii   3 blank   4 cntrl
                   5 digit   6 graph   7 lower   8 print   9 punct
                  10 space  11 upper  12 word
                 any other value is treated as xdigit
  c              character

Returns:         !0 => character is found in the class
                  0 => otherwise

NOTE(review): convert_glob_parse_class documents a return value of 0 as
"malformed class", while index 0 selects alnum here - confirm that the two
functions agree on the numbering base.
*/

static BOOL
convert_glob_char_in_class(int class_index, PCRE2_UCHAR c)
{
const uint32_t ch = c;

/* The <ctype.h> classification functions are only defined for arguments that
are representable as unsigned char (or equal EOF). If PCRE2_UCHAR is wider
than 8 bits, c can be outside that range; such a character belongs to none of
the classes handled below, so answer without calling into <ctype.h>. */

if (ch > 0xffu) return FALSE;

switch (class_index)
  {
  case 0: return isalnum((int)ch);
  case 1: return isalpha((int)ch);
  case 2: return 1;   /* ascii: every remaining character is accepted */
  case 3: return ch == CHAR_HT || ch == CHAR_SPACE;
  case 4: return iscntrl((int)ch);
  case 5: return isdigit((int)ch);
  case 6: return isgraph((int)ch);
  case 7: return islower((int)ch);
  case 8: return isprint((int)ch);
  case 9: return ispunct((int)ch);
  case 10: return isspace((int)ch);
  case 11: return isupper((int)ch);
  case 12: return isalnum((int)ch) || ch == CHAR_UNDERSCORE;
  default: return isxdigit((int)ch);
  }
}
/* Parse a range of characters. /* Parse a range of characters.
@ -569,7 +587,7 @@ BOOL has_prev_c;
PCRE2_SPTR pattern = *from; PCRE2_SPTR pattern = *from;
PCRE2_SPTR char_start = NULL; PCRE2_SPTR char_start = NULL;
uint32_t c, prev_c; uint32_t c, prev_c;
int result, len; int len, class_index;
(void)utf; /* Avoid compiler warning. */ (void)utf; /* Avoid compiler warning. */
@ -653,17 +671,21 @@ while (pattern < pattern_end)
if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON) if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
{ {
*from = pattern + 1; *from = pattern;
class_index = convert_glob_parse_class(from, pattern_end, out);
result = convert_glob_parse_class(from, pattern_end, out); if (class_index != 0)
if (result != 0) return result; {
pattern = *from;
pattern = *from; has_prev_c = FALSE;
prev_c = 0;
has_prev_c = FALSE; if (!is_negative &&
prev_c = 0; convert_glob_char_in_class (class_index, separator))
separator_seen = TRUE; separator_seen = TRUE;
continue; continue;
}
} }
else if (c == CHAR_MINUS && has_prev_c && else if (c == CHAR_MINUS && has_prev_c &&
*pattern != CHAR_RIGHT_SQUARE_BRACKET) *pattern != CHAR_RIGHT_SQUARE_BRACKET)

View File

@ -237,6 +237,8 @@
/[[:alpha:]-a]/ /[[:alpha:]-a]/
/[[:alpha:]][[:punct:]][[:ascii:]]/
/[a-[:alpha:]]/ /[a-[:alpha:]]/
/[[:alpha:/ /[[:alpha:/

34
testdata/testoutput24 vendored
View File

@ -179,12 +179,12 @@ No match
No match No match
'[[:alpha:]][[:digit:]][[:upper:]]' '[[:alpha:]][[:digit:]][[:upper:]]'
(?s)\A[[:alpha:]](?<!/)[[:digit:]](?<!/)[[:upper:]](?<!/)\z (?s)\A[[:alpha:]][[:digit:]][[:upper:]]\z
a1B a1B
0: a1B 0: a1B
'[[:digit:][:upper:][:space:]]' '[[:digit:][:upper:][:space:]]'
(?s)\A[[:digit:][:upper:][:space:]](?<!/)\z (?s)\A[[:digit:][:upper:][:space:]]\z
A A
0: A 0: A
1 1
@ -198,7 +198,7 @@ No match
No match No match
'[a-c[:digit:]x-z]' '[a-c[:digit:]x-z]'
(?s)\A[a-c[:digit:]x-z](?<!/)\z (?s)\A[a-c[:digit:]x-z]\z
5 5
0: 5 0: 5
b b
@ -294,7 +294,7 @@ No match
0: <-> 0: <->
/a[[:digit:].]z/ /a[[:digit:].]z/
(?s)\Aa[[:digit:].](?<!/)z\z (?s)\Aa[[:digit:].]z\z
a1z a1z
0: a1z 0: a1z
a.z a.z
@ -304,20 +304,29 @@ No match
No match No match
/a[[:digit].]z/ /a[[:digit].]z/
** Pattern conversion error at offset 10: missing terminating ] for character class (?s)\Aa[\[:digit]\.\]z\z
a[.]z a[.]z
0: a[.]z
a:.]z a:.]z
0: a:.]z
ad.]z ad.]z
0: ad.]z
/<[[:a[:digit:]b]>/ /<[[:a[:digit:]b]>/
** Pattern conversion error at offset 6: missing terminating ] for character class (?s)\A<[\[:a[:digit:]b]>\z
<[> <[>
0: <[>
<:> <:>
0: <:>
<a> <a>
0: <a>
<9> <9>
0: <9>
<b> <b>
0: <b>
\= Expect no match \= Expect no match
<d> <d>
No match
/a*b/convert_glob_separator=\ /a*b/convert_glob_separator=\
(?s)\Aa(*COMMIT)[^\\]*?b\z (?s)\Aa(*COMMIT)[^\\]*?b\z
@ -349,7 +358,7 @@ No match
(?s)\A[^/a\\bc][^/\]][^/\-][^/\]\-]\z (?s)\A[^/a\\bc][^/\]][^/\-][^/\]\-]\z
/[[:alpha:][:xdigit:][:word:]]/ /[[:alpha:][:xdigit:][:word:]]/
(?s)\A[[:alpha:][:xdigit:][:word:]](?<!/)\z (?s)\A[[:alpha:][:xdigit:][:word:]]\z
"[/-/]" "[/-/]"
(?s)\A[/-/](?<!/)\z (?s)\A[/-/](?<!/)\z
@ -364,7 +373,10 @@ No match
(?s)\A[^/\--\-\--\-]\z (?s)\A[^/\--\-\--\-]\z
/[[:alpha:]-a]/ /[[:alpha:]-a]/
(?s)\A[[:alpha:]\-a](?<!/)\z (?s)\A[[:alpha:]\-a]\z
/[[:alpha:]][[:punct:]][[:ascii:]]/
(?s)\A[[:alpha:]][[:punct:]](?<!/)[[:ascii:]](?<!/)\z
/[a-[:alpha:]]/ /[a-[:alpha:]]/
** Pattern conversion error at offset 4: invalid syntax ** Pattern conversion error at offset 4: invalid syntax
@ -376,13 +388,13 @@ No match
** Pattern conversion error at offset 10: missing terminating ] for character class ** Pattern conversion error at offset 10: missing terminating ] for character class
/[[:alphaa:]]/ /[[:alphaa:]]/
** Pattern conversion error at offset 11: unknown POSIX class name (?s)\A[\[:alphaa:]\]\z
/[[:xdigi:]]/ /[[:xdigi:]]/
** Pattern conversion error at offset 10: unknown POSIX class name (?s)\A[\[:xdigi:]\]\z
/[[:xdigit::]]/ /[[:xdigit::]]/
** Pattern conversion error at offset 10: missing terminating ] for character class (?s)\A[\[:xdigit::]\]\z
/****/ /****/
(?s) (?s)