Support invalid character classes in conversion.
This commit is contained in:
parent
0b2052f714
commit
bf6f53b089
|
@ -471,8 +471,8 @@ Arguments:
|
||||||
pattern_end end of pattern
|
pattern_end end of pattern
|
||||||
out output context
|
out output context
|
||||||
|
|
||||||
Returns: TRUE => success
|
Returns: >0 => class index
|
||||||
FALSE => malformed class
|
0 => malformed class
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -481,48 +481,31 @@ convert_glob_parse_class(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
|
||||||
{
|
{
|
||||||
static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:"
|
static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:"
|
||||||
"graph:lower:print:punct:space:upper:word:xdigit:";
|
"graph:lower:print:punct:space:upper:word:xdigit:";
|
||||||
PCRE2_SPTR pattern = *from;
|
PCRE2_SPTR start = *from + 1;
|
||||||
PCRE2_SPTR start;
|
PCRE2_SPTR pattern = start;
|
||||||
const char *class_ptr;
|
const char *class_ptr;
|
||||||
PCRE2_UCHAR c;
|
PCRE2_UCHAR c;
|
||||||
|
int class_index;
|
||||||
out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
|
|
||||||
out->out_str[1] = CHAR_COLON;
|
|
||||||
convert_glob_write_str(out, 2);
|
|
||||||
|
|
||||||
while (TRUE)
|
while (TRUE)
|
||||||
{
|
{
|
||||||
if (pattern >= pattern_end)
|
if (pattern >= pattern_end) return 0;
|
||||||
{
|
|
||||||
*from = pattern;
|
|
||||||
return ERROR_MISSING_SQUARE_BRACKET;
|
|
||||||
}
|
|
||||||
|
|
||||||
c = *pattern++;
|
c = *pattern++;
|
||||||
|
|
||||||
if (c == CHAR_COLON && pattern < pattern_end &&
|
if (c < CHAR_a || c > CHAR_z) break;
|
||||||
*pattern == CHAR_RIGHT_SQUARE_BRACKET)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (c < CHAR_a || c > CHAR_z)
|
|
||||||
{
|
|
||||||
/* All POSIX class is composed of lowercase characters */
|
|
||||||
*from = pattern;
|
|
||||||
return ERROR_MISSING_SQUARE_BRACKET;
|
|
||||||
}
|
|
||||||
|
|
||||||
convert_glob_write(out, c);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
start = *from;
|
if (c != CHAR_COLON || pattern >= pattern_end ||
|
||||||
*from = pattern + 1;
|
*pattern != CHAR_RIGHT_SQUARE_BRACKET)
|
||||||
|
return 0;
|
||||||
|
|
||||||
class_ptr = posix_classes;
|
class_ptr = posix_classes;
|
||||||
|
class_index = 0;
|
||||||
|
|
||||||
while (TRUE)
|
while (TRUE)
|
||||||
{
|
{
|
||||||
if (*class_ptr == CHAR_NULL) return ERROR_UNKNOWN_POSIX_CLASS;
|
if (*class_ptr == CHAR_NULL) return 0;
|
||||||
|
|
||||||
pattern = start;
|
pattern = start;
|
||||||
|
|
||||||
|
@ -530,10 +513,13 @@ while (TRUE)
|
||||||
{
|
{
|
||||||
if (*pattern == CHAR_COLON)
|
if (*pattern == CHAR_COLON)
|
||||||
{
|
{
|
||||||
out->out_str[0] = CHAR_COLON;
|
pattern += 2;
|
||||||
out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET;
|
start -= 2;
|
||||||
convert_glob_write_str(out, 2);
|
|
||||||
return 0;
|
do convert_glob_write(out, *start++); while (start < pattern);
|
||||||
|
|
||||||
|
*from = pattern;
|
||||||
|
return class_index;
|
||||||
}
|
}
|
||||||
pattern++;
|
pattern++;
|
||||||
class_ptr++;
|
class_ptr++;
|
||||||
|
@ -541,9 +527,41 @@ while (TRUE)
|
||||||
|
|
||||||
while (*class_ptr != CHAR_COLON) class_ptr++;
|
while (*class_ptr != CHAR_COLON) class_ptr++;
|
||||||
class_ptr++;
|
class_ptr++;
|
||||||
|
class_index++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Checks whether the character is in the class.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
class_index class index
|
||||||
|
c character
|
||||||
|
|
||||||
|
Returns: !0 => character is found in the class
|
||||||
|
0 => otherwise
|
||||||
|
*/
|
||||||
|
|
||||||
|
static BOOL
|
||||||
|
convert_glob_char_in_class(int class_index, PCRE2_UCHAR c)
|
||||||
|
{
|
||||||
|
switch (class_index)
|
||||||
|
{
|
||||||
|
case 0: return isalnum(c);
|
||||||
|
case 1: return isalpha(c);
|
||||||
|
case 2: return 1;
|
||||||
|
case 3: return c == CHAR_HT || c == CHAR_SPACE;
|
||||||
|
case 4: return iscntrl(c);
|
||||||
|
case 5: return isdigit(c);
|
||||||
|
case 6: return isgraph(c);
|
||||||
|
case 7: return islower(c);
|
||||||
|
case 8: return isprint(c);
|
||||||
|
case 9: return ispunct(c);
|
||||||
|
case 10: return isspace(c);
|
||||||
|
case 11: return isupper(c);
|
||||||
|
case 12: return isalnum(c) || c == CHAR_UNDERSCORE;
|
||||||
|
default: return isxdigit(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Parse a range of characters.
|
/* Parse a range of characters.
|
||||||
|
|
||||||
|
@ -569,7 +587,7 @@ BOOL has_prev_c;
|
||||||
PCRE2_SPTR pattern = *from;
|
PCRE2_SPTR pattern = *from;
|
||||||
PCRE2_SPTR char_start = NULL;
|
PCRE2_SPTR char_start = NULL;
|
||||||
uint32_t c, prev_c;
|
uint32_t c, prev_c;
|
||||||
int result, len;
|
int len, class_index;
|
||||||
|
|
||||||
(void)utf; /* Avoid compiler warning. */
|
(void)utf; /* Avoid compiler warning. */
|
||||||
|
|
||||||
|
@ -653,17 +671,21 @@ while (pattern < pattern_end)
|
||||||
|
|
||||||
if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
|
if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
|
||||||
{
|
{
|
||||||
*from = pattern + 1;
|
*from = pattern;
|
||||||
|
class_index = convert_glob_parse_class(from, pattern_end, out);
|
||||||
|
|
||||||
result = convert_glob_parse_class(from, pattern_end, out);
|
if (class_index != 0)
|
||||||
if (result != 0) return result;
|
{
|
||||||
|
pattern = *from;
|
||||||
|
|
||||||
pattern = *from;
|
has_prev_c = FALSE;
|
||||||
|
prev_c = 0;
|
||||||
|
|
||||||
has_prev_c = FALSE;
|
if (!is_negative &&
|
||||||
prev_c = 0;
|
convert_glob_char_in_class (class_index, separator))
|
||||||
separator_seen = TRUE;
|
separator_seen = TRUE;
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (c == CHAR_MINUS && has_prev_c &&
|
else if (c == CHAR_MINUS && has_prev_c &&
|
||||||
*pattern != CHAR_RIGHT_SQUARE_BRACKET)
|
*pattern != CHAR_RIGHT_SQUARE_BRACKET)
|
||||||
|
|
|
@ -237,6 +237,8 @@
|
||||||
|
|
||||||
/[[:alpha:]-a]/
|
/[[:alpha:]-a]/
|
||||||
|
|
||||||
|
/[[:alpha:]][[:punct:]][[:ascii:]]/
|
||||||
|
|
||||||
/[a-[:alpha:]]/
|
/[a-[:alpha:]]/
|
||||||
|
|
||||||
/[[:alpha:/
|
/[[:alpha:/
|
||||||
|
|
|
@ -179,12 +179,12 @@ No match
|
||||||
No match
|
No match
|
||||||
|
|
||||||
'[[:alpha:]][[:digit:]][[:upper:]]'
|
'[[:alpha:]][[:digit:]][[:upper:]]'
|
||||||
(?s)\A[[:alpha:]](?<!/)[[:digit:]](?<!/)[[:upper:]](?<!/)\z
|
(?s)\A[[:alpha:]][[:digit:]][[:upper:]]\z
|
||||||
a1B
|
a1B
|
||||||
0: a1B
|
0: a1B
|
||||||
|
|
||||||
'[[:digit:][:upper:][:space:]]'
|
'[[:digit:][:upper:][:space:]]'
|
||||||
(?s)\A[[:digit:][:upper:][:space:]](?<!/)\z
|
(?s)\A[[:digit:][:upper:][:space:]]\z
|
||||||
A
|
A
|
||||||
0: A
|
0: A
|
||||||
1
|
1
|
||||||
|
@ -198,7 +198,7 @@ No match
|
||||||
No match
|
No match
|
||||||
|
|
||||||
'[a-c[:digit:]x-z]'
|
'[a-c[:digit:]x-z]'
|
||||||
(?s)\A[a-c[:digit:]x-z](?<!/)\z
|
(?s)\A[a-c[:digit:]x-z]\z
|
||||||
5
|
5
|
||||||
0: 5
|
0: 5
|
||||||
b
|
b
|
||||||
|
@ -294,7 +294,7 @@ No match
|
||||||
0: <->
|
0: <->
|
||||||
|
|
||||||
/a[[:digit:].]z/
|
/a[[:digit:].]z/
|
||||||
(?s)\Aa[[:digit:].](?<!/)z\z
|
(?s)\Aa[[:digit:].]z\z
|
||||||
a1z
|
a1z
|
||||||
0: a1z
|
0: a1z
|
||||||
a.z
|
a.z
|
||||||
|
@ -304,20 +304,29 @@ No match
|
||||||
No match
|
No match
|
||||||
|
|
||||||
/a[[:digit].]z/
|
/a[[:digit].]z/
|
||||||
** Pattern conversion error at offset 10: missing terminating ] for character class
|
(?s)\Aa[\[:digit]\.\]z\z
|
||||||
a[.]z
|
a[.]z
|
||||||
|
0: a[.]z
|
||||||
a:.]z
|
a:.]z
|
||||||
|
0: a:.]z
|
||||||
ad.]z
|
ad.]z
|
||||||
|
0: ad.]z
|
||||||
|
|
||||||
/<[[:a[:digit:]b]>/
|
/<[[:a[:digit:]b]>/
|
||||||
** Pattern conversion error at offset 6: missing terminating ] for character class
|
(?s)\A<[\[:a[:digit:]b]>\z
|
||||||
<[>
|
<[>
|
||||||
|
0: <[>
|
||||||
<:>
|
<:>
|
||||||
|
0: <:>
|
||||||
<a>
|
<a>
|
||||||
|
0: <a>
|
||||||
<9>
|
<9>
|
||||||
|
0: <9>
|
||||||
<b>
|
<b>
|
||||||
|
0: <b>
|
||||||
\= Expect no match
|
\= Expect no match
|
||||||
<d>
|
<d>
|
||||||
|
No match
|
||||||
|
|
||||||
/a*b/convert_glob_separator=\
|
/a*b/convert_glob_separator=\
|
||||||
(?s)\Aa(*COMMIT)[^\\]*?b\z
|
(?s)\Aa(*COMMIT)[^\\]*?b\z
|
||||||
|
@ -349,7 +358,7 @@ No match
|
||||||
(?s)\A[^/a\\bc][^/\]][^/\-][^/\]\-]\z
|
(?s)\A[^/a\\bc][^/\]][^/\-][^/\]\-]\z
|
||||||
|
|
||||||
/[[:alpha:][:xdigit:][:word:]]/
|
/[[:alpha:][:xdigit:][:word:]]/
|
||||||
(?s)\A[[:alpha:][:xdigit:][:word:]](?<!/)\z
|
(?s)\A[[:alpha:][:xdigit:][:word:]]\z
|
||||||
|
|
||||||
"[/-/]"
|
"[/-/]"
|
||||||
(?s)\A[/-/](?<!/)\z
|
(?s)\A[/-/](?<!/)\z
|
||||||
|
@ -364,7 +373,10 @@ No match
|
||||||
(?s)\A[^/\--\-\--\-]\z
|
(?s)\A[^/\--\-\--\-]\z
|
||||||
|
|
||||||
/[[:alpha:]-a]/
|
/[[:alpha:]-a]/
|
||||||
(?s)\A[[:alpha:]\-a](?<!/)\z
|
(?s)\A[[:alpha:]\-a]\z
|
||||||
|
|
||||||
|
/[[:alpha:]][[:punct:]][[:ascii:]]/
|
||||||
|
(?s)\A[[:alpha:]][[:punct:]](?<!/)[[:ascii:]](?<!/)\z
|
||||||
|
|
||||||
/[a-[:alpha:]]/
|
/[a-[:alpha:]]/
|
||||||
** Pattern conversion error at offset 4: invalid syntax
|
** Pattern conversion error at offset 4: invalid syntax
|
||||||
|
@ -376,13 +388,13 @@ No match
|
||||||
** Pattern conversion error at offset 10: missing terminating ] for character class
|
** Pattern conversion error at offset 10: missing terminating ] for character class
|
||||||
|
|
||||||
/[[:alphaa:]]/
|
/[[:alphaa:]]/
|
||||||
** Pattern conversion error at offset 11: unknown POSIX class name
|
(?s)\A[\[:alphaa:]\]\z
|
||||||
|
|
||||||
/[[:xdigi:]]/
|
/[[:xdigi:]]/
|
||||||
** Pattern conversion error at offset 10: unknown POSIX class name
|
(?s)\A[\[:xdigi:]\]\z
|
||||||
|
|
||||||
/[[:xdigit::]]/
|
/[[:xdigit::]]/
|
||||||
** Pattern conversion error at offset 10: missing terminating ] for character class
|
(?s)\A[\[:xdigit::]\]\z
|
||||||
|
|
||||||
/****/
|
/****/
|
||||||
(?s)
|
(?s)
|
||||||
|
|
Loading…
Reference in New Issue