Update POSIX basic regex conversion code.

This commit is contained in:
Philip.Hazel 2017-05-27 17:08:28 +00:00
parent 62e202f757
commit c34544b11f
3 changed files with 60 additions and 50 deletions

View File

@ -74,6 +74,7 @@ POSSIBILITY OF SUCH DAMAGE.
#define STR_DOT_STAR_LOOKBEHIND STR_DOT STR_ASTERISK STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_LESS_THAN_SIGN STR_EQUALS_SIGN #define STR_DOT_STAR_LOOKBEHIND STR_DOT STR_ASTERISK STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_LESS_THAN_SIGN STR_EQUALS_SIGN
#define STR_LOOKAHEAD_NOT_DOT STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS #define STR_LOOKAHEAD_NOT_DOT STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS
#define STR_QUERY_s STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS #define STR_QUERY_s STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS
#define STR_STAR_NUL STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
/* States for range and POSIX processing */ /* States for range and POSIX processing */
@ -101,12 +102,12 @@ static const char *pcre2_escaped_literals =
STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS; STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS;
/* Recognized escapes in POSIX basic patterns. */ /* Recognized escaped metacharacters in POSIX basic patterns. */
static const char *posix_basic_escapes = static const char *posix_meta_escapes =
STR_QUESTION_MARK STR_PLUS STR_VERTICAL_LINE
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS
STR_0 STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9; STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
@ -155,6 +156,7 @@ BOOL nextisliteral = FALSE;
/* Initialize default for error offset as end of input. */ /* Initialize default for error offset as end of input. */
*bufflenptr = plength; *bufflenptr = plength;
PUTCHARS(STR_STAR_NUL);
/* Now scan the input. */ /* Now scan the input. */
@ -237,7 +239,9 @@ while (plength > 0)
case CHAR_LEFT_SQUARE_BRACKET: case CHAR_LEFT_SQUARE_BRACKET:
PUTCHARS(STR_LEFT_SQUARE_BRACKET); PUTCHARS(STR_LEFT_SQUARE_BRACKET);
/* Handle special cases [[:<:]] and [[:>:]] (which PCRE does support) */ #ifdef NEVER
/* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does
support) but they are not part of POSIX 1003.1. */
if (plength >= 6) if (plength >= 6)
{ {
@ -257,8 +261,9 @@ while (plength > 0)
continue; /* With next character */ continue; /* With next character */
} }
} }
#endif
/* Handle "normal" character classes */ /* Handle start of "normal" character classes */
posix_state = POSIX_CLASS_NOT_STARTED; posix_state = POSIX_CLASS_NOT_STARTED;
@ -283,15 +288,17 @@ while (plength > 0)
case CHAR_BACKSLASH: case CHAR_BACKSLASH:
if (plength <= 0) return ERROR_END_BACKSLASH; if (plength <= 0) return ERROR_END_BACKSLASH;
if (!extended && *posix < 127 && if (extended) nextisliteral = TRUE; else
strchr(posix_basic_escapes, *posix) != NULL)
{ {
if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH); if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY; {
lastspecial = *p++ = *posix++; if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
plength--; if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
lastspecial = *p++ = *posix++;
plength--;
}
else nextisliteral = TRUE;
} }
else nextisliteral = TRUE;
break; break;
case CHAR_RIGHT_PARENTHESIS: case CHAR_RIGHT_PARENTHESIS:
@ -323,7 +330,8 @@ while (plength > 0)
case CHAR_ASTERISK: case CHAR_ASTERISK:
if (lastspecial != CHAR_ASTERISK) if (lastspecial != CHAR_ASTERISK)
{ {
if (!extended && posix_state < POSIX_NOT_BRACKET) if (!extended && (posix_state < POSIX_NOT_BRACKET ||
lastspecial == CHAR_LEFT_PARENTHESIS))
goto ESCAPE_LITERAL; goto ESCAPE_LITERAL;
goto COPY_SPECIAL; goto COPY_SPECIAL;
} }

17
testdata/testinput24 vendored
View File

@ -323,9 +323,9 @@
/a`*b/convert_glob_escape=x /a`*b/convert_glob_escape=x
#pattern convert=unset:posix_extended # -------- Tests of extended POSIX conversion --------
/a[[:>:]z/ #pattern convert=unset:posix_extended
/<[[:a[:digit:]b]>/ /<[[:a[:digit:]b]>/
<[> <[>
@ -338,8 +338,6 @@
/a+\1b\\c|d[ab\c]/ /a+\1b\\c|d[ab\c]/
/a[[:<:]]b[[:>:]]/
/<[]bc]>/ /<[]bc]>/
<]> <]>
<b> <b>
@ -361,6 +359,8 @@
/a***b/ /a***b/
# -------- Tests of basic POSIX conversion --------
#pattern convert=unset:posix_basic #pattern convert=unset:posix_basic
/a*b+c\+[def](ab)\(cd\)/ /a*b+c\+[def](ab)\(cd\)/
@ -371,6 +371,9 @@
a1b a1b
/how.to how\.to/ /how.to how\.to/
how\nto how.to
\= Expect no match
how\x{0}to how.to
/^how to \^how to/ /^how to \^how to/
@ -384,12 +387,10 @@
X*abcY X*abcY
X**abcY X**abcY
/*ab\(*cd\)/
/^b\(c^d\)\(^e^f\)/ /^b\(c^d\)\(^e^f\)/
/a***b/ /a***b/
#pattern convert=unset
/abc/
# End of testinput24 # End of testinput24

55
testdata/testoutput24 vendored
View File

@ -508,14 +508,12 @@ No match
/a`*b/convert_glob_escape=x /a`*b/convert_glob_escape=x
** Invalid glob escape 'x' ** Invalid glob escape 'x'
# -------- Tests of extended POSIX conversion --------
#pattern convert=unset:posix_extended #pattern convert=unset:posix_extended
/a[[:>:]z/
a[[:>:]z
Failed: error 130 at offset 4: unknown POSIX class name
/<[[:a[:digit:]b]>/ /<[[:a[:digit:]b]>/
<[[:a[:digit:]b]> (*NUL)<[[:a[:digit:]b]>
<[> <[>
0: <[> 0: <[>
<:> <:>
@ -531,13 +529,10 @@ Failed: error 130 at offset 4: unknown POSIX class name
No match No match
/a+\1b\\c|d[ab\c]/ /a+\1b\\c|d[ab\c]/
a+1b\\c|d[ab\\c] (*NUL)a+1b\\c|d[ab\\c]
/a[[:<:]]b[[:>:]]/
a[[:<:]]b[[:>:]]
/<[]bc]>/ /<[]bc]>/
<[]bc]> (*NUL)<[]bc]>
<]> <]>
0: <]> 0: <]>
<b> <b>
@ -546,7 +541,7 @@ a[[:<:]]b[[:>:]]
0: <c> 0: <c>
/<[^]bc]>/ /<[^]bc]>/
<[^]bc]> (*NUL)<[^]bc]>
<.> <.>
0: <.> 0: <.>
\= Expect no match \= Expect no match
@ -556,7 +551,7 @@ No match
No match No match
/(a)\1b/ /(a)\1b/
(a)1b (*NUL)(a)1b
a1b a1b
0: a1b 0: a1b
1: a 1: a
@ -565,21 +560,23 @@ No match
No match No match
/(ab)c)d]/ /(ab)c)d]/
(ab)c\)d\] (*NUL)(ab)c\)d\]
Xabc)d]Y Xabc)d]Y
0: abc)d] 0: abc)d]
1: ab 1: ab
/a***b/ /a***b/
a*b (*NUL)a*b
# -------- Tests of basic POSIX conversion --------
#pattern convert=unset:posix_basic #pattern convert=unset:posix_basic
/a*b+c\+[def](ab)\(cd\)/ /a*b+c\+[def](ab)\(cd\)/
a*b\+c+[def]\(ab\)(cd) (*NUL)a*b\+c\+[def]\(ab\)(cd)
/\(a\)\1b/ /\(a\)\1b/
(a)\1b (*NUL)(a)\1b
aab aab
0: aab 0: aab
1: a 1: a
@ -588,21 +585,26 @@ a*b\+c+[def]\(ab\)(cd)
No match No match
/how.to how\.to/ /how.to how\.to/
how.to how\.to (*NUL)how.to how\.to
how\nto how.to
0: how\x0ato how.to
\= Expect no match
how\x{0}to how.to
No match
/^how to \^how to/ /^how to \^how to/
^how to \^how to (*NUL)^how to \^how to
/^*abc/ /^*abc/
^\*abc (*NUL)^\*abc
/*abc/ /*abc/
\*abc (*NUL)\*abc
X*abcY X*abcY
0: *abc 0: *abc
/**abc/ /**abc/
\**abc (*NUL)\**abc
XabcY XabcY
0: abc 0: abc
X*abcY X*abcY
@ -610,14 +612,13 @@ how.to how\.to
X**abcY X**abcY
0: **abc 0: **abc
/*ab\(*cd\)/
(*NUL)\*ab(\*cd)
/^b\(c^d\)\(^e^f\)/ /^b\(c^d\)\(^e^f\)/
^b(c\^d)(^e\^f) (*NUL)^b(c\^d)(^e\^f)
/a***b/ /a***b/
a*b (*NUL)a*b
#pattern convert=unset
/abc/
# End of testinput24 # End of testinput24