Update POSIX basic regex conversion code.

This commit is contained in:
Philip.Hazel 2017-05-27 17:08:28 +00:00
parent 62e202f757
commit c34544b11f
3 changed files with 60 additions and 50 deletions

View File

@ -74,6 +74,7 @@ POSSIBILITY OF SUCH DAMAGE.
#define STR_DOT_STAR_LOOKBEHIND STR_DOT STR_ASTERISK STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_LESS_THAN_SIGN STR_EQUALS_SIGN
#define STR_LOOKAHEAD_NOT_DOT STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS
#define STR_QUERY_s STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS
#define STR_STAR_NUL STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
/* States for range and POSIX processing */
@ -101,12 +102,12 @@ static const char *pcre2_escaped_literals =
STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS;
/* Recognized escapes in POSIX basic patterns. */
/* Recognized escaped metacharacters in POSIX basic patterns. */
static const char *posix_basic_escapes =
STR_QUESTION_MARK STR_PLUS STR_VERTICAL_LINE
static const char *posix_meta_escapes =
STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS
STR_0 STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
@ -155,6 +156,7 @@ BOOL nextisliteral = FALSE;
/* Initialize default for error offset as end of input. */
*bufflenptr = plength;
PUTCHARS(STR_STAR_NUL);
/* Now scan the input. */
@ -237,7 +239,9 @@ while (plength > 0)
case CHAR_LEFT_SQUARE_BRACKET:
PUTCHARS(STR_LEFT_SQUARE_BRACKET);
/* Handle special cases [[:<:]] and [[:>:]] (which PCRE does support) */
#ifdef NEVER
/* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does
support) but they are not part of POSIX 1003.1. */
if (plength >= 6)
{
@ -257,8 +261,9 @@ while (plength > 0)
continue; /* With next character */
}
}
#endif
/* Handle "normal" character classes */
/* Handle start of "normal" character classes */
posix_state = POSIX_CLASS_NOT_STARTED;
@ -283,15 +288,17 @@ while (plength > 0)
case CHAR_BACKSLASH:
if (plength <= 0) return ERROR_END_BACKSLASH;
if (!extended && *posix < 127 &&
strchr(posix_basic_escapes, *posix) != NULL)
{
if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
lastspecial = *p++ = *posix++;
plength--;
if (extended) nextisliteral = TRUE; else
{
if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)
{
if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
lastspecial = *p++ = *posix++;
plength--;
}
else nextisliteral = TRUE;
}
else nextisliteral = TRUE;
break;
case CHAR_RIGHT_PARENTHESIS:
@ -323,7 +330,8 @@ while (plength > 0)
case CHAR_ASTERISK:
if (lastspecial != CHAR_ASTERISK)
{
if (!extended && posix_state < POSIX_NOT_BRACKET)
if (!extended && (posix_state < POSIX_NOT_BRACKET ||
lastspecial == CHAR_LEFT_PARENTHESIS))
goto ESCAPE_LITERAL;
goto COPY_SPECIAL;
}

17
testdata/testinput24 vendored
View File

@ -323,9 +323,9 @@
/a`*b/convert_glob_escape=x
#pattern convert=unset:posix_extended
# -------- Tests of extended POSIX conversion --------
/a[[:>:]z/
#pattern convert=unset:posix_extended
/<[[:a[:digit:]b]>/
<[>
@ -338,8 +338,6 @@
/a+\1b\\c|d[ab\c]/
/a[[:<:]]b[[:>:]]/
/<[]bc]>/
<]>
<b>
@ -361,6 +359,8 @@
/a***b/
# -------- Tests of basic POSIX conversion --------
#pattern convert=unset:posix_basic
/a*b+c\+[def](ab)\(cd\)/
@ -371,6 +371,9 @@
a1b
/how.to how\.to/
how\nto how.to
\= Expect no match
how\x{0}to how.to
/^how to \^how to/
@ -383,13 +386,11 @@
XabcY
X*abcY
X**abcY
/*ab\(*cd\)/
/^b\(c^d\)\(^e^f\)/
/a***b/
#pattern convert=unset
/abc/
# End of testinput24

55
testdata/testoutput24 vendored
View File

@ -508,14 +508,12 @@ No match
/a`*b/convert_glob_escape=x
** Invalid glob escape 'x'
# -------- Tests of extended POSIX conversion --------
#pattern convert=unset:posix_extended
/a[[:>:]z/
a[[:>:]z
Failed: error 130 at offset 4: unknown POSIX class name
/<[[:a[:digit:]b]>/
<[[:a[:digit:]b]>
(*NUL)<[[:a[:digit:]b]>
<[>
0: <[>
<:>
@ -531,13 +529,10 @@ Failed: error 130 at offset 4: unknown POSIX class name
No match
/a+\1b\\c|d[ab\c]/
a+1b\\c|d[ab\\c]
/a[[:<:]]b[[:>:]]/
a[[:<:]]b[[:>:]]
(*NUL)a+1b\\c|d[ab\\c]
/<[]bc]>/
<[]bc]>
(*NUL)<[]bc]>
<]>
0: <]>
<b>
@ -546,7 +541,7 @@ a[[:<:]]b[[:>:]]
0: <c>
/<[^]bc]>/
<[^]bc]>
(*NUL)<[^]bc]>
<.>
0: <.>
\= Expect no match
@ -556,7 +551,7 @@ No match
No match
/(a)\1b/
(a)1b
(*NUL)(a)1b
a1b
0: a1b
1: a
@ -565,21 +560,23 @@ No match
No match
/(ab)c)d]/
(ab)c\)d\]
(*NUL)(ab)c\)d\]
Xabc)d]Y
0: abc)d]
1: ab
/a***b/
a*b
(*NUL)a*b
# -------- Tests of basic POSIX conversion --------
#pattern convert=unset:posix_basic
/a*b+c\+[def](ab)\(cd\)/
a*b\+c+[def]\(ab\)(cd)
(*NUL)a*b\+c\+[def]\(ab\)(cd)
/\(a\)\1b/
(a)\1b
(*NUL)(a)\1b
aab
0: aab
1: a
@ -588,36 +585,40 @@ a*b\+c+[def]\(ab\)(cd)
No match
/how.to how\.to/
how.to how\.to
(*NUL)how.to how\.to
how\nto how.to
0: how\x0ato how.to
\= Expect no match
how\x{0}to how.to
No match
/^how to \^how to/
^how to \^how to
(*NUL)^how to \^how to
/^*abc/
^\*abc
(*NUL)^\*abc
/*abc/
\*abc
(*NUL)\*abc
X*abcY
0: *abc
/**abc/
\**abc
(*NUL)\**abc
XabcY
0: abc
X*abcY
0: *abc
X**abcY
0: **abc
/*ab\(*cd\)/
(*NUL)\*ab(\*cd)
/^b\(c^d\)\(^e^f\)/
^b(c\^d)(^e\^f)
(*NUL)^b(c\^d)(^e\^f)
/a***b/
a*b
#pattern convert=unset
/abc/
(*NUL)a*b
# End of testinput24