diff --git a/src/pcre2_convert.c b/src/pcre2_convert.c index b964e58..8f2dd8f 100644 --- a/src/pcre2_convert.c +++ b/src/pcre2_convert.c @@ -74,6 +74,7 @@ POSSIBILITY OF SUCH DAMAGE. #define STR_DOT_STAR_LOOKBEHIND STR_DOT STR_ASTERISK STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_LESS_THAN_SIGN STR_EQUALS_SIGN #define STR_LOOKAHEAD_NOT_DOT STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS #define STR_QUERY_s STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS +#define STR_STAR_NUL STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS /* States for range and POSIX processing */ @@ -101,12 +102,12 @@ static const char *pcre2_escaped_literals = STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS; -/* Recognized escapes in POSIX basic patterns. */ +/* Recognized escaped metacharacters in POSIX basic patterns. */ -static const char *posix_basic_escapes = - STR_QUESTION_MARK STR_PLUS STR_VERTICAL_LINE +static const char *posix_meta_escapes = STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS - STR_0 STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9; + STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET + STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9; @@ -155,6 +156,7 @@ BOOL nextisliteral = FALSE; /* Initialize default for error offset as end of input. */ *bufflenptr = plength; +PUTCHARS(STR_STAR_NUL); /* Now scan the input. */ @@ -237,7 +239,9 @@ while (plength > 0) case CHAR_LEFT_SQUARE_BRACKET: PUTCHARS(STR_LEFT_SQUARE_BRACKET); - /* Handle special cases [[:<:]] and [[:>:]] (which PCRE does support) */ +#ifdef NEVER + /* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does + support) but they are not part of POSIX 1003.1. */ if (plength >= 6) { @@ -257,8 +261,9 @@ while (plength > 0) continue; /* With next character */ } } +#endif - /* Handle "normal" character classes */ + /* Handle start of "normal" character classes */ posix_state = POSIX_CLASS_NOT_STARTED; @@ -283,15 +288,17 @@ while (plength > 0) case CHAR_BACKSLASH: if (plength <= 0) return ERROR_END_BACKSLASH; - if (!extended && *posix < 127 && - strchr(posix_basic_escapes, *posix) != NULL) - { - if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH); - if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY; - lastspecial = *p++ = *posix++; - plength--; + if (extended) nextisliteral = TRUE; else + { + if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL) + { + if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH); + if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY; + lastspecial = *p++ = *posix++; + plength--; + } + else nextisliteral = TRUE; } - else nextisliteral = TRUE; break; case CHAR_RIGHT_PARENTHESIS: @@ -323,7 +330,8 @@ while (plength > 0) case CHAR_ASTERISK: if (lastspecial != CHAR_ASTERISK) { - if (!extended && posix_state < POSIX_NOT_BRACKET) + if (!extended && (posix_state < POSIX_NOT_BRACKET || + lastspecial == CHAR_LEFT_PARENTHESIS)) goto ESCAPE_LITERAL; goto COPY_SPECIAL; } diff --git a/testdata/testinput24 b/testdata/testinput24 index f25c04b..09acd2d 100644 --- a/testdata/testinput24 +++ b/testdata/testinput24 @@ -323,9 +323,9 @@ /a`*b/convert_glob_escape=x -#pattern convert=unset:posix_extended +# -------- Tests of extended POSIX conversion -------- -/a[[:>:]z/ +#pattern convert=unset:posix_extended /<[[:a[:digit:]b]>/ <[> @@ -338,8 +338,6 @@ /a+\1b\\c|d[ab\c]/ -/a[[:<:]]b[[:>:]]/ - /<[]bc]>/ <]> @@ -361,6 +359,8 @@ /a***b/ +# -------- Tests of basic POSIX conversion -------- + #pattern convert=unset:posix_basic /a*b+c\+[def](ab)\(cd\)/ @@ -371,6 +371,9 @@ a1b /how.to how\.to/ + how\nto how.to +\= Expect no match + how\x{0}to how.to /^how to \^how to/ @@ -383,13 +386,11 @@ XabcY X*abcY X**abcY + +/*ab\(*cd\)/ /^b\(c^d\)\(^e^f\)/ /a***b/ -#pattern convert=unset - -/abc/ - # End of testinput24 diff --git a/testdata/testoutput24 b/testdata/testoutput24 index 622c0c6..d855b9c 100644 --- a/testdata/testoutput24 +++ b/testdata/testoutput24 @@ -508,14 +508,12 @@ No match /a`*b/convert_glob_escape=x ** Invalid glob escape 'x' +# -------- Tests of extended POSIX conversion -------- + #pattern convert=unset:posix_extended -/a[[:>:]z/ -a[[:>:]z -Failed: error 130 at offset 4: unknown POSIX class name - /<[[:a[:digit:]b]>/ -<[[:a[:digit:]b]> +(*NUL)<[[:a[:digit:]b]> <[> 0: <[> <:> @@ -531,13 +529,10 @@ Failed: error 130 at offset 4: unknown POSIX class name No match /a+\1b\\c|d[ab\c]/ -a+1b\\c|d[ab\\c] - -/a[[:<:]]b[[:>:]]/ -a[[:<:]]b[[:>:]] +(*NUL)a+1b\\c|d[ab\\c] /<[]bc]>/ -<[]bc]> +(*NUL)<[]bc]> <]> 0: <]> @@ -546,7 +541,7 @@ a[[:<:]]b[[:>:]] 0: /<[^]bc]>/ -<[^]bc]> +(*NUL)<[^]bc]> <.> 0: <.> \= Expect no match @@ -556,7 +551,7 @@ No match No match /(a)\1b/ -(a)1b +(*NUL)(a)1b a1b 0: a1b 1: a @@ -565,21 +560,23 @@ No match No match /(ab)c)d]/ -(ab)c\)d\] +(*NUL)(ab)c\)d\] Xabc)d]Y 0: abc)d] 1: ab /a***b/ -a*b +(*NUL)a*b + +# -------- Tests of basic POSIX conversion -------- #pattern convert=unset:posix_basic /a*b+c\+[def](ab)\(cd\)/ -a*b\+c+[def]\(ab\)(cd) +(*NUL)a*b\+c\+[def]\(ab\)(cd) /\(a\)\1b/ -(a)\1b +(*NUL)(a)\1b aab 0: aab 1: a @@ -588,36 +585,40 @@ a*b\+c+[def]\(ab\)(cd) No match /how.to how\.to/ -how.to how\.to +(*NUL)how.to how\.to + how\nto how.to + 0: how\x0ato how.to +\= Expect no match + how\x{0}to how.to +No match /^how to \^how to/ -^how to \^how to +(*NUL)^how to \^how to /^*abc/ -^\*abc +(*NUL)^\*abc /*abc/ -\*abc +(*NUL)\*abc X*abcY 0: *abc /**abc/ -\**abc +(*NUL)\**abc XabcY 0: abc X*abcY 0: *abc X**abcY 0: **abc + +/*ab\(*cd\)/ +(*NUL)\*ab(\*cd) /^b\(c^d\)\(^e^f\)/ -^b(c\^d)(^e\^f) +(*NUL)^b(c\^d)(^e\^f) /a***b/ -a*b - -#pattern convert=unset - -/abc/ +(*NUL)a*b # End of testinput24