From b2294373d773d13f526e5eddcdce59ac582be3ba Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Mon, 2 Jul 2018 11:23:45 +0000 Subject: [PATCH] Ignore quantifiers on lookaheads within lookbehinds when checking for a fixed length. --- ChangeLog | 4 ++++ src/pcre2_compile.c | 26 ++++++++++++++++++++++++++ testdata/testinput1 | 10 ++++++++++ testdata/testinput2 | 2 ++ testdata/testoutput1 | 10 ++++++++++ testdata/testoutput2 | 14 ++++++++++++++ 6 files changed, 66 insertions(+) diff --git a/ChangeLog b/ChangeLog index 5ee7cae..9e497ea 100644 --- a/ChangeLog +++ b/ChangeLog @@ -100,6 +100,10 @@ a pattern. Because \G is in a lookbehind assertion, there has to be a "bumpalong" before there can be a match. The automatic "advance by one character after an empty string match" rule is therefore inappropriate. A more complicated algorithm has now been implemented. + +22. When checking to see if a lookbehind is of fixed length, lookaheads were +correctly ignored, but quantifiers on lookaheads were not being ignored, leading +to an incorrect "lookbehind assertion is not fixed length" error. Version 10.31 12-February-2018 diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 0f75f36..5a47f1d 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -8572,6 +8572,32 @@ for (;; pptr++) case META_LOOKAHEADNOT: pptr = parsed_skip(pptr + 1, PSKIP_KET); if (pptr == NULL) goto PARSED_SKIP_FAILED; + + /* Also ignore any qualifiers that follow a lookahead assertion. */ + + switch (pptr[1]) + { + case META_ASTERISK: + case META_ASTERISK_PLUS: + case META_ASTERISK_QUERY: + case META_PLUS: + case META_PLUS_PLUS: + case META_PLUS_QUERY: + case META_QUERY: + case META_QUERY_PLUS: + case META_QUERY_QUERY: + pptr++; + break; + + case META_MINMAX: + case META_MINMAX_PLUS: + case META_MINMAX_QUERY: + pptr += 3; + break; + + default: + break; + } break; /* Lookbehinds can be ignored, but must themselves be checked. 
*/ diff --git a/testdata/testinput1 b/testdata/testinput1 index fb50238..1b3191c 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -6192,4 +6192,14 @@ ef) x/x,mark /(?<=\G.)/g,aftertext abc +/(?<=(?=.)?)/ + +/(?<=(?=.)?+)/ + +/(?<=(?=.)*)/ + +/(?<=(?=.){4,5})/ + +/(?<=(?=.){4,5}x)/ + # End of testinput1 diff --git a/testdata/testinput2 b/testdata/testinput2 index 7e703d5..cc4b59b 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -5435,4 +5435,6 @@ a)"xI /(?=a+)a(a+)++b/B +/(?<=(?=.){4,5}x)/B + # End of testinput2 diff --git a/testdata/testoutput1 b/testdata/testoutput1 index 348dcbc..06469fa 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -9831,4 +9831,14 @@ No match 0: 0+ +/(?<=(?=.)?)/ + +/(?<=(?=.)?+)/ + +/(?<=(?=.)*)/ + +/(?<=(?=.){4,5})/ + +/(?<=(?=.){4,5}x)/ + # End of testinput1 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 9f504f6..aab0c94 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -16583,6 +16583,20 @@ No match End ------------------------------------------------------------------ +/(?<=(?=.){4,5}x)/B +------------------------------------------------------------------ + Bra + AssertB + Reverse + Assert + Any + Ket + x + Ket + Ket + End +------------------------------------------------------------------ + # End of testinput2 Error -70: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data