Refactor named group handling by adding a pre-pass that generates a list of
named groups with their numbers before the rest of the compiling code is run. This has simplified the main compiling code and removed some sources of error.
This commit is contained in:
parent
bf492e47f6
commit
c9ac9e23ec
|
@ -137,6 +137,15 @@ provoke a buffer overflow. This bug was discovered by the LLVM fuzzer.
|
|||
35. Make pcre2test give an error if a pattern that follows #forbid_utf contains
|
||||
\P, \p, or \X.
|
||||
|
||||
36. The way named subpatterns are handled has been refactored. There is now a
|
||||
pre-pass over the regex which does nothing other than identify named
|
||||
subpatterns and count the total captures. This means that information about
|
||||
named subpatterns is known before the rest of the compile. In particular, it means
|
||||
that forward references can be checked as they are encountered. Previously, the
|
||||
code for handling forward references was contorted and led to several errors in
|
||||
computing the memory requirements for some patterns, leading to buffer
|
||||
overflows.
|
||||
|
||||
|
||||
Version 10.10 06-March-2015
|
||||
---------------------------
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -164,6 +164,7 @@ static const char compile_error_texts[] =
|
|||
"missing terminating delimiter for callout with string argument\0"
|
||||
"unrecognized string delimiter follows (?C\0"
|
||||
"using \\C is disabled by the application\0"
|
||||
"(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
|
||||
;
|
||||
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
|
|
|
@ -660,8 +660,9 @@ compiling. */
|
|||
|
||||
typedef struct named_group {
|
||||
PCRE2_SPTR name; /* Points to the name in the pattern */
|
||||
int length; /* Length of the name */
|
||||
uint32_t number; /* Group number */
|
||||
uint16_t length; /* Length of the name */
|
||||
uint16_t isdup; /* TRUE if a duplicate */
|
||||
} named_group;
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
|
|
|
@ -179,6 +179,9 @@
|
|||
/(*UTF-32)\x{11234}/
|
||||
abcd\x{11234}pqr
|
||||
|
||||
/(*UTF-32)\x{112}/
|
||||
abcd\x{11234}pqr
|
||||
|
||||
/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
|
||||
|
||||
/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
|
||||
|
|
|
@ -4289,6 +4289,17 @@ a random value. /Ix
|
|||
|
||||
/A\8B\9C/
|
||||
A8B9C
|
||||
|
||||
/(?x:((?'a')) # comment (with parentheses) and | vertical
|
||||
(?-x:#not a comment (?'b')) # this is a comment ()
|
||||
(?'c')) # not a comment (?'d')/info
|
||||
|
||||
/(?|(?'a')(2)(?'b')|(?'a')(?'a')(3))/I,dupnames
|
||||
A23B
|
||||
B32A
|
||||
|
||||
# These are some patterns that used to cause buffer overflows or other errors
|
||||
# while compiling.
|
||||
|
||||
/.((?2)(?R)|\1|$)()/B
|
||||
|
||||
|
@ -4310,4 +4321,6 @@ a random value. /Ix
|
|||
|
||||
/A(?'')Z/
|
||||
|
||||
"(?J:(?|(?'R')(\k'R')|((?'R'))))"
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -251,4 +251,11 @@
|
|||
|
||||
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
|
||||
|
||||
# Check the absolute limit on nesting (?| etc. This varies with code unit
|
||||
# width because the workspace is a different number of bytes.
|
||||
|
||||
/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|
|
||||
)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))
|
||||
/parens_nest_limit=1000
|
||||
|
||||
# End of testinput9
|
||||
|
|
|
@ -647,6 +647,10 @@ Subject length lower bound = 1
|
|||
0: \x{11234}
|
||||
|
||||
/(*UTF-32)\x{11234}/
|
||||
Failed: error 134 at offset 17: character code point value in \x{} or \o{} is too large
|
||||
abcd\x{11234}pqr
|
||||
|
||||
/(*UTF-32)\x{112}/
|
||||
Failed: error 160 at offset 5: (*VERB) not recognized or malformed
|
||||
abcd\x{11234}pqr
|
||||
|
||||
|
|
|
@ -642,6 +642,10 @@ Subject length lower bound = 1
|
|||
Failed: error 160 at offset 5: (*VERB) not recognized or malformed
|
||||
abcd\x{11234}pqr
|
||||
|
||||
/(*UTF-32)\x{112}/
|
||||
Failed: error 160 at offset 5: (*VERB) not recognized or malformed
|
||||
abcd\x{11234}pqr
|
||||
|
||||
/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
|
||||
Failed: error 160 at offset 12: (*VERB) not recognized or malformed
|
||||
|
||||
|
|
|
@ -13293,7 +13293,7 @@ Failed: error 150 at offset 4: invalid range in character class
|
|||
Failed: error 144 at offset 3: group name must start with a non-digit
|
||||
|
||||
/(?&1abc)xx(?<1abc>y)/
|
||||
Failed: error 144 at offset 3: group name must start with a non-digit
|
||||
Failed: error 144 at offset 13: group name must start with a non-digit
|
||||
|
||||
/(?<ab-cd>xx)/
|
||||
Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator)
|
||||
|
@ -13320,10 +13320,10 @@ Failed: error 144 at offset 4: group name must start with a non-digit
|
|||
Failed: error 144 at offset 3: group name must start with a non-digit
|
||||
|
||||
/(?&1abc)xx(?<1abc>y)/
|
||||
Failed: error 144 at offset 3: group name must start with a non-digit
|
||||
Failed: error 144 at offset 13: group name must start with a non-digit
|
||||
|
||||
/(?P>1abc)xx(?<1abc>y)/
|
||||
Failed: error 144 at offset 4: group name must start with a non-digit
|
||||
Failed: error 144 at offset 14: group name must start with a non-digit
|
||||
|
||||
/\g'3gh'/
|
||||
Failed: error 157 at offset 7: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number
|
||||
|
@ -14347,6 +14347,42 @@ Failed: error 115 at offset 3: reference to non-existent subpattern
|
|||
/A\8B\9C/
|
||||
Failed: error 115 at offset 7: reference to non-existent subpattern
|
||||
A8B9C
|
||||
|
||||
/(?x:((?'a')) # comment (with parentheses) and | vertical
|
||||
(?-x:#not a comment (?'b')) # this is a comment ()
|
||||
(?'c')) # not a comment (?'d')/info
|
||||
Capturing subpattern count = 5
|
||||
Named capturing subpatterns:
|
||||
a 2
|
||||
b 3
|
||||
c 4
|
||||
d 5
|
||||
First code unit = '#'
|
||||
Last code unit = ' '
|
||||
Subject length lower bound = 32
|
||||
|
||||
/(?|(?'a')(2)(?'b')|(?'a')(?'a')(3))/I,dupnames
|
||||
Capturing subpattern count = 3
|
||||
Named capturing subpatterns:
|
||||
a 1
|
||||
a 2
|
||||
b 3
|
||||
Options: dupnames
|
||||
Starting code units: 2 3
|
||||
Subject length lower bound = 1
|
||||
A23B
|
||||
0: 2
|
||||
1:
|
||||
2: 2
|
||||
3:
|
||||
B32A
|
||||
0: 3
|
||||
1:
|
||||
2:
|
||||
3: 3
|
||||
|
||||
# These are some patterns that used to cause buffer overflows or other errors
|
||||
# while compiling.
|
||||
|
||||
/.((?2)(?R)|\1|$)()/B
|
||||
------------------------------------------------------------------
|
||||
|
@ -14411,4 +14447,6 @@ Failed: error 125 at offset 72: lookbehind assertion is not fixed length
|
|||
/A(?'')Z/
|
||||
Failed: error 162 at offset 4: subpattern name expected
|
||||
|
||||
"(?J:(?|(?'R')(\k'R')|((?'R'))))"
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -356,4 +356,12 @@ Failed: error 177 at offset 6: character code point value in \u.... sequence is
|
|||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# Check the absolute limit on nesting (?| etc. This varies with code unit
|
||||
# width because the workspace is a different number of bytes.
|
||||
|
||||
/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|
|
||||
)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))
|
||||
/parens_nest_limit=1000
|
||||
Failed: error 184 at offset 1540: (?| and/or (?J: or (?x: parentheses are too deeply nested
|
||||
|
||||
# End of testinput9
|
||||
|
|
Loading…
Reference in New Issue