Add a fancy test for multiple named subpatterns.

This commit is contained in:
Philip.Hazel 2017-04-22 14:35:14 +00:00
parent 504a073a89
commit 05a8186117
2 changed files with 133 additions and 0 deletions

61
testdata/testinput1 vendored
View File

@@ -5920,4 +5920,65 @@ ef) x/x,mark
/^(?1)\d{3}(a)/
a123a
# This pattern uses a lot of named subpatterns in order to match email
# addresses in various formats. It's a heavy test for named subpatterns. In the
# <atext> group, slash is coded as \x{2f} so that this pattern can also be
# processed by perltest.sh, which does not cater for an escaped delimiter
# within the pattern. All $ and @ characters in subject strings are escaped so
# that Perl doesn't interpret them as variable insertions and " characters must
# also be escaped for Perl.
# This set of subpatterns is more or less a direct transliteration of the BNF
# definitions in RFC2822, without any of the obsolete features. The addition of
# a possessive + to the definition of <phrase> reduced the match limit in PCRE2
# from over 5 million to just under 400, and eliminated a very noticeable delay
# when this file was passed to perltest.sh.
/(?ix)(?(DEFINE)
(?<addr_spec> (?&local_part) \@ (?&domain) )
(?<angle_addr> (?&CFWS)?+ < (?&addr_spec) > (?&CFWS)?+ )
(?<atext> [a-z\d!#$%&'*+-\x{2f}=?^_`{|}~] )
(?<atom> (?&CFWS)?+ (?&atext)+ (?&CFWS)?+ )
(?<ccontent> (?&ctext) | (?&quoted_pair) | (?&comment) )
(?<ctext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ ()\\] )
(?<comment> \( (?: (?&FWS)?+ (?&ccontent) )*+ (?&FWS)?+ \) )
(?<CFWS> (?: (?&FWS)?+ (?&comment) )* (?# NOT possessive)
(?: (?&FWS)?+ (?&comment) | (?&FWS) ) )
(?<dcontent> (?&dtext) | (?&quoted_pair) )
(?<display_name> (?&phrase) )
(?<domain> (?&dot_atom) | (?&domain_literal) )
(?<domain_literal> (?&CFWS)?+ \[ (?: (?&FWS)?+ (?&dcontent) )* (?&FWS)?+ \]
(?&CFWS)?+ )
(?<dot_atom> (?&CFWS)?+ (?&dot_atom_text) (?&CFWS)?+ )
(?<dot_atom_text> (?&atext)++ (?: \. (?&atext)++)*+ )
(?<dtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ \[\]\\] )
(?<FWS> (?: [\t\ ]*+ \n)?+ [\t\ ]++ )
(?<local_part> (?&dot_atom) | (?&quoted_string) )
(?<mailbox> (?&name_addr) | (?&addr_spec) )
(?<name_addr> (?&display_name)? (?&angle_addr) )
(?<phrase> (?&word)++ )
(?<qcontent> (?&qtext) | (?&quoted_pair) )
(?<quoted_pair> " (?&text) )
(?<quoted_string> (?&CFWS)?+ " (?: (?&FWS)?+ (?&qcontent))* (?&FWS)?+ "
(?&CFWS)?+ )
(?<qtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ "\\] )
(?<text> [^\r\n] )
(?<word> (?&atom) | (?&quoted_string) )
) # End DEFINE
^(?&mailbox)$/
Alan Other <user\@dom.ain>
<user\@dom.ain>
user\@dom.ain
user\@[]
user\@[domain literal]
user\@[domain literal with \"[square brackets\"] inside]
\"A. Other\" <user.1234\@dom.ain> (a comment)
A. Other <user.1234\@dom.ain> (a comment)
\"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay
\= Expect no match
A missing angle <user\@some.where
The quick brown fox
# --------------------------------------------------------------------------
# End of testinput1

72
testdata/testoutput1 vendored
View File

@@ -9492,4 +9492,76 @@ No match
0: a123a
1: a
# This pattern uses a lot of named subpatterns in order to match email
# addresses in various formats. It's a heavy test for named subpatterns. In the
# <atext> group, slash is coded as \x{2f} so that this pattern can also be
# processed by perltest.sh, which does not cater for an escaped delimiter
# within the pattern. All $ and @ characters in subject strings are escaped so
# that Perl doesn't interpret them as variable insertions and " characters must
# also be escaped for Perl.
# This set of subpatterns is more or less a direct transliteration of the BNF
# definitions in RFC2822, without any of the obsolete features. The addition of
# a possessive + to the definition of <phrase> reduced the match limit in PCRE2
# from over 5 million to just under 400, and eliminated a very noticeable delay
# when this file was passed to perltest.sh.
/(?ix)(?(DEFINE)
(?<addr_spec> (?&local_part) \@ (?&domain) )
(?<angle_addr> (?&CFWS)?+ < (?&addr_spec) > (?&CFWS)?+ )
(?<atext> [a-z\d!#$%&'*+-\x{2f}=?^_`{|}~] )
(?<atom> (?&CFWS)?+ (?&atext)+ (?&CFWS)?+ )
(?<ccontent> (?&ctext) | (?&quoted_pair) | (?&comment) )
(?<ctext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ ()\\] )
(?<comment> \( (?: (?&FWS)?+ (?&ccontent) )*+ (?&FWS)?+ \) )
(?<CFWS> (?: (?&FWS)?+ (?&comment) )* (?# NOT possessive)
(?: (?&FWS)?+ (?&comment) | (?&FWS) ) )
(?<dcontent> (?&dtext) | (?&quoted_pair) )
(?<display_name> (?&phrase) )
(?<domain> (?&dot_atom) | (?&domain_literal) )
(?<domain_literal> (?&CFWS)?+ \[ (?: (?&FWS)?+ (?&dcontent) )* (?&FWS)?+ \]
(?&CFWS)?+ )
(?<dot_atom> (?&CFWS)?+ (?&dot_atom_text) (?&CFWS)?+ )
(?<dot_atom_text> (?&atext)++ (?: \. (?&atext)++)*+ )
(?<dtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ \[\]\\] )
(?<FWS> (?: [\t\ ]*+ \n)?+ [\t\ ]++ )
(?<local_part> (?&dot_atom) | (?&quoted_string) )
(?<mailbox> (?&name_addr) | (?&addr_spec) )
(?<name_addr> (?&display_name)? (?&angle_addr) )
(?<phrase> (?&word)++ )
(?<qcontent> (?&qtext) | (?&quoted_pair) )
(?<quoted_pair> " (?&text) )
(?<quoted_string> (?&CFWS)?+ " (?: (?&FWS)?+ (?&qcontent))* (?&FWS)?+ "
(?&CFWS)?+ )
(?<qtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ "\\] )
(?<text> [^\r\n] )
(?<word> (?&atom) | (?&quoted_string) )
) # End DEFINE
^(?&mailbox)$/
Alan Other <user\@dom.ain>
0: Alan Other <user@dom.ain>
<user\@dom.ain>
0: <user@dom.ain>
user\@dom.ain
0: user@dom.ain
user\@[]
0: user@[]
user\@[domain literal]
0: user@[domain literal]
user\@[domain literal with \"[square brackets\"] inside]
0: user@[domain literal with "[square brackets"] inside]
\"A. Other\" <user.1234\@dom.ain> (a comment)
0: "A. Other" <user.1234@dom.ain> (a comment)
A. Other <user.1234\@dom.ain> (a comment)
0: A. Other <user.1234@dom.ain> (a comment)
\"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay
0: "/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re.lay
\= Expect no match
A missing angle <user\@some.where
No match
The quick brown fox
No match
# --------------------------------------------------------------------------
# End of testinput1