Add a fancy test for multiple named subpatterns.
This commit is contained in:
parent
504a073a89
commit
05a8186117
|
@ -5920,4 +5920,65 @@ ef) x/x,mark
|
||||||
/^(?1)\d{3}(a)/
|
/^(?1)\d{3}(a)/
|
||||||
a123a
|
a123a
|
||||||
|
|
||||||
|
# This pattern uses a lot of named subpatterns in order to match email
|
||||||
|
# addresses in various formats. It's a heavy test for named subpatterns. In the
|
||||||
|
# <atext> group, slash is coded as \x{2f} so that this pattern can also be
|
||||||
|
# processed by perltest.sh, which does not cater for an escaped delimiter
|
||||||
|
# within the pattern. All $ and @ characters in subject strings are escaped so
|
||||||
|
# that Perl doesn't interpret them as variable insertions and " characters must
|
||||||
|
# also be escaped for Perl.
|
||||||
|
|
||||||
|
# This set of subpatterns is more or less a direct transliteration of the BNF
|
||||||
|
# definitions in RFC2822, without any of the obsolete features. The addition of
|
||||||
|
# a possessive + to the definition of <phrase> reduced the match limit in PCRE2
|
||||||
|
# from over 5 million to just under 400, and eliminated a very noticeable delay
|
||||||
|
# when this file was passed to perltest.sh.
|
||||||
|
|
||||||
|
/(?ix)(?(DEFINE)
|
||||||
|
(?<addr_spec> (?&local_part) \@ (?&domain) )
|
||||||
|
(?<angle_addr> (?&CFWS)?+ < (?&addr_spec) > (?&CFWS)?+ )
|
||||||
|
(?<atext> [a-z\d!#$%&'*+-\x{2f}=?^_`{|}~] )
|
||||||
|
(?<atom> (?&CFWS)?+ (?&atext)+ (?&CFWS)?+ )
|
||||||
|
(?<ccontent> (?&ctext) | (?&quoted_pair) | (?&comment) )
|
||||||
|
(?<ctext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ ()\\] )
|
||||||
|
(?<comment> \( (?: (?&FWS)?+ (?&ccontent) )*+ (?&FWS)?+ \) )
|
||||||
|
(?<CFWS> (?: (?&FWS)?+ (?&comment) )* (?# NOT possessive)
|
||||||
|
(?: (?&FWS)?+ (?&comment) | (?&FWS) ) )
|
||||||
|
(?<dcontent> (?&dtext) | (?&quoted_pair) )
|
||||||
|
(?<display_name> (?&phrase) )
|
||||||
|
(?<domain> (?&dot_atom) | (?&domain_literal) )
|
||||||
|
(?<domain_literal> (?&CFWS)?+ \[ (?: (?&FWS)?+ (?&dcontent) )* (?&FWS)?+ \]
|
||||||
|
(?&CFWS)?+ )
|
||||||
|
(?<dot_atom> (?&CFWS)?+ (?&dot_atom_text) (?&CFWS)?+ )
|
||||||
|
(?<dot_atom_text> (?&atext)++ (?: \. (?&atext)++)*+ )
|
||||||
|
(?<dtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ \[\]\\] )
|
||||||
|
(?<FWS> (?: [\t\ ]*+ \n)?+ [\t\ ]++ )
|
||||||
|
(?<local_part> (?&dot_atom) | (?&quoted_string) )
|
||||||
|
(?<mailbox> (?&name_addr) | (?&addr_spec) )
|
||||||
|
(?<name_addr> (?&display_name)? (?&angle_addr) )
|
||||||
|
(?<phrase> (?&word)++ )
|
||||||
|
(?<qcontent> (?&qtext) | (?&quoted_pair) )
|
||||||
|
(?<quoted_pair> " (?&text) )
|
||||||
|
(?<quoted_string> (?&CFWS)?+ " (?: (?&FWS)?+ (?&qcontent))* (?&FWS)?+ "
|
||||||
|
(?&CFWS)?+ )
|
||||||
|
(?<qtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ "\\] )
|
||||||
|
(?<text> [^\r\n] )
|
||||||
|
(?<word> (?&atom) | (?&quoted_string) )
|
||||||
|
) # End DEFINE
|
||||||
|
^(?&mailbox)$/
|
||||||
|
Alan Other <user\@dom.ain>
|
||||||
|
<user\@dom.ain>
|
||||||
|
user\@dom.ain
|
||||||
|
user\@[]
|
||||||
|
user\@[domain literal]
|
||||||
|
user\@[domain literal with \"[square brackets\"] inside]
|
||||||
|
\"A. Other\" <user.1234\@dom.ain> (a comment)
|
||||||
|
A. Other <user.1234\@dom.ain> (a comment)
|
||||||
|
\"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay
|
||||||
|
\= Expect no match
|
||||||
|
A missing angle <user\@some.where
|
||||||
|
The quick brown fox
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
|
||||||
# End of testinput1
|
# End of testinput1
|
||||||
|
|
|
@ -9492,4 +9492,76 @@ No match
|
||||||
0: a123a
|
0: a123a
|
||||||
1: a
|
1: a
|
||||||
|
|
||||||
|
# This pattern uses a lot of named subpatterns in order to match email
|
||||||
|
# addresses in various formats. It's a heavy test for named subpatterns. In the
|
||||||
|
# <atext> group, slash is coded as \x{2f} so that this pattern can also be
|
||||||
|
# processed by perltest.sh, which does not cater for an escaped delimiter
|
||||||
|
# within the pattern. All $ and @ characters in subject strings are escaped so
|
||||||
|
# that Perl doesn't interpret them as variable insertions and " characters must
|
||||||
|
# also be escaped for Perl.
|
||||||
|
|
||||||
|
# This set of subpatterns is more or less a direct transliteration of the BNF
|
||||||
|
# definitions in RFC2822, without any of the obsolete features. The addition of
|
||||||
|
# a possessive + to the definition of <phrase> reduced the match limit in PCRE2
|
||||||
|
# from over 5 million to just under 400, and eliminated a very noticeable delay
|
||||||
|
# when this file was passed to perltest.sh.
|
||||||
|
|
||||||
|
/(?ix)(?(DEFINE)
|
||||||
|
(?<addr_spec> (?&local_part) \@ (?&domain) )
|
||||||
|
(?<angle_addr> (?&CFWS)?+ < (?&addr_spec) > (?&CFWS)?+ )
|
||||||
|
(?<atext> [a-z\d!#$%&'*+-\x{2f}=?^_`{|}~] )
|
||||||
|
(?<atom> (?&CFWS)?+ (?&atext)+ (?&CFWS)?+ )
|
||||||
|
(?<ccontent> (?&ctext) | (?&quoted_pair) | (?&comment) )
|
||||||
|
(?<ctext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ ()\\] )
|
||||||
|
(?<comment> \( (?: (?&FWS)?+ (?&ccontent) )*+ (?&FWS)?+ \) )
|
||||||
|
(?<CFWS> (?: (?&FWS)?+ (?&comment) )* (?# NOT possessive)
|
||||||
|
(?: (?&FWS)?+ (?&comment) | (?&FWS) ) )
|
||||||
|
(?<dcontent> (?&dtext) | (?&quoted_pair) )
|
||||||
|
(?<display_name> (?&phrase) )
|
||||||
|
(?<domain> (?&dot_atom) | (?&domain_literal) )
|
||||||
|
(?<domain_literal> (?&CFWS)?+ \[ (?: (?&FWS)?+ (?&dcontent) )* (?&FWS)?+ \]
|
||||||
|
(?&CFWS)?+ )
|
||||||
|
(?<dot_atom> (?&CFWS)?+ (?&dot_atom_text) (?&CFWS)?+ )
|
||||||
|
(?<dot_atom_text> (?&atext)++ (?: \. (?&atext)++)*+ )
|
||||||
|
(?<dtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ \[\]\\] )
|
||||||
|
(?<FWS> (?: [\t\ ]*+ \n)?+ [\t\ ]++ )
|
||||||
|
(?<local_part> (?&dot_atom) | (?&quoted_string) )
|
||||||
|
(?<mailbox> (?&name_addr) | (?&addr_spec) )
|
||||||
|
(?<name_addr> (?&display_name)? (?&angle_addr) )
|
||||||
|
(?<phrase> (?&word)++ )
|
||||||
|
(?<qcontent> (?&qtext) | (?&quoted_pair) )
|
||||||
|
(?<quoted_pair> " (?&text) )
|
||||||
|
(?<quoted_string> (?&CFWS)?+ " (?: (?&FWS)?+ (?&qcontent))* (?&FWS)?+ "
|
||||||
|
(?&CFWS)?+ )
|
||||||
|
(?<qtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ "\\] )
|
||||||
|
(?<text> [^\r\n] )
|
||||||
|
(?<word> (?&atom) | (?&quoted_string) )
|
||||||
|
) # End DEFINE
|
||||||
|
^(?&mailbox)$/
|
||||||
|
Alan Other <user\@dom.ain>
|
||||||
|
0: Alan Other <user@dom.ain>
|
||||||
|
<user\@dom.ain>
|
||||||
|
0: <user@dom.ain>
|
||||||
|
user\@dom.ain
|
||||||
|
0: user@dom.ain
|
||||||
|
user\@[]
|
||||||
|
0: user@[]
|
||||||
|
user\@[domain literal]
|
||||||
|
0: user@[domain literal]
|
||||||
|
user\@[domain literal with \"[square brackets\"] inside]
|
||||||
|
0: user@[domain literal with "[square brackets"] inside]
|
||||||
|
\"A. Other\" <user.1234\@dom.ain> (a comment)
|
||||||
|
0: "A. Other" <user.1234@dom.ain> (a comment)
|
||||||
|
A. Other <user.1234\@dom.ain> (a comment)
|
||||||
|
0: A. Other <user.1234@dom.ain> (a comment)
|
||||||
|
\"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay
|
||||||
|
0: "/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re.lay
|
||||||
|
\= Expect no match
|
||||||
|
A missing angle <user\@some.where
|
||||||
|
No match
|
||||||
|
The quick brown fox
|
||||||
|
No match
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------------
|
||||||
|
|
||||||
# End of testinput1
|
# End of testinput1
|
||||||
|
|
Loading…
Reference in New Issue