Add a fancy test for multiple named subpatterns.
This commit is contained in:
parent
504a073a89
commit
05a8186117
|
@ -5920,4 +5920,65 @@ ef) x/x,mark
|
|||
/^(?1)\d{3}(a)/
|
||||
a123a
|
||||
|
||||
# This pattern uses a lot of named subpatterns in order to match email
|
||||
# addresses in various formats. It's a heavy test for named subpatterns. In the
|
||||
# <atext> group, slash is coded as \x{2f} so that this pattern can also be
|
||||
# processed by perltest.sh, which does not cater for an escaped delimiter
|
||||
# within the pattern. All $ and @ characters in subject strings are escaped so
|
||||
# that Perl doesn't interpret them as variable insertions and " characters must
|
||||
# also be escaped for Perl.
|
||||
|
||||
# This set of subpatterns is more or less a direct transliteration of the BNF
|
||||
# definitions in RFC2822, without any of the obsolete features. The addition of
|
||||
# a possessive + to the definition of <phrase> reduced the match limit in PCRE2
|
||||
# from over 5 million to just under 400, and eliminated a very noticeable delay
|
||||
# when this file was passed to perltest.sh.
|
||||
|
||||
/(?ix)(?(DEFINE)
|
||||
(?<addr_spec> (?&local_part) \@ (?&domain) )
|
||||
(?<angle_addr> (?&CFWS)?+ < (?&addr_spec) > (?&CFWS)?+ )
|
||||
(?<atext> [a-z\d!#$%&'*+-\x{2f}=?^_`{|}~] )
|
||||
(?<atom> (?&CFWS)?+ (?&atext)+ (?&CFWS)?+ )
|
||||
(?<ccontent> (?&ctext) | (?&quoted_pair) | (?&comment) )
|
||||
(?<ctext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ ()\\] )
|
||||
(?<comment> \( (?: (?&FWS)?+ (?&ccontent) )*+ (?&FWS)?+ \) )
|
||||
(?<CFWS> (?: (?&FWS)?+ (?&comment) )* (?# NOT possessive)
|
||||
(?: (?&FWS)?+ (?&comment) | (?&FWS) ) )
|
||||
(?<dcontent> (?&dtext) | (?&quoted_pair) )
|
||||
(?<display_name> (?&phrase) )
|
||||
(?<domain> (?&dot_atom) | (?&domain_literal) )
|
||||
(?<domain_literal> (?&CFWS)?+ \[ (?: (?&FWS)?+ (?&dcontent) )* (?&FWS)?+ \]
|
||||
(?&CFWS)?+ )
|
||||
(?<dot_atom> (?&CFWS)?+ (?&dot_atom_text) (?&CFWS)?+ )
|
||||
(?<dot_atom_text> (?&atext)++ (?: \. (?&atext)++)*+ )
|
||||
(?<dtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ \[\]\\] )
|
||||
(?<FWS> (?: [\t\ ]*+ \n)?+ [\t\ ]++ )
|
||||
(?<local_part> (?&dot_atom) | (?&quoted_string) )
|
||||
(?<mailbox> (?&name_addr) | (?&addr_spec) )
|
||||
(?<name_addr> (?&display_name)? (?&angle_addr) )
|
||||
(?<phrase> (?&word)++ )
|
||||
(?<qcontent> (?&qtext) | (?&quoted_pair) )
|
||||
(?<quoted_pair> " (?&text) )
|
||||
(?<quoted_string> (?&CFWS)?+ " (?: (?&FWS)?+ (?&qcontent))* (?&FWS)?+ "
|
||||
(?&CFWS)?+ )
|
||||
(?<qtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ "\\] )
|
||||
(?<text> [^\r\n] )
|
||||
(?<word> (?&atom) | (?&quoted_string) )
|
||||
) # End DEFINE
|
||||
^(?&mailbox)$/
|
||||
Alan Other <user\@dom.ain>
|
||||
<user\@dom.ain>
|
||||
user\@dom.ain
|
||||
user\@[]
|
||||
user\@[domain literal]
|
||||
user\@[domain literal with \"[square brackets\"] inside]
|
||||
\"A. Other\" <user.1234\@dom.ain> (a comment)
|
||||
A. Other <user.1234\@dom.ain> (a comment)
|
||||
\"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay
|
||||
\= Expect no match
|
||||
A missing angle <user\@some.where
|
||||
The quick brown fox
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
# End of testinput1
|
||||
|
|
|
@ -9492,4 +9492,76 @@ No match
|
|||
0: a123a
|
||||
1: a
|
||||
|
||||
# This pattern uses a lot of named subpatterns in order to match email
|
||||
# addresses in various formats. It's a heavy test for named subpatterns. In the
|
||||
# <atext> group, slash is coded as \x{2f} so that this pattern can also be
|
||||
# processed by perltest.sh, which does not cater for an escaped delimiter
|
||||
# within the pattern. All $ and @ characters in subject strings are escaped so
|
||||
# that Perl doesn't interpret them as variable insertions and " characters must
|
||||
# also be escaped for Perl.
|
||||
|
||||
# This set of subpatterns is more or less a direct transliteration of the BNF
|
||||
# definitions in RFC2822, without any of the obsolete features. The addition of
|
||||
# a possessive + to the definition of <phrase> reduced the match limit in PCRE2
|
||||
# from over 5 million to just under 400, and eliminated a very noticeable delay
|
||||
# when this file was passed to perltest.sh.
|
||||
|
||||
/(?ix)(?(DEFINE)
|
||||
(?<addr_spec> (?&local_part) \@ (?&domain) )
|
||||
(?<angle_addr> (?&CFWS)?+ < (?&addr_spec) > (?&CFWS)?+ )
|
||||
(?<atext> [a-z\d!#$%&'*+-\x{2f}=?^_`{|}~] )
|
||||
(?<atom> (?&CFWS)?+ (?&atext)+ (?&CFWS)?+ )
|
||||
(?<ccontent> (?&ctext) | (?&quoted_pair) | (?&comment) )
|
||||
(?<ctext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ ()\\] )
|
||||
(?<comment> \( (?: (?&FWS)?+ (?&ccontent) )*+ (?&FWS)?+ \) )
|
||||
(?<CFWS> (?: (?&FWS)?+ (?&comment) )* (?# NOT possessive)
|
||||
(?: (?&FWS)?+ (?&comment) | (?&FWS) ) )
|
||||
(?<dcontent> (?&dtext) | (?&quoted_pair) )
|
||||
(?<display_name> (?&phrase) )
|
||||
(?<domain> (?&dot_atom) | (?&domain_literal) )
|
||||
(?<domain_literal> (?&CFWS)?+ \[ (?: (?&FWS)?+ (?&dcontent) )* (?&FWS)?+ \]
|
||||
(?&CFWS)?+ )
|
||||
(?<dot_atom> (?&CFWS)?+ (?&dot_atom_text) (?&CFWS)?+ )
|
||||
(?<dot_atom_text> (?&atext)++ (?: \. (?&atext)++)*+ )
|
||||
(?<dtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ \[\]\\] )
|
||||
(?<FWS> (?: [\t\ ]*+ \n)?+ [\t\ ]++ )
|
||||
(?<local_part> (?&dot_atom) | (?&quoted_string) )
|
||||
(?<mailbox> (?&name_addr) | (?&addr_spec) )
|
||||
(?<name_addr> (?&display_name)? (?&angle_addr) )
|
||||
(?<phrase> (?&word)++ )
|
||||
(?<qcontent> (?&qtext) | (?&quoted_pair) )
|
||||
(?<quoted_pair> " (?&text) )
|
||||
(?<quoted_string> (?&CFWS)?+ " (?: (?&FWS)?+ (?&qcontent))* (?&FWS)?+ "
|
||||
(?&CFWS)?+ )
|
||||
(?<qtext> [^\x{9}\x{10}\x{13}\x{7f}-\x{ff}\ "\\] )
|
||||
(?<text> [^\r\n] )
|
||||
(?<word> (?&atom) | (?&quoted_string) )
|
||||
) # End DEFINE
|
||||
^(?&mailbox)$/
|
||||
Alan Other <user\@dom.ain>
|
||||
0: Alan Other <user@dom.ain>
|
||||
<user\@dom.ain>
|
||||
0: <user@dom.ain>
|
||||
user\@dom.ain
|
||||
0: user@dom.ain
|
||||
user\@[]
|
||||
0: user@[]
|
||||
user\@[domain literal]
|
||||
0: user@[domain literal]
|
||||
user\@[domain literal with \"[square brackets\"] inside]
|
||||
0: user@[domain literal with "[square brackets"] inside]
|
||||
\"A. Other\" <user.1234\@dom.ain> (a comment)
|
||||
0: "A. Other" <user.1234@dom.ain> (a comment)
|
||||
A. Other <user.1234\@dom.ain> (a comment)
|
||||
0: A. Other <user.1234@dom.ain> (a comment)
|
||||
\"/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/\"\@x400-re.lay
|
||||
0: "/s=user/ou=host/o=place/prmd=uu.yy/admd= /c=gb/"@x400-re.lay
|
||||
\= Expect no match
|
||||
A missing angle <user\@some.where
|
||||
No match
|
||||
The quick brown fox
|
||||
No match
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
# End of testinput1
|
||||
|
|
Loading…
Reference in New Issue