pcre2/testdata/testinput9

334 lines
12 KiB
Plaintext

# This set of tests is run only with the 8-bit library. They do not require
# UTF-8 or Unicode property support. The file starts with all the tests of
# the POSIX interface, because that is supported only with the 8-bit library.
#forbid_utf
#pattern posix
/abc/
abc
*** Failers
/^abc|def/
abcdef
abcdef\=notbol
/.*((abc)$|(def))/
defabc
defabc\=noteol
/the quick brown fox/
the quick brown fox
*** Failers
The Quick Brown Fox
/the quick brown fox/i
the quick brown fox
The Quick Brown Fox
/abc.def/
*** Failers
abc\ndef
/abc$/
abc
abc\n
/(abc)\2/
/(abc\1)/
abc
/a*(b+)(z)(z)/
aaaabbbbzzzz
aaaabbbbzzzz\=ovector=0
aaaabbbbzzzz\=ovector=1
aaaabbbbzzzz\=ovector=2
/ab.cd/
ab-cd
ab=cd
** Failers
ab\ncd
/ab.cd/s
ab-cd
ab=cd
ab\ncd
/a(b)c/no_auto_capture
abc
/a(?P<name>b)c/no_auto_capture
abc
/a?|b?/
abc
** Failers
ddd\=notempty
/\w+A/
CDAAAAB
/\w+A/ungreedy
CDAAAAB
/\Biss\B/I,aftertext
Mississippi
/abc/\
#pattern -posix
# End of POSIX tests
/a\Cb/
aXb
a\nb
** Failers (too big char)
A\x{123}B
A\o{443}B
/\x{100}/I
/\o{400}/I
/ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # optional leading comment
(?: (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # initial word
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) )* # further okay, if led by a period
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
# address
| # or
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # one word, optionally followed by....
(?:
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
\(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) | # comments, or...
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
# quoted strings
)*
< (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # leading <
(?: @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* , (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
)* # further okay, if led by comma
: # closing colon
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* )? # optional route
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) # initial word
(?: (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
" (?: # opening quote...
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
| # or
\\ [^\x80-\xff] # Escaped something (something != CR)
)* " # closing quote
) )* # further okay, if led by a period
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* @ (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # initial subdomain
(?: #
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* \. # if led by a period...
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* (?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
| \[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
) # ...further okay
)*
# address spec
(?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* > # trailing >
# name and address
) (?: [\040\t] | \(
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
\) )* # optional trailing comment
/Ix
/\h/I
/\H/I
/\v/I
/\V/I
/\R/I
/[\h]/B
>\x09<
/[\h]+/B
>\x09\x20\xa0<
/[\v]/B
/[\H]/B
/[^\h]/B
/[\V]/B
/[\x0a\V]/B
/\777/I
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
XX
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
XX
/\u0100/alt_bsux,allow_empty_class,match_unset_backref,dupnames
/[\u0100-\u0200]/alt_bsux,allow_empty_class,match_unset_backref,dupnames
/[^\x00-a]{12,}[^b-\xff]*/B
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
# End of testinput9