358 lines
14 KiB
Plaintext
358 lines
14 KiB
Plaintext
|
# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
|
||
|
# features that are not compatible with the 8-bit library, or which give
|
||
|
# different output in 16-bit or 32-bit mode. The output for the two widths is
|
||
|
# different, so they have separate output files.
|
||
|
|
||
|
#forbid_utf
|
||
|
|
||
|
/a\Cb/
|
||
|
aXb
|
||
|
a\nb
|
||
|
|
||
|
/[^\x{c4}]/IB
|
||
|
|
||
|
/\x{100}/I
|
||
|
|
||
|
/ (?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* # optional leading comment
|
||
|
(?: (?:
|
||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||
|
|
|
||
|
" (?: # opening quote...
|
||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||
|
| # or
|
||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||
|
)* " # closing quote
|
||
|
) # initial word
|
||
|
(?: (?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* \. (?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* (?:
|
||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||
|
|
|
||
|
" (?: # opening quote...
|
||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||
|
| # or
|
||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||
|
)* " # closing quote
|
||
|
) )* # further okay, if led by a period
|
||
|
(?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* @ (?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* (?:
|
||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||
|
| \[ # [
|
||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||
|
\] # ]
|
||
|
) # initial subdomain
|
||
|
(?: #
|
||
|
(?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* \. # if led by a period...
|
||
|
(?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* (?:
|
||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||
|
| \[ # [
|
||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||
|
\] # ]
|
||
|
) # ...further okay
|
||
|
)*
|
||
|
# address
|
||
|
| # or
|
||
|
(?:
|
||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||
|
|
|
||
|
" (?: # opening quote...
|
||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||
|
| # or
|
||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||
|
)* " # closing quote
|
||
|
) # one word, optionally followed by....
|
||
|
(?:
|
||
|
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
|
||
|
\(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) | # comments, or...
|
||
|
|
||
|
" (?: # opening quote...
|
||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||
|
| # or
|
||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||
|
)* " # closing quote
|
||
|
# quoted strings
|
||
|
)*
|
||
|
< (?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* # leading <
|
||
|
(?: @ (?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* (?:
|
||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||
|
| \[ # [
|
||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||
|
\] # ]
|
||
|
) # initial subdomain
|
||
|
(?: #
|
||
|
(?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* \. # if led by a period...
|
||
|
(?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* (?:
|
||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||
|
| \[ # [
|
||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||
|
\] # ]
|
||
|
) # ...further okay
|
||
|
)*
|
||
|
|
||
|
(?: (?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* , (?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* @ (?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* (?:
|
||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||
|
| \[ # [
|
||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||
|
\] # ]
|
||
|
) # initial subdomain
|
||
|
(?: #
|
||
|
(?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* \. # if led by a period...
|
||
|
(?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* (?:
|
||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||
|
| \[ # [
|
||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||
|
\] # ]
|
||
|
) # ...further okay
|
||
|
)*
|
||
|
)* # further okay, if led by comma
|
||
|
: # closing colon
|
||
|
(?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* )? # optional route
|
||
|
(?:
|
||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||
|
|
|
||
|
" (?: # opening quote...
|
||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||
|
| # or
|
||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||
|
)* " # closing quote
|
||
|
) # initial word
|
||
|
(?: (?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* \. (?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* (?:
|
||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||
|
|
|
||
|
" (?: # opening quote...
|
||
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
||
|
| # or
|
||
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
||
|
)* " # closing quote
|
||
|
) )* # further okay, if led by a period
|
||
|
(?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* @ (?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* (?:
|
||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||
|
| \[ # [
|
||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||
|
\] # ]
|
||
|
) # initial subdomain
|
||
|
(?: #
|
||
|
(?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* \. # if led by a period...
|
||
|
(?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* (?:
|
||
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
||
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
||
|
| \[ # [
|
||
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
||
|
\] # ]
|
||
|
) # ...further okay
|
||
|
)*
|
||
|
# address spec
|
||
|
(?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* > # trailing >
|
||
|
# name and address
|
||
|
) (?: [\040\t] | \(
|
||
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
||
|
\) )* # optional trailing comment
|
||
|
/Ix
|
||
|
|
||
|
/[\h]/B
|
||
|
>\x09<
|
||
|
|
||
|
/[\h]+/B
|
||
|
>\x09\x20\xa0<
|
||
|
|
||
|
/[\v]/B
|
||
|
|
||
|
/[^\h]/B
|
||
|
|
||
|
/\h+/I
|
||
|
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
|
||
|
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
|
||
|
|
||
|
/[\h\x{dc00}]+/IB
|
||
|
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
|
||
|
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
|
||
|
|
||
|
/\H+/I
|
||
|
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
|
||
|
\x{2000}\x{200a}\x{1fff}\x{200b}
|
||
|
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
|
||
|
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
|
||
|
|
||
|
/[\H\x{d800}]+/
|
||
|
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
|
||
|
\x{2000}\x{200a}\x{1fff}\x{200b}
|
||
|
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
|
||
|
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
|
||
|
|
||
|
/\v+/I
|
||
|
\x{2027}\x{2030}\x{2028}\x{2029}
|
||
|
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||
|
|
||
|
/[\v\x{dc00}]+/IB
|
||
|
\x{2027}\x{2030}\x{2028}\x{2029}
|
||
|
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||
|
|
||
|
/\V+/I
|
||
|
\x{2028}\x{2029}\x{2027}\x{2030}
|
||
|
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
|
||
|
|
||
|
/[\V\x{d800}]+/
|
||
|
\x{2028}\x{2029}\x{2027}\x{2030}
|
||
|
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
|
||
|
|
||
|
/\R+/I,bsr=unicode
|
||
|
\x{2027}\x{2030}\x{2028}\x{2029}
|
||
|
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
||
|
|
||
|
/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
|
||
|
\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
|
||
|
|
||
|
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
|
||
|
|
||
|
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
|
||
|
|
||
|
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
|
||
|
|
||
|
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
|
||
|
|
||
|
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
|
||
|
XX
|
||
|
|
||
|
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
|
||
|
XX
|
||
|
|
||
|
/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
|
||
|
|
||
|
/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
|
||
|
|
||
|
/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
|
||
|
|
||
|
/^\x{ffff}+/i
|
||
|
\x{ffff}
|
||
|
|
||
|
/^\x{ffff}?/i
|
||
|
\x{ffff}
|
||
|
|
||
|
/^\x{ffff}*/i
|
||
|
\x{ffff}
|
||
|
|
||
|
/^\x{ffff}{3}/i
|
||
|
\x{ffff}\x{ffff}\x{ffff}
|
||
|
|
||
|
/^\x{ffff}{0,3}/i
|
||
|
\x{ffff}
|
||
|
|
||
|
/[^\x00-a]{12,}[^b-\xff]*/B
|
||
|
|
||
|
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
|
||
|
|
||
|
/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
|
||
|
|
||
|
/^[\x{1234}\x{4321}]{2,4}?/
|
||
|
\x{1234}\x{1234}\x{1234}
|
||
|
|
||
|
# Check maximum non-UTF character size for the 16-bit library.
|
||
|
|
||
|
/\x{ffff}/
|
||
|
A\x{ffff}B
|
||
|
|
||
|
/\x{10000}/
|
||
|
|
||
|
/\o{20000}/
|
||
|
|
||
|
# Check maximum character size for the 32-bit library. These will all give
|
||
|
# errors in the 16-bit library.
|
||
|
|
||
|
/\x{110000}/
|
||
|
|
||
|
/\x{7fffffff}/
|
||
|
|
||
|
/\x{80000000}/
|
||
|
|
||
|
/\x{ffffffff}/
|
||
|
|
||
|
/\x{100000000}/
|
||
|
|
||
|
/\o{17777777777}/
|
||
|
|
||
|
/\o{20000000000}/
|
||
|
|
||
|
/\o{37777777777}/
|
||
|
|
||
|
/\o{40000000000}/
|
||
|
|
||
|
/\x{7fffffff}\x{7fffffff}/I
|
||
|
|
||
|
/\x{80000000}\x{80000000}/I
|
||
|
|
||
|
/\x{ffffffff}\x{ffffffff}/I
|
||
|
|
||
|
# Non-UTF characters
|
||
|
|
||
|
/\C{2,3}/
|
||
|
\x{400000}\x{400001}\x{400002}\x{400003}
|
||
|
|
||
|
/\x{400000}\x{800000}/IBi
|
||
|
|
||
|
# Check character ranges
|
||
|
|
||
|
/[\H]/IB
|
||
|
|
||
|
/[\V]/IB
|
||
|
|
||
|
# End of testinput11
|