2014-08-05 18:51:32 +02:00
|
|
|
|
# This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
|
|
|
|
|
# features that are not compatible with the 8-bit library, or which give
|
|
|
|
|
# different output in 16-bit or 32-bit mode. The output for the two widths is
|
|
|
|
|
# different, so they have separate output files.
|
|
|
|
|
|
|
|
|
|
#forbid_utf
|
2015-09-08 19:01:17 +02:00
|
|
|
|
#newline_default LF ANY ANYCRLF
|
2014-08-05 18:51:32 +02:00
|
|
|
|
|
|
|
|
|
/[^\x{c4}]/IB
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
[^\x{c4}]
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Capturing subpattern count = 0
|
|
|
|
|
Subject length lower bound = 1
|
|
|
|
|
|
|
|
|
|
/\x{100}/I
|
|
|
|
|
Capturing subpattern count = 0
|
|
|
|
|
First code unit = \x{100}
|
|
|
|
|
Subject length lower bound = 1
|
|
|
|
|
|
|
|
|
|
/ (?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* # optional leading comment
|
|
|
|
|
(?: (?:
|
|
|
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
|
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
|
|
|
|
|
|
|
|
|
" (?: # opening quote...
|
|
|
|
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
|
|
|
|
| # or
|
|
|
|
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
|
|
|
|
)* " # closing quote
|
|
|
|
|
) # initial word
|
|
|
|
|
(?: (?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* \. (?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* (?:
|
|
|
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
|
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
|
|
|
|
|
|
|
|
|
" (?: # opening quote...
|
|
|
|
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
|
|
|
|
| # or
|
|
|
|
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
|
|
|
|
)* " # closing quote
|
|
|
|
|
) )* # further okay, if led by a period
|
|
|
|
|
(?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* @ (?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* (?:
|
|
|
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
|
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
|
|
|
| \[ # [
|
|
|
|
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
|
|
|
|
\] # ]
|
|
|
|
|
) # initial subdomain
|
|
|
|
|
(?: #
|
|
|
|
|
(?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* \. # if led by a period...
|
|
|
|
|
(?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* (?:
|
|
|
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
|
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
|
|
|
| \[ # [
|
|
|
|
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
|
|
|
|
\] # ]
|
|
|
|
|
) # ...further okay
|
|
|
|
|
)*
|
|
|
|
|
# address
|
|
|
|
|
| # or
|
|
|
|
|
(?:
|
|
|
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
|
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
|
|
|
|
|
|
|
|
|
" (?: # opening quote...
|
|
|
|
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
|
|
|
|
| # or
|
|
|
|
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
|
|
|
|
)* " # closing quote
|
|
|
|
|
) # one word, optionally followed by....
|
|
|
|
|
(?:
|
|
|
|
|
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
|
|
|
|
|
\(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) | # comments, or...
|
|
|
|
|
|
|
|
|
|
" (?: # opening quote...
|
|
|
|
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
|
|
|
|
| # or
|
|
|
|
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
|
|
|
|
)* " # closing quote
|
|
|
|
|
# quoted strings
|
|
|
|
|
)*
|
|
|
|
|
< (?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* # leading <
|
|
|
|
|
(?: @ (?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* (?:
|
|
|
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
|
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
|
|
|
| \[ # [
|
|
|
|
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
|
|
|
|
\] # ]
|
|
|
|
|
) # initial subdomain
|
|
|
|
|
(?: #
|
|
|
|
|
(?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* \. # if led by a period...
|
|
|
|
|
(?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* (?:
|
|
|
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
|
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
|
|
|
| \[ # [
|
|
|
|
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
|
|
|
|
\] # ]
|
|
|
|
|
) # ...further okay
|
|
|
|
|
)*
|
|
|
|
|
|
|
|
|
|
(?: (?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* , (?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* @ (?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* (?:
|
|
|
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
|
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
|
|
|
| \[ # [
|
|
|
|
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
|
|
|
|
\] # ]
|
|
|
|
|
) # initial subdomain
|
|
|
|
|
(?: #
|
|
|
|
|
(?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* \. # if led by a period...
|
|
|
|
|
(?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* (?:
|
|
|
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
|
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
|
|
|
| \[ # [
|
|
|
|
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
|
|
|
|
\] # ]
|
|
|
|
|
) # ...further okay
|
|
|
|
|
)*
|
|
|
|
|
)* # further okay, if led by comma
|
|
|
|
|
: # closing colon
|
|
|
|
|
(?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* )? # optional route
|
|
|
|
|
(?:
|
|
|
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
|
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
|
|
|
|
|
|
|
|
|
" (?: # opening quote...
|
|
|
|
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
|
|
|
|
| # or
|
|
|
|
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
|
|
|
|
)* " # closing quote
|
|
|
|
|
) # initial word
|
|
|
|
|
(?: (?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* \. (?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* (?:
|
|
|
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
|
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
|
|
|
|
|
|
|
|
|
" (?: # opening quote...
|
|
|
|
|
[^\\\x80-\xff\n\015"] # Anything except backslash and quote
|
|
|
|
|
| # or
|
|
|
|
|
\\ [^\x80-\xff] # Escaped something (something != CR)
|
|
|
|
|
)* " # closing quote
|
|
|
|
|
) )* # further okay, if led by a period
|
|
|
|
|
(?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* @ (?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* (?:
|
|
|
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
|
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
|
|
|
| \[ # [
|
|
|
|
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
|
|
|
|
\] # ]
|
|
|
|
|
) # initial subdomain
|
|
|
|
|
(?: #
|
|
|
|
|
(?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* \. # if led by a period...
|
|
|
|
|
(?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* (?:
|
|
|
|
|
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
|
|
|
|
|
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
|
|
|
|
| \[ # [
|
|
|
|
|
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
|
|
|
|
|
\] # ]
|
|
|
|
|
) # ...further okay
|
|
|
|
|
)*
|
|
|
|
|
# address spec
|
|
|
|
|
(?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* > # trailing >
|
|
|
|
|
# name and address
|
|
|
|
|
) (?: [\040\t] | \(
|
|
|
|
|
(?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
|
|
|
|
|
\) )* # optional trailing comment
|
|
|
|
|
/Ix
|
|
|
|
|
Capturing subpattern count = 0
|
|
|
|
|
Contains explicit CR or LF match
|
|
|
|
|
Options: extended
|
|
|
|
|
Starting code units: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
|
|
|
|
|
9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e
|
|
|
|
|
f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xff
|
|
|
|
|
Subject length lower bound = 3
|
|
|
|
|
|
|
|
|
|
/[\h]/B
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
>\x09<
|
|
|
|
|
0: \x09
|
|
|
|
|
|
|
|
|
|
/[\h]+/B
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]++
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
>\x09\x20\xa0<
|
|
|
|
|
0: \x09 \xa0
|
|
|
|
|
|
|
|
|
|
/[\v]/B
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
[\x0a-\x0d\x85\x{2028}-\x{2029}]
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/[^\h]/B
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
[^\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/\h+/I
|
|
|
|
|
Capturing subpattern count = 0
|
|
|
|
|
Starting code units: \x09 \x20 \xa0 \xff
|
|
|
|
|
Subject length lower bound = 1
|
|
|
|
|
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
|
|
|
|
|
0: \x{1680}\x{2000}\x{202f}\x{3000}
|
|
|
|
|
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
|
|
|
|
|
0: \x{200a}\xa0\x{2000}
|
|
|
|
|
|
|
|
|
|
/[\h\x{dc00}]+/IB
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{dc00}]++
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Capturing subpattern count = 0
|
|
|
|
|
Starting code units: \x09 \x20 \xa0 \xff
|
|
|
|
|
Subject length lower bound = 1
|
|
|
|
|
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
|
|
|
|
|
0: \x{1680}\x{2000}\x{202f}\x{3000}
|
|
|
|
|
\x{3001}\x{2fff}\x{200a}\xa0\x{2000}
|
|
|
|
|
0: \x{200a}\xa0\x{2000}
|
|
|
|
|
|
|
|
|
|
/\H+/I
|
|
|
|
|
Capturing subpattern count = 0
|
|
|
|
|
Subject length lower bound = 1
|
|
|
|
|
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
|
|
|
|
|
0: \x{167f}\x{1681}\x{180d}\x{180f}
|
|
|
|
|
\x{2000}\x{200a}\x{1fff}\x{200b}
|
|
|
|
|
0: \x{1fff}\x{200b}
|
|
|
|
|
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
|
|
|
|
|
0: \x{202e}\x{2030}\x{205e}\x{2060}
|
|
|
|
|
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
|
|
|
|
|
0: \x9f\xa1\x{2fff}\x{3001}
|
|
|
|
|
|
|
|
|
|
/[\H\x{d800}]+/
|
|
|
|
|
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
|
|
|
|
|
0: \x{167f}\x{1681}\x{180d}\x{180f}
|
|
|
|
|
\x{2000}\x{200a}\x{1fff}\x{200b}
|
|
|
|
|
0: \x{1fff}\x{200b}
|
|
|
|
|
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
|
|
|
|
|
0: \x{202e}\x{2030}\x{205e}\x{2060}
|
|
|
|
|
\xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
|
|
|
|
|
0: \x9f\xa1\x{2fff}\x{3001}
|
|
|
|
|
|
|
|
|
|
/\v+/I
|
|
|
|
|
Capturing subpattern count = 0
|
|
|
|
|
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
|
|
|
|
|
Subject length lower bound = 1
|
|
|
|
|
\x{2027}\x{2030}\x{2028}\x{2029}
|
|
|
|
|
0: \x{2028}\x{2029}
|
|
|
|
|
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
|
|
|
|
0: \x85\x0a\x0b\x0c\x0d
|
|
|
|
|
|
|
|
|
|
/[\v\x{dc00}]+/IB
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
[\x0a-\x0d\x85\x{2028}-\x{2029}\x{dc00}]++
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Capturing subpattern count = 0
|
|
|
|
|
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
|
|
|
|
|
Subject length lower bound = 1
|
|
|
|
|
\x{2027}\x{2030}\x{2028}\x{2029}
|
|
|
|
|
0: \x{2028}\x{2029}
|
|
|
|
|
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
|
|
|
|
0: \x85\x0a\x0b\x0c\x0d
|
|
|
|
|
|
|
|
|
|
/\V+/I
|
|
|
|
|
Capturing subpattern count = 0
|
|
|
|
|
Subject length lower bound = 1
|
|
|
|
|
\x{2028}\x{2029}\x{2027}\x{2030}
|
|
|
|
|
0: \x{2027}\x{2030}
|
|
|
|
|
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
|
|
|
|
|
0: \x09\x0e\x84\x86
|
|
|
|
|
|
|
|
|
|
/[\V\x{d800}]+/
|
|
|
|
|
\x{2028}\x{2029}\x{2027}\x{2030}
|
|
|
|
|
0: \x{2027}\x{2030}
|
|
|
|
|
\x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
|
|
|
|
|
0: \x09\x0e\x84\x86
|
|
|
|
|
|
|
|
|
|
/\R+/I,bsr=unicode
|
|
|
|
|
Capturing subpattern count = 0
|
2014-08-10 18:09:24 +02:00
|
|
|
|
\R matches any Unicode newline
|
2014-08-05 18:51:32 +02:00
|
|
|
|
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
|
|
|
|
|
Subject length lower bound = 1
|
|
|
|
|
\x{2027}\x{2030}\x{2028}\x{2029}
|
|
|
|
|
0: \x{2028}\x{2029}
|
|
|
|
|
\x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
|
|
|
|
|
0: \x85\x0a\x0b\x0c\x0d
|
|
|
|
|
|
|
|
|
|
/\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
|
|
|
|
|
Capturing subpattern count = 0
|
|
|
|
|
First code unit = \x{d800}
|
|
|
|
|
Last code unit = \x{dd00}
|
|
|
|
|
Subject length lower bound = 6
|
|
|
|
|
\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
|
|
|
|
|
0: \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
|
|
|
|
|
|
|
|
|
|
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
[^\x{80}]
|
|
|
|
|
[^\x{ff}]
|
|
|
|
|
[^\x{100}]
|
|
|
|
|
[^\x{1000}]
|
|
|
|
|
[^\x{ffff}]
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
/i [^\x{80}]
|
|
|
|
|
/i [^\x{ff}]
|
|
|
|
|
/i [^\x{100}]
|
|
|
|
|
/i [^\x{1000}]
|
|
|
|
|
/i [^\x{ffff}]
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
[^\x{100}]*
|
|
|
|
|
[^\x{1000}]+
|
|
|
|
|
[^\x{ffff}]??
|
|
|
|
|
[^\x{8000}]{4}
|
|
|
|
|
[^\x{8000}]*
|
|
|
|
|
[^\x{7fff}]{2}
|
|
|
|
|
[^\x{7fff}]{0,7}?
|
|
|
|
|
[^\x{100}]{5}
|
|
|
|
|
[^\x{100}]?+
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
/i [^\x{100}]*
|
|
|
|
|
/i [^\x{1000}]+
|
|
|
|
|
/i [^\x{ffff}]??
|
|
|
|
|
/i [^\x{8000}]{4}
|
|
|
|
|
/i [^\x{8000}]*
|
|
|
|
|
/i [^\x{7fff}]{2}
|
|
|
|
|
/i [^\x{7fff}]{0,7}?
|
|
|
|
|
/i [^\x{100}]{5}
|
|
|
|
|
/i [^\x{100}]?+
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
|
|
|
|
|
XX
|
|
|
|
|
0: XX
|
|
|
|
|
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF
|
|
|
|
|
|
|
|
|
|
/(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
|
|
|
|
|
XX
|
|
|
|
|
0: XX
|
|
|
|
|
MK: 0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE
|
|
|
|
|
|
|
|
|
|
/\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
\x{100}
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
[\x{100}-\x{200}]
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
\x{d800}
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/^\x{ffff}+/i
|
|
|
|
|
\x{ffff}
|
|
|
|
|
0: \x{ffff}
|
|
|
|
|
|
|
|
|
|
/^\x{ffff}?/i
|
|
|
|
|
\x{ffff}
|
|
|
|
|
0: \x{ffff}
|
|
|
|
|
|
|
|
|
|
/^\x{ffff}*/i
|
|
|
|
|
\x{ffff}
|
|
|
|
|
0: \x{ffff}
|
|
|
|
|
|
|
|
|
|
/^\x{ffff}{3}/i
|
|
|
|
|
\x{ffff}\x{ffff}\x{ffff}
|
|
|
|
|
0: \x{ffff}\x{ffff}\x{ffff}
|
|
|
|
|
|
|
|
|
|
/^\x{ffff}{0,3}/i
|
|
|
|
|
\x{ffff}
|
|
|
|
|
0: \x{ffff}
|
|
|
|
|
|
|
|
|
|
/[^\x00-a]{12,}[^b-\xff]*/B
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
[b-\xff] (neg){12,}
|
|
|
|
|
[\x00-a] (neg)*+
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
[\x00-\x08\x0e-\x1f!-\xff] (neg)*
|
|
|
|
|
\s*
|
|
|
|
|
|
|
|
|
|
[0-9A-Z_a-z]++
|
|
|
|
|
\W+
|
|
|
|
|
|
|
|
|
|
[\x00-/:-\xff] (neg)*?
|
|
|
|
|
\d
|
|
|
|
|
0
|
|
|
|
|
[\x00-/:-@[-^`{-\xff] (neg){4,6}?
|
|
|
|
|
\w*
|
|
|
|
|
A
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
a*
|
|
|
|
|
[b-\xff\x{100}-\x{200}]?+
|
|
|
|
|
a#
|
|
|
|
|
a*+
|
|
|
|
|
[b-\xff\x{100}-\x{200}]?
|
|
|
|
|
b#
|
|
|
|
|
[a-f]*+
|
|
|
|
|
[g-\xff\x{100}-\x{200}]*+
|
|
|
|
|
#
|
|
|
|
|
[g-\xff\x{100}-\x{200}]*+
|
|
|
|
|
[a-c]*+
|
|
|
|
|
#
|
|
|
|
|
[g-\xff\x{100}-\x{200}]*
|
|
|
|
|
[a-h]*+
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
/^[\x{1234}\x{4321}]{2,4}?/
|
|
|
|
|
\x{1234}\x{1234}\x{1234}
|
|
|
|
|
0: \x{1234}\x{1234}
|
|
|
|
|
|
|
|
|
|
# Check maximum non-UTF character size for the 16-bit library.
|
|
|
|
|
|
|
|
|
|
/\x{ffff}/
|
|
|
|
|
A\x{ffff}B
|
|
|
|
|
0: \x{ffff}
|
|
|
|
|
|
|
|
|
|
/\x{10000}/
|
|
|
|
|
Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
|
|
|
|
|
|
|
|
|
|
/\o{20000}/
|
|
|
|
|
|
|
|
|
|
# Check maximum character size for the 32-bit library. These will all give
|
|
|
|
|
# errors in the 16-bit library.
|
|
|
|
|
|
|
|
|
|
/\x{110000}/
|
|
|
|
|
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
|
|
|
|
|
|
|
|
|
|
/\x{7fffffff}/
|
|
|
|
|
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
|
|
|
|
|
|
|
|
|
/\x{80000000}/
|
|
|
|
|
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
|
|
|
|
|
|
|
|
|
/\x{ffffffff}/
|
|
|
|
|
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
|
|
|
|
|
|
|
|
|
/\x{100000000}/
|
|
|
|
|
Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
|
|
|
|
|
|
|
|
|
|
/\o{17777777777}/
|
|
|
|
|
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
|
|
|
|
|
|
|
|
|
|
/\o{20000000000}/
|
|
|
|
|
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
|
|
|
|
|
|
|
|
|
|
/\o{37777777777}/
|
|
|
|
|
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
|
|
|
|
|
|
|
|
|
|
/\o{40000000000}/
|
|
|
|
|
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
|
|
|
|
|
|
|
|
|
|
/\x{7fffffff}\x{7fffffff}/I
|
|
|
|
|
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
|
|
|
|
|
|
|
|
|
/\x{80000000}\x{80000000}/I
|
|
|
|
|
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
|
|
|
|
|
|
|
|
|
/\x{ffffffff}\x{ffffffff}/I
|
|
|
|
|
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
|
|
|
|
|
|
|
|
|
# Non-UTF characters
|
|
|
|
|
|
2015-10-17 15:50:56 +02:00
|
|
|
|
/.{2,3}/
|
2014-08-05 18:51:32 +02:00
|
|
|
|
\x{400000}\x{400001}\x{400002}\x{400003}
|
|
|
|
|
** Character \x{400000} is greater than 0xffff and UTF-16 mode is not enabled.
|
|
|
|
|
** Truncation will probably give the wrong result.
|
|
|
|
|
** Character \x{400001} is greater than 0xffff and UTF-16 mode is not enabled.
|
|
|
|
|
** Truncation will probably give the wrong result.
|
|
|
|
|
** Character \x{400002} is greater than 0xffff and UTF-16 mode is not enabled.
|
|
|
|
|
** Truncation will probably give the wrong result.
|
|
|
|
|
** Character \x{400003} is greater than 0xffff and UTF-16 mode is not enabled.
|
|
|
|
|
** Truncation will probably give the wrong result.
|
|
|
|
|
0: \x00\x01\x02
|
|
|
|
|
|
|
|
|
|
/\x{400000}\x{800000}/IBi
|
|
|
|
|
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
|
|
|
|
|
|
|
|
|
|
# Check character ranges
|
|
|
|
|
|
|
|
|
|
/[\H]/IB
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
[\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}]
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Capturing subpattern count = 0
|
|
|
|
|
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
|
|
|
|
|
\x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a
|
|
|
|
|
\x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9
|
|
|
|
|
: ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^
|
|
|
|
|
_ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80
|
|
|
|
|
\x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f
|
|
|
|
|
\x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e
|
|
|
|
|
\x9f \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae
|
|
|
|
|
\xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd
|
|
|
|
|
\xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc
|
|
|
|
|
\xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb
|
|
|
|
|
\xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea
|
|
|
|
|
\xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9
|
|
|
|
|
\xfa \xfb \xfc \xfd \xfe \xff
|
|
|
|
|
Subject length lower bound = 1
|
|
|
|
|
|
|
|
|
|
/[\V]/IB
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Bra
|
|
|
|
|
[\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}]
|
|
|
|
|
Ket
|
|
|
|
|
End
|
|
|
|
|
------------------------------------------------------------------
|
|
|
|
|
Capturing subpattern count = 0
|
|
|
|
|
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0e
|
|
|
|
|
\x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
|
|
|
|
|
\x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = >
|
|
|
|
|
? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
|
|
|
|
|
d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
|
|
|
|
|
\x83 \x84 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92
|
|
|
|
|
\x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1
|
|
|
|
|
\xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0
|
|
|
|
|
\xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf
|
|
|
|
|
\xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce
|
|
|
|
|
\xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd
|
|
|
|
|
\xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec
|
|
|
|
|
\xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
|
|
|
|
|
\xfc \xfd \xfe \xff
|
|
|
|
|
Subject length lower bound = 1
|
|
|
|
|
|
2015-10-30 16:20:07 +01:00
|
|
|
|
/(*THEN:\[A]{65501})/expand
|
2015-10-28 10:25:31 +01:00
|
|
|
|
|
2016-08-03 11:01:02 +02:00
|
|
|
|
# We can use pcre2test's utf8_input modifier to create wide pattern characters,
|
|
|
|
|
# even though this test is run when UTF is not supported.
|
|
|
|
|
|
|
|
|
|
/ab<61><62><EFBFBD><EFBFBD><EFBFBD><EFBFBD>z/utf8_input
|
|
|
|
|
** Failed: character value greater than 0xffff cannot be converted to 16-bit in non-UTF mode
|
|
|
|
|
ab<61><62><EFBFBD><EFBFBD><EFBFBD><EFBFBD>z
|
|
|
|
|
ab\x{7fffffff}z
|
|
|
|
|
|
|
|
|
|
/ab<61><62><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>z/utf8_input
|
|
|
|
|
** Failed: invalid UTF-8 string cannot be converted to 16-bit string
|
|
|
|
|
ab<61><62><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>z
|
|
|
|
|
ab\x{ffffffff}z
|
|
|
|
|
|
|
|
|
|
/ab<61>Az/utf8_input
|
|
|
|
|
** Failed: invalid UTF-8 string cannot be converted to 16-bit string
|
|
|
|
|
ab<61>Az
|
|
|
|
|
ab\x{80000041}z
|
|
|
|
|
|
2014-08-05 18:51:32 +02:00
|
|
|
|
# End of testinput11
|