370 lines
5.0 KiB
Plaintext
370 lines
5.0 KiB
Plaintext
|
/-- This set of tests is for UTF-8 support but not Unicode property support,
|
|||
|
and is relevant only to the 8-bit library. --/
|
|||
|
|
|||
|
< forbid W
|
|||
|
|
|||
|
/X(\C{3})/8
|
|||
|
X\x{1234}
|
|||
|
|
|||
|
/X(\C{4})/8
|
|||
|
X\x{1234}YZ
|
|||
|
|
|||
|
/X\C*/8
|
|||
|
XYZabcdce
|
|||
|
|
|||
|
/X\C*?/8
|
|||
|
XYZabcde
|
|||
|
|
|||
|
/X\C{3,5}/8
|
|||
|
Xabcdefg
|
|||
|
X\x{1234}
|
|||
|
X\x{1234}YZ
|
|||
|
X\x{1234}\x{512}
|
|||
|
X\x{1234}\x{512}YZ
|
|||
|
|
|||
|
/X\C{3,5}?/8
|
|||
|
Xabcdefg
|
|||
|
X\x{1234}
|
|||
|
X\x{1234}YZ
|
|||
|
X\x{1234}\x{512}
|
|||
|
|
|||
|
/a\Cb/8
|
|||
|
aXb
|
|||
|
a\nb
|
|||
|
|
|||
|
/a\C\Cb/8
|
|||
|
a\x{100}b
|
|||
|
|
|||
|
/ab\Cde/8
|
|||
|
abXde
|
|||
|
|
|||
|
/a\C\Cb/8
|
|||
|
a\x{100}b
|
|||
|
** Failers
|
|||
|
a\x{12257}b
|
|||
|
|
|||
|
/[<5B>]/8
|
|||
|
|
|||
|
/<2F>/8
|
|||
|
|
|||
|
/<2F><><EFBFBD>xxx/8
|
|||
|
|
|||
|
/<2F><><EFBFBD>xxx/8?DZSSO
|
|||
|
|
|||
|
/badutf/8
|
|||
|
\xdf
|
|||
|
\xef
|
|||
|
\xef\x80
|
|||
|
\xf7
|
|||
|
\xf7\x80
|
|||
|
\xf7\x80\x80
|
|||
|
\xfb
|
|||
|
\xfb\x80
|
|||
|
\xfb\x80\x80
|
|||
|
\xfb\x80\x80\x80
|
|||
|
\xfd
|
|||
|
\xfd\x80
|
|||
|
\xfd\x80\x80
|
|||
|
\xfd\x80\x80\x80
|
|||
|
\xfd\x80\x80\x80\x80
|
|||
|
\xdf\x7f
|
|||
|
\xef\x7f\x80
|
|||
|
\xef\x80\x7f
|
|||
|
\xf7\x7f\x80\x80
|
|||
|
\xf7\x80\x7f\x80
|
|||
|
\xf7\x80\x80\x7f
|
|||
|
\xfb\x7f\x80\x80\x80
|
|||
|
\xfb\x80\x7f\x80\x80
|
|||
|
\xfb\x80\x80\x7f\x80
|
|||
|
\xfb\x80\x80\x80\x7f
|
|||
|
\xfd\x7f\x80\x80\x80\x80
|
|||
|
\xfd\x80\x7f\x80\x80\x80
|
|||
|
\xfd\x80\x80\x7f\x80\x80
|
|||
|
\xfd\x80\x80\x80\x7f\x80
|
|||
|
\xfd\x80\x80\x80\x80\x7f
|
|||
|
\xed\xa0\x80
|
|||
|
\xc0\x8f
|
|||
|
\xe0\x80\x8f
|
|||
|
\xf0\x80\x80\x8f
|
|||
|
\xf8\x80\x80\x80\x8f
|
|||
|
\xfc\x80\x80\x80\x80\x8f
|
|||
|
\x80
|
|||
|
\xfe
|
|||
|
\xff
|
|||
|
|
|||
|
/badutf/8
|
|||
|
\xfb\x80\x80\x80\x80
|
|||
|
\xfd\x80\x80\x80\x80\x80
|
|||
|
\xf7\xbf\xbf\xbf
|
|||
|
|
|||
|
/shortutf/8
|
|||
|
\P\P\xdf
|
|||
|
\P\P\xef
|
|||
|
\P\P\xef\x80
|
|||
|
\P\P\xf7
|
|||
|
\P\P\xf7\x80
|
|||
|
\P\P\xf7\x80\x80
|
|||
|
\P\P\xfb
|
|||
|
\P\P\xfb\x80
|
|||
|
\P\P\xfb\x80\x80
|
|||
|
\P\P\xfb\x80\x80\x80
|
|||
|
\P\P\xfd
|
|||
|
\P\P\xfd\x80
|
|||
|
\P\P\xfd\x80\x80
|
|||
|
\P\P\xfd\x80\x80\x80
|
|||
|
\P\P\xfd\x80\x80\x80\x80
|
|||
|
|
|||
|
/anything/8
|
|||
|
\xc0\x80
|
|||
|
\xc1\x8f
|
|||
|
\xe0\x9f\x80
|
|||
|
\xf0\x8f\x80\x80
|
|||
|
\xf8\x87\x80\x80\x80
|
|||
|
\xfc\x83\x80\x80\x80\x80
|
|||
|
\xfe\x80\x80\x80\x80\x80
|
|||
|
\xff\x80\x80\x80\x80\x80
|
|||
|
\xc3\x8f
|
|||
|
\xe0\xaf\x80
|
|||
|
\xe1\x80\x80
|
|||
|
\xf0\x9f\x80\x80
|
|||
|
\xf1\x8f\x80\x80
|
|||
|
\xf8\x88\x80\x80\x80
|
|||
|
\xf9\x87\x80\x80\x80
|
|||
|
\xfc\x84\x80\x80\x80\x80
|
|||
|
\xfd\x83\x80\x80\x80\x80
|
|||
|
\?\xf8\x88\x80\x80\x80
|
|||
|
\?\xf9\x87\x80\x80\x80
|
|||
|
\?\xfc\x84\x80\x80\x80\x80
|
|||
|
\?\xfd\x83\x80\x80\x80\x80
|
|||
|
|
|||
|
/\x{100}/8DZ
|
|||
|
|
|||
|
/\x{1000}/8DZ
|
|||
|
|
|||
|
/\x{10000}/8DZ
|
|||
|
|
|||
|
/\x{100000}/8DZ
|
|||
|
|
|||
|
/\x{10ffff}/8DZ
|
|||
|
|
|||
|
/[\x{ff}]/8DZ
|
|||
|
|
|||
|
/[\x{100}]/8DZ
|
|||
|
|
|||
|
/\x80/8DZ
|
|||
|
|
|||
|
/\xff/8DZ
|
|||
|
|
|||
|
/\x{D55c}\x{ad6d}\x{C5B4}/DZ8
|
|||
|
\x{D55c}\x{ad6d}\x{C5B4}
|
|||
|
|
|||
|
/\x{65e5}\x{672c}\x{8a9e}/DZ8
|
|||
|
\x{65e5}\x{672c}\x{8a9e}
|
|||
|
|
|||
|
/\x{80}/DZ8
|
|||
|
|
|||
|
/\x{084}/DZ8
|
|||
|
|
|||
|
/\x{104}/DZ8
|
|||
|
|
|||
|
/\x{861}/DZ8
|
|||
|
|
|||
|
/\x{212ab}/DZ8
|
|||
|
|
|||
|
/-- This one is here not because it's different to Perl, but because the way
|
|||
|
the captured single-byte is displayed. (In Perl it becomes a character, and you
|
|||
|
can't tell the difference.) --/
|
|||
|
|
|||
|
/X(\C)(.*)/8
|
|||
|
X\x{1234}
|
|||
|
X\nabc
|
|||
|
|
|||
|
/-- This one is here because Perl gives out a grumbly error message (quite
|
|||
|
correctly, but that messes up comparisons). --/
|
|||
|
|
|||
|
/a\Cb/8
|
|||
|
*** Failers
|
|||
|
a\x{100}b
|
|||
|
|
|||
|
/[^ab\xC0-\xF0]/8SDZ
|
|||
|
\x{f1}
|
|||
|
\x{bf}
|
|||
|
\x{100}
|
|||
|
\x{1000}
|
|||
|
*** Failers
|
|||
|
\x{c0}
|
|||
|
\x{f0}
|
|||
|
|
|||
|
/Ā{3,4}/8SDZ
|
|||
|
\x{100}\x{100}\x{100}\x{100\x{100}
|
|||
|
|
|||
|
/(\x{100}+|x)/8SDZ
|
|||
|
|
|||
|
/(\x{100}*a|x)/8SDZ
|
|||
|
|
|||
|
/(\x{100}{0,2}a|x)/8SDZ
|
|||
|
|
|||
|
/(\x{100}{1,2}a|x)/8SDZ
|
|||
|
|
|||
|
/\x{100}/8DZ
|
|||
|
|
|||
|
/a\x{100}\x{101}*/8DZ
|
|||
|
|
|||
|
/a\x{100}\x{101}+/8DZ
|
|||
|
|
|||
|
/[^\x{c4}]/DZ
|
|||
|
|
|||
|
/[\x{100}]/8DZ
|
|||
|
\x{100}
|
|||
|
Z\x{100}
|
|||
|
\x{100}Z
|
|||
|
*** Failers
|
|||
|
|
|||
|
/[\xff]/DZ8
|
|||
|
>\x{ff}<
|
|||
|
|
|||
|
/[^\xff]/8DZ
|
|||
|
|
|||
|
/\x{100}abc(xyz(?1))/8DZ
|
|||
|
|
|||
|
/a\x{1234}b/P8
|
|||
|
a\x{1234}b
|
|||
|
|
|||
|
/\777/8I
|
|||
|
\x{1ff}
|
|||
|
\777
|
|||
|
|
|||
|
/\x{100}+\x{200}/8DZ
|
|||
|
|
|||
|
/\x{100}+X/8DZ
|
|||
|
|
|||
|
/^[\QĀ\E-\QŐ\E/BZ8
|
|||
|
|
|||
|
/-- This tests the stricter UTF-8 check according to RFC 3629. --/
|
|||
|
|
|||
|
/X/8
|
|||
|
\x{d800}
|
|||
|
\x{d800}\?
|
|||
|
\x{da00}
|
|||
|
\x{da00}\?
|
|||
|
\x{dfff}
|
|||
|
\x{dfff}\?
|
|||
|
\x{110000}
|
|||
|
\x{110000}\?
|
|||
|
\x{2000000}
|
|||
|
\x{2000000}\?
|
|||
|
\x{7fffffff}
|
|||
|
\x{7fffffff}\?
|
|||
|
|
|||
|
/(*UTF8)\x{1234}/
|
|||
|
abcd\x{1234}pqr
|
|||
|
|
|||
|
/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
|
|||
|
|
|||
|
/\h/SI8
|
|||
|
ABC\x{09}
|
|||
|
ABC\x{20}
|
|||
|
ABC\x{a0}
|
|||
|
ABC\x{1680}
|
|||
|
ABC\x{180e}
|
|||
|
ABC\x{2000}
|
|||
|
ABC\x{202f}
|
|||
|
ABC\x{205f}
|
|||
|
ABC\x{3000}
|
|||
|
|
|||
|
/\v/SI8
|
|||
|
ABC\x{0a}
|
|||
|
ABC\x{0b}
|
|||
|
ABC\x{0c}
|
|||
|
ABC\x{0d}
|
|||
|
ABC\x{85}
|
|||
|
ABC\x{2028}
|
|||
|
|
|||
|
/\h*A/SI8
|
|||
|
CDBABC
|
|||
|
|
|||
|
/\v+A/SI8
|
|||
|
|
|||
|
/\s?xxx\s/8SI
|
|||
|
|
|||
|
/\sxxx\s/I8ST1
|
|||
|
AB\x{85}xxx\x{a0}XYZ
|
|||
|
AB\x{a0}xxx\x{85}XYZ
|
|||
|
|
|||
|
/\S \S/I8ST1
|
|||
|
\x{a2} \x{84}
|
|||
|
A Z
|
|||
|
|
|||
|
/a+/8
|
|||
|
a\x{123}aa\>1
|
|||
|
a\x{123}aa\>2
|
|||
|
a\x{123}aa\>3
|
|||
|
a\x{123}aa\>4
|
|||
|
a\x{123}aa\>5
|
|||
|
a\x{123}aa\>6
|
|||
|
|
|||
|
/\x{1234}+/iS8I
|
|||
|
|
|||
|
/\x{1234}+?/iS8I
|
|||
|
|
|||
|
/\x{1234}++/iS8I
|
|||
|
|
|||
|
/\x{1234}{2}/iS8I
|
|||
|
|
|||
|
/[^\x{c4}]/8DZ
|
|||
|
|
|||
|
/X+\x{200}/8DZ
|
|||
|
|
|||
|
/\R/SI8
|
|||
|
|
|||
|
/\777/8DZ
|
|||
|
|
|||
|
/\w+\x{C4}/8BZ
|
|||
|
a\x{C4}\x{C4}
|
|||
|
|
|||
|
/\w+\x{C4}/8BZT1
|
|||
|
a\x{C4}\x{C4}
|
|||
|
|
|||
|
/\W+\x{C4}/8BZ
|
|||
|
!\x{C4}
|
|||
|
|
|||
|
/\W+\x{C4}/8BZT1
|
|||
|
!\x{C4}
|
|||
|
|
|||
|
/\W+\x{A1}/8BZ
|
|||
|
!\x{A1}
|
|||
|
|
|||
|
/\W+\x{A1}/8BZT1
|
|||
|
!\x{A1}
|
|||
|
|
|||
|
/X\s+\x{A0}/8BZ
|
|||
|
X\x20\x{A0}\x{A0}
|
|||
|
|
|||
|
/X\s+\x{A0}/8BZT1
|
|||
|
X\x20\x{A0}\x{A0}
|
|||
|
|
|||
|
/\S+\x{A0}/8BZ
|
|||
|
X\x{A0}\x{A0}
|
|||
|
|
|||
|
/\S+\x{A0}/8BZT1
|
|||
|
X\x{A0}\x{A0}
|
|||
|
|
|||
|
/\x{a0}+\s!/8BZ
|
|||
|
\x{a0}\x20!
|
|||
|
|
|||
|
/\x{a0}+\s!/8BZT1
|
|||
|
\x{a0}\x20!
|
|||
|
|
|||
|
/A/8
|
|||
|
\x{ff000041}
|
|||
|
\x{7f000041}
|
|||
|
|
|||
|
/(*UTF8)abc/9
|
|||
|
|
|||
|
/abc/89
|
|||
|
|
|||
|
//8+L
|
|||
|
\xf1\xad\xae\xae
|
|||
|
|
|||
|
/-- End of testinput15 --/
|