pcre2/testdata/testinput10

499 lines
7.4 KiB
Plaintext
Raw Normal View History

# This set of tests is for UTF-8 support and Unicode property support, with
# relevance only for the 8-bit library.
/X(\C{3})/utf
X\x{1234}
/X(\C{4})/utf
X\x{1234}YZ
/X\C*/utf
XYZabcdce
/X\C*?/utf
XYZabcde
/X\C{3,5}/utf
Xabcdefg
X\x{1234}
X\x{1234}YZ
X\x{1234}\x{512}
X\x{1234}\x{512}YZ
/X\C{3,5}?/utf
Xabcdefg
X\x{1234}
X\x{1234}YZ
X\x{1234}\x{512}
/a\Cb/utf
aXb
a\nb
/a\C\Cb/utf
a\x{100}b
/ab\Cde/utf
abXde
/a\C\Cb/utf
a\x{100}b
\= Expect no match
a\x{12257}b
# The next 3 patterns have UTF-8 errors
/[<5B>]/utf
/<2F>/utf
/<2F><><EFBFBD>xxx/utf
# Now test subjects
/badutf/utf
\= Expect UTF-8 errors
2014-11-23 19:38:38 +01:00
X\xdf
XX\xef
XXX\xef\x80
X\xf7
XX\xf7\x80
XXX\xf7\x80\x80
\xfb
\xfb\x80
\xfb\x80\x80
\xfb\x80\x80\x80
\xfd
\xfd\x80
\xfd\x80\x80
\xfd\x80\x80\x80
\xfd\x80\x80\x80\x80
\xdf\x7f
\xef\x7f\x80
\xef\x80\x7f
\xf7\x7f\x80\x80
\xf7\x80\x7f\x80
\xf7\x80\x80\x7f
\xfb\x7f\x80\x80\x80
\xfb\x80\x7f\x80\x80
\xfb\x80\x80\x7f\x80
\xfb\x80\x80\x80\x7f
\xfd\x7f\x80\x80\x80\x80
\xfd\x80\x7f\x80\x80\x80
\xfd\x80\x80\x7f\x80\x80
\xfd\x80\x80\x80\x7f\x80
\xfd\x80\x80\x80\x80\x7f
\xed\xa0\x80
\xc0\x8f
\xe0\x80\x8f
\xf0\x80\x80\x8f
\xf8\x80\x80\x80\x8f
\xfc\x80\x80\x80\x80\x8f
\x80
\xfe
\xff
/badutf/utf
\= Expect UTF-8 errors
2014-11-23 19:38:38 +01:00
XX\xfb\x80\x80\x80\x80
XX\xfd\x80\x80\x80\x80\x80
XX\xf7\xbf\xbf\xbf
/shortutf/utf
\= Expect UTF-8 errors
2014-11-23 19:38:38 +01:00
XX\xdf\=ph
XX\xef\=ph
XX\xef\x80\=ph
\xf7\=ph
\xf7\x80\=ph
\xf7\x80\x80\=ph
\xfb\=ph
\xfb\x80\=ph
\xfb\x80\x80\=ph
\xfb\x80\x80\x80\=ph
\xfd\=ph
\xfd\x80\=ph
\xfd\x80\x80\=ph
\xfd\x80\x80\x80\=ph
\xfd\x80\x80\x80\x80\=ph
/anything/utf
\= Expect UTF-8 errors
2014-11-23 19:38:38 +01:00
X\xc0\x80
XX\xc1\x8f
XXX\xe0\x9f\x80
\xf0\x8f\x80\x80
\xf8\x87\x80\x80\x80
\xfc\x83\x80\x80\x80\x80
\xfe\x80\x80\x80\x80\x80
\xff\x80\x80\x80\x80\x80
\xf8\x88\x80\x80\x80
\xf9\x87\x80\x80\x80
\xfc\x84\x80\x80\x80\x80
\xfd\x83\x80\x80\x80\x80
\= Expect no match
\xc3\x8f
\xe0\xaf\x80
\xe1\x80\x80
\xf0\x9f\x80\x80
\xf1\x8f\x80\x80
\xf8\x88\x80\x80\x80\=no_utf_check
\xf9\x87\x80\x80\x80\=no_utf_check
\xfc\x84\x80\x80\x80\x80\=no_utf_check
\xfd\x83\x80\x80\x80\x80\=no_utf_check
# Similar tests with offsets
/badutf/utf
\= Expect UTF-8 errors
X\xdfabcd
X\xdfabcd\=offset=1
\= Expect no match
X\xdfabcd\=offset=2
/(?<=x)badutf/utf
\= Expect UTF-8 errors
X\xdfabcd
X\xdfabcd\=offset=1
X\xdfabcd\=offset=2
X\xdfabcd\xdf\=offset=3
\= Expect no match
X\xdfabcd\=offset=3
/(?<=xx)badutf/utf
\= Expect UTF-8 errors
X\xdfabcd
X\xdfabcd\=offset=1
X\xdfabcd\=offset=2
X\xdfabcd\=offset=3
/(?<=xxxx)badutf/utf
\= Expect UTF-8 errors
X\xdfabcd
X\xdfabcd\=offset=1
X\xdfabcd\=offset=2
X\xdfabcd\=offset=3
X\xdfabc\xdf\=offset=6
X\xdfabc\xdf\=offset=7
\= Expect no match
X\xdfabcd\=offset=6
/\x{100}/IB,utf
/\x{1000}/IB,utf
/\x{10000}/IB,utf
/\x{100000}/IB,utf
/\x{10ffff}/IB,utf
/[\x{ff}]/IB,utf
/[\x{100}]/IB,utf
/\x80/IB,utf
/\xff/IB,utf
/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
\x{D55c}\x{ad6d}\x{C5B4}
/\x{65e5}\x{672c}\x{8a9e}/IB,utf
\x{65e5}\x{672c}\x{8a9e}
/\x{80}/IB,utf
/\x{084}/IB,utf
/\x{104}/IB,utf
/\x{861}/IB,utf
/\x{212ab}/IB,utf
# This one is here not because it's different to Perl, but because of the way
# the captured single byte is displayed. (In Perl it becomes a character, and
# you can't tell the difference.)
/X(\C)(.*)/utf
X\x{1234}
X\nabc
# This one is here because Perl gives out a grumbly error message (quite
# correctly, but that messes up comparisons).
/a\Cb/utf
\= Expect no match
a\x{100}b
/[^ab\xC0-\xF0]/IB,utf
\x{f1}
\x{bf}
\x{100}
\x{1000}
\= Expect no match
\x{c0}
\x{f0}
/Ā{3,4}/IB,utf
\x{100}\x{100}\x{100}\x{100\x{100}
/(\x{100}+|x)/IB,utf
/(\x{100}*a|x)/IB,utf
/(\x{100}{0,2}a|x)/IB,utf
/(\x{100}{1,2}a|x)/IB,utf
/\x{100}/IB,utf
/a\x{100}\x{101}*/IB,utf
/a\x{100}\x{101}+/IB,utf
/[^\x{c4}]/IB
/[\x{100}]/IB,utf
\x{100}
Z\x{100}
\x{100}Z
/[\xff]/IB,utf
>\x{ff}<
/[^\xff]/IB,utf
/\x{100}abc(xyz(?1))/IB,utf
/\777/I,utf
\x{1ff}
\777
/\x{100}+\x{200}/IB,utf
/\x{100}+X/IB,utf
/^[\QĀ\E-\QŐ\E/B,utf
# This tests the stricter UTF-8 check according to RFC 3629.
/X/utf
\= Expect UTF-8 errors
\x{d800}
\x{da00}
\x{dfff}
\x{110000}
\x{2000000}
\x{7fffffff}
\= Expect no match
\x{d800}\=no_utf_check
\x{da00}\=no_utf_check
\x{dfff}\=no_utf_check
\x{110000}\=no_utf_check
\x{2000000}\=no_utf_check
\x{7fffffff}\=no_utf_check
/(*UTF8)\x{1234}/
abcd\x{1234}pqr
/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
/\h/I,utf
ABC\x{09}
ABC\x{20}
ABC\x{a0}
ABC\x{1680}
ABC\x{180e}
ABC\x{2000}
ABC\x{202f}
ABC\x{205f}
ABC\x{3000}
/\v/I,utf
ABC\x{0a}
ABC\x{0b}
ABC\x{0c}
ABC\x{0d}
ABC\x{85}
ABC\x{2028}
/\h*A/I,utf
CDBABC
/\v+A/I,utf
/\s?xxx\s/I,utf
/\sxxx\s/I,utf,tables=2
AB\x{85}xxx\x{a0}XYZ
AB\x{a0}xxx\x{85}XYZ
/\S \S/I,utf,tables=2
\x{a2} \x{84}
A Z
/a+/utf
a\x{123}aa\=offset=1
a\x{123}aa\=offset=3
a\x{123}aa\=offset=4
\= Expect bad offset value
a\x{123}aa\=offset=6
\= Expect bad UTF-8 offset
a\x{123}aa\=offset=2
\= Expect no match
a\x{123}aa\=offset=5
/\x{1234}+/Ii,utf
/\x{1234}+?/Ii,utf
/\x{1234}++/Ii,utf
/\x{1234}{2}/Ii,utf
/[^\x{c4}]/IB,utf
/X+\x{200}/IB,utf
/\R/I,utf
/\777/IB,utf
/\w+\x{C4}/B,utf
a\x{C4}\x{C4}
/\w+\x{C4}/B,utf,tables=2
a\x{C4}\x{C4}
/\W+\x{C4}/B,utf
!\x{C4}
/\W+\x{C4}/B,utf,tables=2
!\x{C4}
/\W+\x{A1}/B,utf
!\x{A1}
/\W+\x{A1}/B,utf,tables=2
!\x{A1}
/X\s+\x{A0}/B,utf
X\x20\x{A0}\x{A0}
/X\s+\x{A0}/B,utf,tables=2
X\x20\x{A0}\x{A0}
/\S+\x{A0}/B,utf
X\x{A0}\x{A0}
/\S+\x{A0}/B,utf,tables=2
X\x{A0}\x{A0}
/\x{a0}+\s!/B,utf
\x{a0}\x20!
/\x{a0}+\s!/B,utf,tables=2
\x{a0}\x20!
/A/utf
\x{ff000041}
\x{7f000041}
/(*UTF8)abc/never_utf
/abc/utf,never_utf
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
/AB\x{1fb0}/IB,utf
/AB\x{1fb0}/IBi,utf
/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
\x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
/[ⱥ]/Bi,utf
/[^ⱥ]/Bi,utf
/\h/I
/\v/I
/\R/I
/[[:blank:]]/B,ucp
/\x{212a}+/Ii,utf
KKkk\x{212a}
/s+/Ii,utf
SSss\x{17f}
2014-08-06 11:35:11 +02:00
/\x{100}*A/IB,utf
A
/\x{100}*\d(?R)/IB,utf
/[Z\x{100}]/IB,utf
Z\x{100}
\x{100}
\x{100}Z
/[z-\x{100}]/IB,utf
/[z\Qa-d]Ā\E]/IB,utf
\x{100}
Ā
/[ab\x{100}]abc(xyz(?1))/IB,utf
/\x{100}*\s/IB,utf
/\x{100}*\d/IB,utf
/\x{100}*\w/IB,utf
/\x{100}*\D/IB,utf
/\x{100}*\S/IB,utf
/\x{100}*\W/IB,utf
/[\x{105}-\x{109}]/IBi,utf
\x{104}
\x{105}
\x{109}
\= Expect no match
2014-08-06 11:35:11 +02:00
\x{100}
\x{10a}
/[z-\x{100}]/IBi,utf
Z
z
\x{39c}
\x{178}
|
\x{80}
\x{ff}
\x{100}
\x{101}
\= Expect no match
2014-08-06 11:35:11 +02:00
\x{102}
Y
y
/[z-\x{100}]/IBi,utf
/\x{3a3}B/IBi,utf
2014-11-12 17:57:56 +01:00
/abc/utf,replace=<3D>
abc
2014-11-12 17:57:56 +01:00
# End of testinput10