pcre2/testdata/testinput10

399 lines
5.9 KiB
Plaintext
Raw Normal View History

# This set of tests is for UTF-8 support and Unicode property support, with
# relevance only for the 8-bit library.
/X(\C{3})/utf
X\x{1234}
/X(\C{4})/utf
X\x{1234}YZ
/X\C*/utf
XYZabcdce
/X\C*?/utf
XYZabcde
/X\C{3,5}/utf
Xabcdefg
X\x{1234}
X\x{1234}YZ
X\x{1234}\x{512}
X\x{1234}\x{512}YZ
/X\C{3,5}?/utf
Xabcdefg
X\x{1234}
X\x{1234}YZ
X\x{1234}\x{512}
/a\Cb/utf
aXb
a\nb
/a\C\Cb/utf
a\x{100}b
/ab\Cde/utf
abXde
/a\C\Cb/utf
a\x{100}b
** Failers
a\x{12257}b
/[<5B>]/utf
/<2F>/utf
/<2F><><EFBFBD>xxx/utf
/badutf/utf
\xdf
\xef
\xef\x80
\xf7
\xf7\x80
\xf7\x80\x80
\xfb
\xfb\x80
\xfb\x80\x80
\xfb\x80\x80\x80
\xfd
\xfd\x80
\xfd\x80\x80
\xfd\x80\x80\x80
\xfd\x80\x80\x80\x80
\xdf\x7f
\xef\x7f\x80
\xef\x80\x7f
\xf7\x7f\x80\x80
\xf7\x80\x7f\x80
\xf7\x80\x80\x7f
\xfb\x7f\x80\x80\x80
\xfb\x80\x7f\x80\x80
\xfb\x80\x80\x7f\x80
\xfb\x80\x80\x80\x7f
\xfd\x7f\x80\x80\x80\x80
\xfd\x80\x7f\x80\x80\x80
\xfd\x80\x80\x7f\x80\x80
\xfd\x80\x80\x80\x7f\x80
\xfd\x80\x80\x80\x80\x7f
\xed\xa0\x80
\xc0\x8f
\xe0\x80\x8f
\xf0\x80\x80\x8f
\xf8\x80\x80\x80\x8f
\xfc\x80\x80\x80\x80\x8f
\x80
\xfe
\xff
/badutf/utf
\xfb\x80\x80\x80\x80
\xfd\x80\x80\x80\x80\x80
\xf7\xbf\xbf\xbf
/shortutf/utf
\xdf\=ph
\xef\=ph
\xef\x80\=ph
\xf7\=ph
\xf7\x80\=ph
\xf7\x80\x80\=ph
\xfb\=ph
\xfb\x80\=ph
\xfb\x80\x80\=ph
\xfb\x80\x80\x80\=ph
\xfd\=ph
\xfd\x80\=ph
\xfd\x80\x80\=ph
\xfd\x80\x80\x80\=ph
\xfd\x80\x80\x80\x80\=ph
/anything/utf
\xc0\x80
\xc1\x8f
\xe0\x9f\x80
\xf0\x8f\x80\x80
\xf8\x87\x80\x80\x80
\xfc\x83\x80\x80\x80\x80
\xfe\x80\x80\x80\x80\x80
\xff\x80\x80\x80\x80\x80
\xc3\x8f
\xe0\xaf\x80
\xe1\x80\x80
\xf0\x9f\x80\x80
\xf1\x8f\x80\x80
\xf8\x88\x80\x80\x80
\xf9\x87\x80\x80\x80
\xfc\x84\x80\x80\x80\x80
\xfd\x83\x80\x80\x80\x80
\xf8\x88\x80\x80\x80\=no_utf_check
\xf9\x87\x80\x80\x80\=no_utf_check
\xfc\x84\x80\x80\x80\x80\=no_utf_check
\xfd\x83\x80\x80\x80\x80\=no_utf_check
/\x{100}/IB,utf
/\x{1000}/IB,utf
/\x{10000}/IB,utf
/\x{100000}/IB,utf
/\x{10ffff}/IB,utf
/[\x{ff}]/IB,utf
/[\x{100}]/IB,utf
/\x80/IB,utf
/\xff/IB,utf
/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
\x{D55c}\x{ad6d}\x{C5B4}
/\x{65e5}\x{672c}\x{8a9e}/IB,utf
\x{65e5}\x{672c}\x{8a9e}
/\x{80}/IB,utf
/\x{084}/IB,utf
/\x{104}/IB,utf
/\x{861}/IB,utf
/\x{212ab}/IB,utf
# This one is here not because it's different to Perl, but because of the way
# the captured single byte is displayed. (In Perl it becomes a character, and
# you can't tell the difference.)
/X(\C)(.*)/utf
X\x{1234}
X\nabc
# This one is here because Perl gives out a grumbly error message (quite
# correctly, but that messes up comparisons).
/a\Cb/utf
*** Failers
a\x{100}b
/[^ab\xC0-\xF0]/IB,utf
\x{f1}
\x{bf}
\x{100}
\x{1000}
*** Failers
\x{c0}
\x{f0}
/Ā{3,4}/IB,utf
\x{100}\x{100}\x{100}\x{100\x{100}
/(\x{100}+|x)/IB,utf
/(\x{100}*a|x)/IB,utf
/(\x{100}{0,2}a|x)/IB,utf
/(\x{100}{1,2}a|x)/IB,utf
/\x{100}/IB,utf
/a\x{100}\x{101}*/IB,utf
/a\x{100}\x{101}+/IB,utf
/[^\x{c4}]/IB
/[\x{100}]/IB,utf
\x{100}
Z\x{100}
\x{100}Z
*** Failers
/[\xff]/IB,utf
>\x{ff}<
/[^\xff]/IB,utf
/\x{100}abc(xyz(?1))/IB,utf
/a\x{1234}b/utf,posix
a\x{1234}b
/\777/I,utf
\x{1ff}
\777
/\x{100}+\x{200}/IB,utf
/\x{100}+X/IB,utf
/^[\QĀ\E-\QŐ\E/B,utf
# This tests the stricter UTF-8 check according to RFC 3629.
/X/utf
\x{d800}
\x{d800}\=no_utf_check
\x{da00}
\x{da00}\=no_utf_check
\x{dfff}
\x{dfff}\=no_utf_check
\x{110000}
\x{110000}\=no_utf_check
\x{2000000}
\x{2000000}\=no_utf_check
\x{7fffffff}
\x{7fffffff}\=no_utf_check
/(*UTF8)\x{1234}/
abcd\x{1234}pqr
/(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
/\h/I,utf
ABC\x{09}
ABC\x{20}
ABC\x{a0}
ABC\x{1680}
ABC\x{180e}
ABC\x{2000}
ABC\x{202f}
ABC\x{205f}
ABC\x{3000}
/\v/I,utf
ABC\x{0a}
ABC\x{0b}
ABC\x{0c}
ABC\x{0d}
ABC\x{85}
ABC\x{2028}
/\h*A/I,utf
CDBABC
/\v+A/I,utf
/\s?xxx\s/I,utf
/\sxxx\s/I,utf,tables=2
AB\x{85}xxx\x{a0}XYZ
AB\x{a0}xxx\x{85}XYZ
/\S \S/I,utf,tables=2
\x{a2} \x{84}
A Z
/a+/utf
a\x{123}aa\=offset=1
a\x{123}aa\=offset=2
a\x{123}aa\=offset=3
a\x{123}aa\=offset=4
a\x{123}aa\=offset=5
a\x{123}aa\=offset=6
/\x{1234}+/Ii,utf
/\x{1234}+?/Ii,utf
/\x{1234}++/Ii,utf
/\x{1234}{2}/Ii,utf
/[^\x{c4}]/IB,utf
/X+\x{200}/IB,utf
/\R/I,utf
/\777/IB,utf
/\w+\x{C4}/B,utf
a\x{C4}\x{C4}
/\w+\x{C4}/B,utf,tables=2
a\x{C4}\x{C4}
/\W+\x{C4}/B,utf
!\x{C4}
/\W+\x{C4}/B,utf,tables=2
!\x{C4}
/\W+\x{A1}/B,utf
!\x{A1}
/\W+\x{A1}/B,utf,tables=2
!\x{A1}
/X\s+\x{A0}/B,utf
X\x20\x{A0}\x{A0}
/X\s+\x{A0}/B,utf,tables=2
X\x20\x{A0}\x{A0}
/\S+\x{A0}/B,utf
X\x{A0}\x{A0}
/\S+\x{A0}/B,utf,tables=2
X\x{A0}\x{A0}
/\x{a0}+\s!/B,utf
\x{a0}\x20!
/\x{a0}+\s!/B,utf,tables=2
\x{a0}\x20!
/A/utf
\x{ff000041}
\x{7f000041}
/(*UTF8)abc/never_utf
/abc/utf,never_utf
/\w/posix
+++\x{c2}
/\w/ucp,posix
+++\x{c2}
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
/AB\x{1fb0}/IB,utf
/AB\x{1fb0}/IBi,utf
/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
\x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
/[ⱥ]/Bi,utf
/[^ⱥ]/Bi,utf
/\h/I
/\v/I
/\R/I
/[[:blank:]]/B,ucp
/\x{212a}+/Ii,utf
KKkk\x{212a}
/s+/Ii,utf
SSss\x{17f}
# End of testinput10