# This set of tests is for UTF-8 support and Unicode property support, with # relevance only for the 8-bit library. /X(\C{3})/utf X\x{1234} /X(\C{4})/utf X\x{1234}YZ /X\C*/utf XYZabcdce /X\C*?/utf XYZabcde /X\C{3,5}/utf Xabcdefg X\x{1234} X\x{1234}YZ X\x{1234}\x{512} X\x{1234}\x{512}YZ /X\C{3,5}?/utf Xabcdefg X\x{1234} X\x{1234}YZ X\x{1234}\x{512} /a\Cb/utf aXb a\nb /a\C\Cb/utf a\x{100}b /ab\Cde/utf abXde /a\C\Cb/utf a\x{100}b \= Expect no match a\x{12257}b # The next 3 patterns have UTF-8 errors /[Ã]/utf /Ã/utf /ÃÃÃxxx/utf # Now test subjects /badutf/utf \= Expect UTF-8 errors X\xdf XX\xef XXX\xef\x80 X\xf7 XX\xf7\x80 XXX\xf7\x80\x80 \xfb \xfb\x80 \xfb\x80\x80 \xfb\x80\x80\x80 \xfd \xfd\x80 \xfd\x80\x80 \xfd\x80\x80\x80 \xfd\x80\x80\x80\x80 \xdf\x7f \xef\x7f\x80 \xef\x80\x7f \xf7\x7f\x80\x80 \xf7\x80\x7f\x80 \xf7\x80\x80\x7f \xfb\x7f\x80\x80\x80 \xfb\x80\x7f\x80\x80 \xfb\x80\x80\x7f\x80 \xfb\x80\x80\x80\x7f \xfd\x7f\x80\x80\x80\x80 \xfd\x80\x7f\x80\x80\x80 \xfd\x80\x80\x7f\x80\x80 \xfd\x80\x80\x80\x7f\x80 \xfd\x80\x80\x80\x80\x7f \xed\xa0\x80 \xc0\x8f \xe0\x80\x8f \xf0\x80\x80\x8f \xf8\x80\x80\x80\x8f \xfc\x80\x80\x80\x80\x8f \x80 \xfe \xff /badutf/utf \= Expect UTF-8 errors XX\xfb\x80\x80\x80\x80 XX\xfd\x80\x80\x80\x80\x80 XX\xf7\xbf\xbf\xbf /shortutf/utf \= Expect UTF-8 errors XX\xdf\=ph XX\xef\=ph XX\xef\x80\=ph \xf7\=ph \xf7\x80\=ph \xf7\x80\x80\=ph \xfb\=ph \xfb\x80\=ph \xfb\x80\x80\=ph \xfb\x80\x80\x80\=ph \xfd\=ph \xfd\x80\=ph \xfd\x80\x80\=ph \xfd\x80\x80\x80\=ph \xfd\x80\x80\x80\x80\=ph /anything/utf \= Expect UTF-8 errors X\xc0\x80 XX\xc1\x8f XXX\xe0\x9f\x80 \xf0\x8f\x80\x80 \xf8\x87\x80\x80\x80 \xfc\x83\x80\x80\x80\x80 \xfe\x80\x80\x80\x80\x80 \xff\x80\x80\x80\x80\x80 \xf8\x88\x80\x80\x80 \xf9\x87\x80\x80\x80 \xfc\x84\x80\x80\x80\x80 \xfd\x83\x80\x80\x80\x80 \= Expect no match \xc3\x8f \xe0\xaf\x80 \xe1\x80\x80 \xf0\x9f\x80\x80 \xf1\x8f\x80\x80 \xf8\x88\x80\x80\x80\=no_utf_check \xf9\x87\x80\x80\x80\=no_utf_check \xfc\x84\x80\x80\x80\x80\=no_utf_check \xfd\x83\x80\x80\x80\x80\=no_utf_check # Similar tests with offsets /badutf/utf \= Expect UTF-8 errors X\xdfabcd X\xdfabcd\=offset=1 \= Expect no match X\xdfabcd\=offset=2 /(?<=x)badutf/utf \= Expect UTF-8 errors X\xdfabcd X\xdfabcd\=offset=1 X\xdfabcd\=offset=2 X\xdfabcd\xdf\=offset=3 \= Expect no match X\xdfabcd\=offset=3 /(?<=xx)badutf/utf \= Expect UTF-8 errors X\xdfabcd X\xdfabcd\=offset=1 X\xdfabcd\=offset=2 X\xdfabcd\=offset=3 /(?<=xxxx)badutf/utf \= Expect UTF-8 errors X\xdfabcd X\xdfabcd\=offset=1 X\xdfabcd\=offset=2 X\xdfabcd\=offset=3 X\xdfabc\xdf\=offset=6 X\xdfabc\xdf\=offset=7 \= Expect no match X\xdfabcd\=offset=6 /\x{100}/IB,utf /\x{1000}/IB,utf /\x{10000}/IB,utf /\x{100000}/IB,utf /\x{10ffff}/IB,utf /[\x{ff}]/IB,utf /[\x{100}]/IB,utf /\x80/IB,utf /\xff/IB,utf /\x{D55c}\x{ad6d}\x{C5B4}/IB,utf \x{D55c}\x{ad6d}\x{C5B4} /\x{65e5}\x{672c}\x{8a9e}/IB,utf \x{65e5}\x{672c}\x{8a9e} /\x{80}/IB,utf /\x{084}/IB,utf /\x{104}/IB,utf /\x{861}/IB,utf /\x{212ab}/IB,utf # This one is here not because it's different to Perl, but because the way # the captured single-byte is displayed. (In Perl it becomes a character, and you # can't tell the difference.) /X(\C)(.*)/utf X\x{1234} X\nabc # This one is here because Perl gives out a grumbly error message (quite # correctly, but that messes up comparisons). /a\Cb/utf \= Expect no match a\x{100}b /[^ab\xC0-\xF0]/IB,utf \x{f1} \x{bf} \x{100} \x{1000} \= Expect no match \x{c0} \x{f0} /Ä€{3,4}/IB,utf \x{100}\x{100}\x{100}\x{100\x{100} /(\x{100}+|x)/IB,utf /(\x{100}*a|x)/IB,utf /(\x{100}{0,2}a|x)/IB,utf /(\x{100}{1,2}a|x)/IB,utf /\x{100}/IB,utf /a\x{100}\x{101}*/IB,utf /a\x{100}\x{101}+/IB,utf /[^\x{c4}]/IB /[\x{100}]/IB,utf \x{100} Z\x{100} \x{100}Z /[\xff]/IB,utf >\x{ff}< /[^\xff]/IB,utf /\x{100}abc(xyz(?1))/IB,utf /\777/I,utf \x{1ff} \777 /\x{100}+\x{200}/IB,utf /\x{100}+X/IB,utf /^[\QÄ€\E-\QÅ\E/B,utf # This tests the stricter UTF-8 check according to RFC 3629. /X/utf \= Expect UTF-8 errors \x{d800} \x{da00} \x{dfff} \x{110000} \x{2000000} \x{7fffffff} \= Expect no match \x{d800}\=no_utf_check \x{da00}\=no_utf_check \x{dfff}\=no_utf_check \x{110000}\=no_utf_check \x{2000000}\=no_utf_check \x{7fffffff}\=no_utf_check /(*UTF8)\x{1234}/ abcd\x{1234}pqr /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I /\h/I,utf ABC\x{09} ABC\x{20} ABC\x{a0} ABC\x{1680} ABC\x{180e} ABC\x{2000} ABC\x{202f} ABC\x{205f} ABC\x{3000} /\v/I,utf ABC\x{0a} ABC\x{0b} ABC\x{0c} ABC\x{0d} ABC\x{85} ABC\x{2028} /\h*A/I,utf CDBABC /\v+A/I,utf /\s?xxx\s/I,utf /\sxxx\s/I,utf,tables=2 AB\x{85}xxx\x{a0}XYZ AB\x{a0}xxx\x{85}XYZ /\S \S/I,utf,tables=2 \x{a2} \x{84} A Z /a+/utf a\x{123}aa\=offset=1 a\x{123}aa\=offset=3 a\x{123}aa\=offset=4 \= Expect bad offset value a\x{123}aa\=offset=6 \= Expect bad UTF-8 offset a\x{123}aa\=offset=2 \= Expect no match a\x{123}aa\=offset=5 /\x{1234}+/Ii,utf /\x{1234}+?/Ii,utf /\x{1234}++/Ii,utf /\x{1234}{2}/Ii,utf /[^\x{c4}]/IB,utf /X+\x{200}/IB,utf /\R/I,utf /\777/IB,utf /\w+\x{C4}/B,utf a\x{C4}\x{C4} /\w+\x{C4}/B,utf,tables=2 a\x{C4}\x{C4} /\W+\x{C4}/B,utf !\x{C4} /\W+\x{C4}/B,utf,tables=2 !\x{C4} /\W+\x{A1}/B,utf !\x{A1} /\W+\x{A1}/B,utf,tables=2 !\x{A1} /X\s+\x{A0}/B,utf X\x20\x{A0}\x{A0} /X\s+\x{A0}/B,utf,tables=2 X\x20\x{A0}\x{A0} /\S+\x{A0}/B,utf X\x{A0}\x{A0} /\S+\x{A0}/B,utf,tables=2 X\x{A0}\x{A0} /\x{a0}+\s!/B,utf \x{a0}\x20! /\x{a0}+\s!/B,utf,tables=2 \x{a0}\x20! /A/utf \x{ff000041} \x{7f000041} /(*UTF8)abc/never_utf /abc/utf,never_utf /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf /AB\x{1fb0}/IB,utf /AB\x{1fb0}/IBi,utf /\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} /[â±¥]/Bi,utf /[^â±¥]/Bi,utf /\h/I /\v/I /\R/I /[[:blank:]]/B,ucp /\x{212a}+/Ii,utf KKkk\x{212a} /s+/Ii,utf SSss\x{17f} /\x{100}*A/IB,utf A /\x{100}*\d(?R)/IB,utf /[Z\x{100}]/IB,utf Z\x{100} \x{100} \x{100}Z /[z-\x{100}]/IB,utf /[z\Qa-d]Ä€\E]/IB,utf \x{100} Ä€ /[ab\x{100}]abc(xyz(?1))/IB,utf /\x{100}*\s/IB,utf /\x{100}*\d/IB,utf /\x{100}*\w/IB,utf /\x{100}*\D/IB,utf /\x{100}*\S/IB,utf /\x{100}*\W/IB,utf /[\x{105}-\x{109}]/IBi,utf \x{104} \x{105} \x{109} \= Expect no match \x{100} \x{10a} /[z-\x{100}]/IBi,utf Z z \x{39c} \x{178} | \x{80} \x{ff} \x{100} \x{101} \= Expect no match \x{102} Y y /[z-\x{100}]/IBi,utf /\x{3a3}B/IBi,utf /abc/utf,replace=à abc # End of testinput10