# This set of tests checks the API, internals, and non-Perl stuff for UTF # support, including Unicode properties. However, tests that give different # results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and # 12). # PCRE2 and Perl disagree about the characteristics of certain Unicode # characters. For example, 061C is considered by Perl to be Arabic, though # is it not listed as such in the Unicode Scripts.txt file, and 2066-2069 are # graphic and printable according to Perl, though they are actually "isolate" # control characters. That is why the following tests are here rather than in # test 4. /^[\p{Arabic}]/utf ** Failers \x{061c} /^[[:graph:]]+$/utf,ucp ** Failers \x{61c} \x{2066} \x{2067} \x{2068} \x{2069} /^[[:print:]]+$/utf,ucp ** Failers \x{61c} \x{2066} \x{2067} \x{2068} \x{2069} /^[[:^graph:]]+$/utf,ucp \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680} \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069} /^[[:^print:]]+$/utf,ucp \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067} \x{2068}\x{2069} # Perl does not consider U+180e to be a space character. It is true that it # does not appear in the Unicode PropList.txt file as such, but in many other # sources it is listed as a space, and has been treated as such in PCRE for # a long time. /^>[[:blank:]]*/utf,ucp >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} /^A\s+Z/utf,ucp A\x{85}\x{180e}\x{2005}Z /^A[\s]+Z/utf,ucp A\x{2005}Z A\x{85}\x{2005}Z /^[[:graph:]]+$/utf,ucp \x{180e} /^[[:print:]]+$/utf,ucp \x{180e} /^[[:^graph:]]+$/utf,ucp \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e} /^[[:^print:]]+$/utf,ucp \x{180e} # End of U+180E tests. # --------------------------------------------------------------------- /\x{110000}/IB,utf /\o{4200000}/IB,utf /\x{ffffffff}/utf /\o{37777777777}/utf /\x{100000000}/utf /\o{77777777777}/utf /\x{d800}/utf /\o{154000}/utf /\x{dfff}/utf /\o{157777}/utf /\x{d7ff}/utf /\o{153777}/utf /\x{e000}/utf /\o{170000}/utf /^\x{100}a\x{1234}/utf \x{100}a\x{1234}bcd /\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf \x{0041}\x{2262}\x{0391}\x{002e} /.{3,5}X/IB,utf \x{212ab}\x{212ab}\x{212ab}\x{861}X /.{3,5}?/IB,utf \x{212ab}\x{212ab}\x{212ab}\x{861} /(?<=\C)X/utf Should produce an error diagnostic /^[ab]/IB,utf bar *** Failers c \x{ff} \x{100} /^[^ab]/IB,utf c \x{ff} \x{100} *** Failers aaa /\x{100}*(\d+|"(?1)")/utf 1234 "1234" \x{100}1234 "\x{100}1234" \x{100}\x{100}12ab \x{100}\x{100}"12" *** Failers \x{100}\x{100}abcd /\x{100}*/IB,utf /a\x{100}*/IB,utf /ab\x{100}*/IB,utf /[\x{200}-\x{100}]/utf /[Ā-Ą]/utf \x{100} \x{104} *** Failers \x{105} \x{ff} /[\xFF]/IB >\xff< /[^\xFF]/IB /[Ä-Ü]/utf Ö # Matches without Study \x{d6} /[Ä-Ü]/utf Ö <-- Same with Study \x{d6} /[\x{c4}-\x{dc}]/utf Ö # Matches without Study \x{d6} /[\x{c4}-\x{dc}]/utf Ö <-- Same with Study \x{d6} /[^\x{100}]abc(xyz(?1))/IB,utf /(\x{100}(b(?2)c))?/IB,utf /(\x{100}(b(?2)c)){0,2}/IB,utf /(\x{100}(b(?1)c))?/IB,utf /(\x{100}(b(?1)c)){0,2}/IB,utf /\W/utf A.B A\x{100}B /\w/utf \x{100}X /^\ሴ/IB,utf /()()()()()()()()()() ()()()()()()()()()() ()()()()()()()()()() ()()()()()()()()()() A (x) (?41) B/x,utf AxxB /^[\x{100}\E-\Q\E\x{150}]/B,utf /^[\QĀ\E-\QŐ\E]/B,utf /^abc./gmx,newline=any,utf abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK /abc.$/gmx,newline=any,utf abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9 /^a\Rb/bsr=unicode,utf a\nb a\rb a\r\nb a\x0bb a\x0cb a\x{85}b a\x{2028}b a\x{2029}b ** Failers a\n\rb /^a\R*b/bsr=unicode,utf ab a\nb a\rb a\r\nb a\x0bb a\x0c\x{2028}\x{2029}b a\x{85}b a\n\rb a\n\r\x{85}\x0cb /^a\R+b/bsr=unicode,utf a\nb a\rb a\r\nb a\x0bb a\x0c\x{2028}\x{2029}b a\x{85}b a\n\rb a\n\r\x{85}\x0cb ** Failers ab /^a\R{1,3}b/bsr=unicode,utf a\nb a\n\rb a\n\r\x{85}b a\r\n\r\nb a\r\n\r\n\r\nb a\n\r\n\rb a\n\n\r\nb ** Failers a\n\n\n\rb a\r /\H\h\V\v/utf X X\x0a X\x09X\x0b ** Failers \x{a0} X\x0a /\H*\h+\V?\v{3,4}/utf \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a \x09\x20\x{a0}\x0a\x0b\x0c ** Failers \x09\x20\x{a0}\x0a\x0b /\H\h\V\v/utf \x{3001}\x{3000}\x{2030}\x{2028} X\x{180e}X\x{85} ** Failers \x{2009} X\x0a /\H*\h+\V?\v{3,4}/utf \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a \x09\x20\x{202f}\x0a\x0b\x0c ** Failers \x09\x{200a}\x{a0}\x{2028}\x0b /[\h]/B,utf >\x{1680} /[\h]{3,}/B,utf >\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}< /[\v]/B,utf /[\H]/B,utf /[\V]/B,utf /.*$/newline=any,utf \x{1ec5} /a\Rb/I,bsr=anycrlf,utf a\rb a\nb a\r\nb ** Failers a\x{85}b a\x0bb /a\Rb/I,bsr=unicode,utf a\rb a\nb a\r\nb a\x{85}b a\x0bb /a\R?b/I,bsr=anycrlf,utf a\rb a\nb a\r\nb ** Failers a\x{85}b a\x0bb /a\R?b/I,bsr=unicode,utf a\rb a\nb a\r\nb a\x{85}b a\x0bb ** Failers /.*a.*=.b.*/utf,newline=any QQQ\x{2029}ABCaXYZ=!bPQR ** Failers a\x{2029}b \x61\xe2\x80\xa9\x62 /[[:a\x{100}b:]]/utf /a[^]b/utf,alt_bsux,allow_empty_class,match_unset_backref a\x{1234}b a\nb ** Failers ab /a[^]+b/utf,alt_bsux,allow_empty_class,match_unset_backref aXb a\nX\nX\x{1234}b ** Failers ab /(\x{de})\1/ \x{de}\x{de} /X/newline=any,utf,firstline A\x{1ec5}ABCXYZ /Xa{2,4}b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps /Xa{2,4}?b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps /Xa{2,4}+b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps /X\x{123}{2,4}b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps /X\x{123}{2,4}?b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps /X\x{123}{2,4}+b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps /X\x{123}{2,4}b/utf Xx\=ps X\x{123}x\=ps X\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}\x{123}x\=ps /X\x{123}{2,4}?b/utf Xx\=ps X\x{123}x\=ps X\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}\x{123}x\=ps /X\x{123}{2,4}+b/utf Xx\=ps X\x{123}x\=ps X\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}x\=ps X\x{123}\x{123}\x{123}\x{123}x\=ps /X\d{2,4}b/utf X\=ps X3\=ps X33\=ps X333\=ps X3333\=ps /X\d{2,4}?b/utf X\=ps X3\=ps X33\=ps X333\=ps X3333\=ps /X\d{2,4}+b/utf X\=ps X3\=ps X33\=ps X333\=ps X3333\=ps /X\D{2,4}b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps /X\D{2,4}?b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps /X\D{2,4}+b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps /X\D{2,4}b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps /X\D{2,4}?b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps /X\D{2,4}+b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps /X[abc]{2,4}b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps /X[abc]{2,4}?b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps /X[abc]{2,4}+b/utf X\=ps Xa\=ps Xaa\=ps Xaaa\=ps Xaaaa\=ps /X[abc\x{123}]{2,4}b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps /X[abc\x{123}]{2,4}?b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps /X[abc\x{123}]{2,4}+b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps /X[^a]{2,4}b/utf X\=ps Xz\=ps Xzz\=ps Xzzz\=ps Xzzzz\=ps /X[^a]{2,4}?b/utf X\=ps Xz\=ps Xzz\=ps Xzzz\=ps Xzzzz\=ps /X[^a]{2,4}+b/utf X\=ps Xz\=ps Xzz\=ps Xzzz\=ps Xzzzz\=ps /X[^a]{2,4}b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps /X[^a]{2,4}?b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps /X[^a]{2,4}+b/utf X\=ps X\x{123}\=ps X\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\=ps X\x{123}\x{123}\x{123}\x{123}\=ps /(Y)X\1{2,4}b/utf YX\=ps YXY\=ps YXYY\=ps YXYYY\=ps YXYYYY\=ps /(Y)X\1{2,4}?b/utf YX\=ps YXY\=ps YXYY\=ps YXYYY\=ps YXYYYY\=ps /(Y)X\1{2,4}+b/utf YX\=ps YXY\=ps YXYY\=ps YXYYY\=ps YXYYYY\=ps /(\x{123})X\1{2,4}b/utf \x{123}X\=ps \x{123}X\x{123}\=ps \x{123}X\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps /(\x{123})X\1{2,4}?b/utf \x{123}X\=ps \x{123}X\x{123}\=ps \x{123}X\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps /(\x{123})X\1{2,4}+b/utf \x{123}X\=ps \x{123}X\x{123}\=ps \x{123}X\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\=ps \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps /\bthe cat\b/utf the cat\=ps the cat\=ph /abcd*/utf xxxxabcd\=ps xxxxabcd\=ph /abcd*/i,utf xxxxabcd\=ps xxxxabcd\=ph XXXXABCD\=ps XXXXABCD\=ph /abc\d*/utf xxxxabc1\=ps xxxxabc1\=ph /(a)bc\1*/utf xxxxabca\=ps xxxxabca\=ph /abc[de]*/utf xxxxabcde\=ps xxxxabcde\=ph /X\W{3}X/utf X\=ps /\sxxx\s/utf,tables=2 AB\x{85}xxx\x{a0}XYZ AB\x{a0}xxx\x{85}XYZ /\S \S/utf,tables=2 \x{a2} \x{84} 'A#хц'Bx,newline=any,utf 'A#хц PQ'Bx,newline=any,utf /a+#хaa z#XX?/Bx,newline=any,utf /a+#хaa z#х?/Bx,newline=any,utf /\g{A}xxx#bXX(?'A'123) (?'A'456)/Bx,newline=any,utf /\g{A}xxx#bх(?'A'123) (?'A'456)/Bx,newline=any,utf /^\cģ/utf /(\R*)(.)/s,utf \r\n \r\r\n\n\r \r\r\n\n\r\n /(\R)*(.)/s,utf \r\n \r\r\n\n\r \r\r\n\n\r\n /[^\x{1234}]+/Ii,utf /[^\x{1234}]+?/Ii,utf /[^\x{1234}]++/Ii,utf /[^\x{1234}]{2}/Ii,utf /f.*/ for\=ph /f.*/s for\=ph /f.*/utf for\=ph /f.*/s,utf for\=ph /\x{d7ff}\x{e000}/utf /\x{d800}/utf /\x{dfff}/utf /\h+/utf \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000} /[\h\x{e000}]+/B,utf \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000} /\H+/utf \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} \x{2000}\x{200a}\x{1fff}\x{200b} \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001} /[\H\x{d7ff}]+/B,utf \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} \x{2000}\x{200a}\x{1fff}\x{200b} \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001} /\v+/utf \x{2027}\x{2030}\x{2028}\x{2029} \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d /[\v\x{e000}]+/B,utf \x{2027}\x{2030}\x{2028}\x{2029} \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d /\V+/utf \x{2028}\x{2029}\x{2027}\x{2030} \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86} /[\V\x{d7ff}]+/B,utf \x{2028}\x{2029}\x{2027}\x{2030} \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86} /\R+/bsr=unicode,utf \x{2027}\x{2030}\x{2028}\x{2029} \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d /(..)\1/utf ab\=ps aba\=ps abab\=ps /(..)\1/i,utf ab\=ps abA\=ps aBAb\=ps /(..)\1{2,}/utf ab\=ps aba\=ps abab\=ps ababa\=ps ababab\=ps ababab\=ph abababa\=ps abababa\=ph /(..)\1{2,}/i,utf ab\=ps aBa\=ps aBAb\=ps AbaBA\=ps abABAb\=ps aBAbaB\=ph abABabA\=ps abaBABa\=ph /(..)\1{2,}?x/i,utf ab\=ps abA\=ps aBAb\=ps abaBA\=ps abAbaB\=ps abaBabA\=ps abAbABaBx\=ps /./utf,newline=crlf \r\=ps \r\=ph /.{2,3}/utf,newline=crlf \r\=ps \r\=ph \r\r\=ps \r\r\=ph \r\r\r\=ps \r\r\r\=ph /.{2,3}?/utf,newline=crlf \r\=ps \r\=ph \r\r\=ps \r\r\=ph \r\r\r\=ps \r\r\r\=ph /[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/B,utf /[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/Bi,utf /[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/B,utf /[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf /(?<=\x{1234}\x{1234})\bxy/I,utf /(?<!^)ETA/utf ETA /\u0100/B,utf,alt_bsux,allow_empty_class,match_unset_backref /[\u0100-\u0200]/B,utf,alt_bsux,allow_empty_class,match_unset_backref /\ud800/utf,alt_bsux,allow_empty_class,match_unset_backref /^a+[a\x{200}]/B,utf aa /[b-d\x{200}-\x{250}]*[ae-h]?#[\x{200}-\x{250}]{0,8}[\x00-\xff]*#[\x{200}-\x{250}]+[a-z]/B,utf /[\p{L}]/IB /[\p{^L}]/IB /[\P{L}]/IB /[\P{^L}]/IB /[abc\p{L}\x{0660}]/IB,utf /[\p{Nd}]/IB,utf 1234 /[\p{Nd}+-]+/IB,utf 1234 12-34 12+\x{661}-34 ** Failers abcd /(?:[\PPa*]*){8,}/ /[\P{Any}]/B /[\P{Any}\E]/B /(\P{Yi}+\277)/ /(\P{Yi}+\277)?/ /(?<=\P{Yi}{3}A)X/ /\p{Yi}+(\P{Yi}+)(?1)/ /(\P{Yi}{2}\277)?/ /[\P{Yi}A]/ /[\P{Yi}\P{Yi}\P{Yi}A]/ /[^\P{Yi}A]/ /[^\P{Yi}\P{Yi}\P{Yi}A]/ /(\P{Yi}*\277)*/ /(\P{Yi}*?\277)*/ /(\p{Yi}*+\277)*/ /(\P{Yi}?\277)*/ /(\P{Yi}??\277)*/ /(\p{Yi}?+\277)*/ /(\P{Yi}{0,3}\277)*/ /(\P{Yi}{0,3}?\277)*/ /(\p{Yi}{0,3}+\277)*/ /\p{Zl}{2,3}+/B,utf \x{2028}\x{2028}\x{2028} /\p{Zl}/B,utf /\p{Lu}{3}+/B,utf /\pL{2}+/B,utf /\p{Cc}{2}+/B,utf /^\p{Cf}/utf \x{180e} \x{061c} \x{2066} \x{2067} \x{2068} \x{2069} /^\p{Cs}/utf \x{dfff}\=no_utf_check ** Failers \x{09f} /^\p{Mn}/utf \x{1a1b} /^\p{Pe}/utf \x{2309} \x{230b} /^\p{Ps}/utf \x{2308} \x{230a} /^\p{Sc}+/utf $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6} \x{9f2} ** Failers X \x{2c2} /^\p{Zs}/utf \ \ \x{a0} \x{1680} \x{2000} \x{2001} ** Failers \x{2028} \x{200d} # These are here because Perl has problems with the negative versions of the # properties and has changed how it behaves for caseless matching. /\p{^Lu}/i,utf 1234 ** Failers ABC /\P{Lu}/i,utf 1234 ** Failers ABC /\p{Ll}/i,utf a Az ** Failers ABC /\p{Lu}/i,utf A a\x{10a0}B ** Failers a \x{1d00} /\p{Lu}/i,utf A aZ ** Failers abc /[\x{c0}\x{391}]/i,utf \x{c0} \x{e0} # The next two are special cases where the lengths of the different cases of # the same character differ. The first went wrong with heap frame storage; the # second was broken in all cases. /^\x{023a}+?(\x{0130}+)/i,utf \x{023a}\x{2c65}\x{0130} /^\x{023a}+([^X])/i,utf \x{023a}\x{2c65}X /\x{c0}+\x{116}+/i,utf \x{c0}\x{e0}\x{116}\x{117} /[\x{c0}\x{116}]+/i,utf \x{c0}\x{e0}\x{116}\x{117} /(\x{de})\1/i,utf \x{de}\x{de} \x{de}\x{fe} \x{fe}\x{fe} \x{fe}\x{de} /^\x{c0}$/i,utf \x{c0} \x{e0} /^\x{e0}$/i,utf \x{c0} \x{e0} # The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE # will match it only with UCP support, because without that it has no notion # of case for anything other than the ASCII letters. /((?i)[\x{c0}])/utf \x{c0} \x{e0} /(?i:[\x{c0}])/utf \x{c0} \x{e0} # These are PCRE's extra properties to help with Unicodizing \d etc. /^\p{Xan}/utf ABCD 1234 \x{6ca} \x{a6c} \x{10a7} ** Failers _ABC /^\p{Xan}+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ ** Failers _ABC /^\p{Xan}+?/utf \x{6ca}\x{a6c}\x{10a7}_ /^\p{Xan}*/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ /^\p{Xan}{2,9}/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ /^\p{Xan}{2,9}?/utf \x{6ca}\x{a6c}\x{10a7}_ /^[\p{Xan}]/utf ABCD1234_ 1234abcd_ \x{6ca} \x{a6c} \x{10a7} ** Failers _ABC /^[\p{Xan}]+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ ** Failers _ABC /^>\p{Xsp}/utf >\x{1680}\x{2028}\x{0b} >\x{a0} ** Failers \x{0b} /^>\p{Xsp}+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^>\p{Xsp}+?/utf >\x{1680}\x{2028}\x{0b} /^>\p{Xsp}*/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^>\p{Xsp}{2,9}/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^>\p{Xsp}{2,9}?/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^>[\p{Xsp}]/utf >\x{2028}\x{0b} /^>[\p{Xsp}]+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^>\p{Xps}/utf >\x{1680}\x{2028}\x{0b} >\x{a0} ** Failers \x{0b} /^>\p{Xps}+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^>\p{Xps}+?/utf >\x{1680}\x{2028}\x{0b} /^>\p{Xps}*/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^>\p{Xps}{2,9}/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^>\p{Xps}{2,9}?/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^>[\p{Xps}]/utf >\x{2028}\x{0b} /^>[\p{Xps}]+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^\p{Xwd}/utf ABCD 1234 \x{6ca} \x{a6c} \x{10a7} _ABC ** Failers [] /^\p{Xwd}+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ /^\p{Xwd}+?/utf \x{6ca}\x{a6c}\x{10a7}_ /^\p{Xwd}*/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ /^\p{Xwd}{2,9}/utf A_B12\x{6ca}\x{a6c}\x{10a7} /^\p{Xwd}{2,9}?/utf \x{6ca}\x{a6c}\x{10a7}_ /^[\p{Xwd}]/utf ABCD1234_ 1234abcd_ \x{6ca} \x{a6c} \x{10a7} _ABC ** Failers [] /^[\p{Xwd}]+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ # A check not in UTF-8 mode /^[\p{Xwd}]+/ ABCD1234_ # Some negative checks /^[\P{Xwd}]+/utf !.+\x{019}\x{35a}AB /^[\p{^Xwd}]+/utf !.+\x{019}\x{35a}AB /[\D]/B,utf,ucp 1\x{3c8}2 /[\d]/B,utf,ucp >\x{6f4}< /[\S]/B,utf,ucp \x{1680}\x{6f4}\x{1680} /[\s]/B,utf,ucp >\x{1680}< /[\W]/B,utf,ucp A\x{1712}B /[\w]/B,utf,ucp >\x{1723}< /\D/B,utf,ucp 1\x{3c8}2 /\d/B,utf,ucp >\x{6f4}< /\S/B,utf,ucp \x{1680}\x{6f4}\x{1680} /\s/B,utf,ucp >\x{1680}> /\W/B,utf,ucp A\x{1712}B /\w/B,utf,ucp >\x{1723}< /[[:alpha:]]/B,ucp /[[:lower:]]/B,ucp /[[:upper:]]/B,ucp /[[:alnum:]]/B,ucp /[[:ascii:]]/B,ucp /[[:cntrl:]]/B,ucp /[[:digit:]]/B,ucp /[[:graph:]]/B,ucp /[[:print:]]/B,ucp /[[:punct:]]/B,ucp /[[:space:]]/B,ucp /[[:word:]]/B,ucp /[[:xdigit:]]/B,ucp # Unicode properties for \b abd \B /\b...\B/utf,ucp abc_ \x{37e}abc\x{376} \x{37e}\x{376}\x{371}\x{393}\x{394} !\x{c0}++\x{c1}\x{c2} !\x{c0}+++++ # Without PCRE_UCP, non-ASCII always fail, even if < 256 /\b...\B/utf abc_ ** Failers \x{37e}abc\x{376} \x{37e}\x{376}\x{371}\x{393}\x{394} !\x{c0}++\x{c1}\x{c2} !\x{c0}+++++ # With PCRE_UCP, non-UTF8 chars that are < 256 still check properties /\b...\B/ucp abc_ !\x{c0}++\x{c1}\x{c2} !\x{c0}+++++ # Some of these are silly, but they check various combinations /[[:^alpha:][:^cntrl:]]+/B,utf,ucp 123 abc /[[:^cntrl:][:^alpha:]]+/B,utf,ucp 123 abc /[[:alpha:]]+/B,utf,ucp abc /[[:^alpha:]\S]+/B,utf,ucp 123 abc /[^\d]+/B,utf,ucp abc123 abc\x{123} \x{660}abc /\p{Lu}+9\p{Lu}+B\p{Lu}+b/B /\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/B /\P{Lu}+9\P{Lu}+B\P{Lu}+b/B /\p{Han}+X\p{Greek}+\x{370}/B,utf /\p{Xan}+!\p{Xan}+A/B /\p{Xsp}+!\p{Xsp}\t/B /\p{Xps}+!\p{Xps}\t/B /\p{Xwd}+!\p{Xwd}_/B /A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp # These behaved oddly in Perl, so they are kept in this test /(\x{23a}\x{23a}\x{23a})?\1/i,utf \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} /(ȺȺȺ)?\1/i,utf ȺȺȺⱥⱥ /(\x{23a}\x{23a}\x{23a})?\1/i,utf \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} /(ȺȺȺ)?\1/i,utf ȺȺȺⱥⱥⱥ /(\x{23a}\x{23a}\x{23a})\1/i,utf \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} /(ȺȺȺ)\1/i,utf ȺȺȺⱥⱥ /(\x{23a}\x{23a}\x{23a})\1/i,utf \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} /(ȺȺȺ)\1/i,utf ȺȺȺⱥⱥⱥ /(\x{2c65}\x{2c65})\1/i,utf \x{2c65}\x{2c65}\x{23a}\x{23a} /(ⱥⱥ)\1/i,utf ⱥⱥȺȺ /(\x{23a}\x{23a}\x{23a})\1Y/i,utf X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ /(\x{2c65}\x{2c65})\1Y/i,utf X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ # These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE /^[\p{Batak}]/utf \x{1bc0} \x{1bff} ** Failers \x{1bf4} /^[\p{Brahmi}]/utf \x{11000} \x{1106f} ** Failers \x{1104e} /^[\p{Mandaic}]/utf \x{840} \x{85e} ** Failers \x{85c} \x{85d} /(\X*)(.)/s,utf A\x{300} /^S(\X*)e(\X*)$/utf Stéréo /^\X/utf ́réo /^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames aX41z *** Failers aAz /(?<=ab\Cde)X/utf /\X/ a\=ps a\=ph /\Xa/ aa\=ps aa\=ph /\X{2}/ aa\=ps aa\=ph /\X+a/ a\=ps aa\=ps aa\=ph /\X+?a/ a\=ps ab\=ps aa\=ps aa\=ph aba\=ps # These Unicode 6.1.0 scripts are not known to Perl. /\p{Chakma}\d/utf,ucp \x{11100}\x{1113c} /\p{Takri}\d/utf,ucp \x{11680}\x{116c0} /^\X/utf A\=ps A\=ph A\x{300}\x{301}\=ps A\x{300}\x{301}\=ph A\x{301}\=ps A\x{301}\=ph /^\X{2,3}/utf A\=ps A\=ph AA\=ps AA\=ph A\x{300}\x{301}\=ps A\x{300}\x{301}\=ph A\x{300}\x{301}A\x{300}\x{301}\=ps A\x{300}\x{301}A\x{300}\x{301}\=ph /^\X{2}/utf AA\=ps AA\=ph A\x{300}\x{301}A\x{300}\x{301}\=ps A\x{300}\x{301}A\x{300}\x{301}\=ph /^\X+/utf AA\=ps AA\=ph /^\X+?Z/utf AA\=ps AA\=ph /A\x{3a3}B/IBi,utf /[\x{3a3}]/Bi,utf /[^\x{3a3}]/Bi,utf /[\x{3a3}]+/Bi,utf /[^\x{3a3}]+/Bi,utf /a*\x{3a3}/Bi,utf /\x{3a3}+a/Bi,utf /\x{3a3}*\x{3c2}/Bi,utf /\x{3a3}{3}/i,utf,aftertext \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} /\x{3a3}{2,4}/i,utf,aftertext \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} /\x{3a3}{2,4}?/i,utf,aftertext \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} /\x{3a3}+./i,utf,aftertext \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} /\x{3a3}++./i,utf,aftertext ** Failers \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} /\x{3a3}*\x{3c2}/Bi,utf /[^\x{3a3}]*\x{3c2}/Bi,utf /[^a]*\x{3c2}/Bi,utf /ist/Bi,utf ikt /is+t/i,utf iSs\x{17f}t ikt /is+?t/i,utf ikt /is?t/i,utf ikt /is{2}t/i,utf iskt # This property is a PCRE special /^\p{Xuc}/utf $abc @abc `abc \x{1234}abc ** Failers abc /^\p{Xuc}+/utf $@`\x{a0}\x{1234}\x{e000}** ** Failers \x{9f} /^\p{Xuc}+?/utf $@`\x{a0}\x{1234}\x{e000}** ** Failers \x{9f} /^\p{Xuc}+?\*/utf $@`\x{a0}\x{1234}\x{e000}** ** Failers \x{9f} /^\p{Xuc}++/utf $@`\x{a0}\x{1234}\x{e000}** ** Failers \x{9f} /^\p{Xuc}{3,5}/utf $@`\x{a0}\x{1234}\x{e000}** ** Failers \x{9f} /^\p{Xuc}{3,5}?/utf $@`\x{a0}\x{1234}\x{e000}** ** Failers \x{9f} /^[\p{Xuc}]/utf $@`\x{a0}\x{1234}\x{e000}** ** Failers \x{9f} /^[\p{Xuc}]+/utf $@`\x{a0}\x{1234}\x{e000}** ** Failers \x{9f} /^\P{Xuc}/utf abc ** Failers $abc @abc `abc \x{1234}abc /^[\P{Xuc}]/utf abc ** Failers $abc @abc `abc \x{1234}abc # Some auto-possessification tests /\pN+\z/B /\PN+\z/B /\pN+/B /\PN+/B /\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp /\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp /\p{N}+\p{Any} \p{N}+\p{L&} \p{N}+\p{L} \p{N}+\P{L} \p{N}+\P{N} \p{N}+\p{Lu} \p{N}+\p{Han} \p{N}+\p{Xan} \p{N}+\p{Xsp} \p{N}+\p{Xps} \p{Xwd}+\p{N} \p{N}+\p{Xuc}/Bx,ucp /\p{Lu}+\p{Any} \p{Lu}+\p{L&} \p{Lu}+\p{L} \p{Lu}+\p{Lu} \P{Lu}+\p{Lu} \p{Lu}+\p{Nd} \p{Lu}+\P{Nd} \p{Lu}+\p{Han} \p{Lu}+\p{Xan} \p{Lu}+\p{Xsp} \p{Lu}+\p{Xps} \p{Xwd}+\p{Lu} \p{Lu}+\p{Xuc}/Bx,ucp /\p{Han}+\p{Lu} \p{Han}+\p{L&} \p{Han}+\p{L} \p{Han}+\p{Lu} \p{Han}+\p{Arabic} \p{Arabic}+\p{Arabic} \p{Han}+\p{Xan} \p{Han}+\p{Xsp} \p{Han}+\p{Xps} \p{Xwd}+\p{Han} \p{Han}+\p{Xuc}/Bx,ucp /\p{Xan}+\p{Any} \p{Xan}+\p{L&} \P{Xan}+\p{L&} \p{Xan}+\p{L} \p{Xan}+\p{Lu} \p{Xan}+\p{Han} \p{Xan}+\p{Xan} \p{Xan}+\P{Xan} \p{Xan}+\p{Xsp} \p{Xan}+\p{Xps} \p{Xwd}+\p{Xan} \p{Xan}+\p{Xuc}/Bx,ucp /\p{Xsp}+\p{Any} \p{Xsp}+\p{L&} \p{Xsp}+\p{L} \p{Xsp}+\p{Lu} \p{Xsp}+\p{Han} \p{Xsp}+\p{Xan} \p{Xsp}+\p{Xsp} \P{Xsp}+\p{Xsp} \p{Xsp}+\p{Xps} \p{Xwd}+\p{Xsp} \p{Xsp}+\p{Xuc}/Bx,ucp /\p{Xwd}+\p{Any} \p{Xwd}+\p{L&} \p{Xwd}+\p{L} \p{Xwd}+\p{Lu} \p{Xwd}+\p{Han} \p{Xwd}+\p{Xan} \p{Xwd}+\p{Xsp} \p{Xwd}+\p{Xps} \p{Xwd}+\p{Xwd} \p{Xwd}+\P{Xwd} \p{Xwd}+\p{Xuc}/Bx,ucp /\p{Xuc}+\p{Any} \p{Xuc}+\p{L&} \p{Xuc}+\p{L} \p{Xuc}+\p{Lu} \p{Xuc}+\p{Han} \p{Xuc}+\p{Xan} \p{Xuc}+\p{Xsp} \p{Xuc}+\p{Xps} \p{Xwd}+\p{Xuc} \p{Xuc}+\p{Xuc} \p{Xuc}+\P{Xuc}/Bx,ucp /\p{N}+\p{Ll} \p{N}+\p{Nd} \p{N}+\P{Nd}/Bx,ucp /\p{Xan}+\p{L} \p{Xan}+\p{N} \p{Xan}+\p{C} \p{Xan}+\P{L} \P{Xan}+\p{N} \p{Xan}+\P{C}/Bx,ucp /\p{L}+\p{Xan} \p{N}+\p{Xan} \p{C}+\p{Xan} \P{L}+\p{Xan} \p{N}+\p{Xan} \P{C}+\p{Xan} \p{L}+\P{Xan}/Bx,ucp /\p{Xan}+\p{Lu} \p{Xan}+\p{Nd} \p{Xan}+\p{Cc} \p{Xan}+\P{Ll} \P{Xan}+\p{No} \p{Xan}+\P{Cf}/Bx,ucp /\p{Lu}+\p{Xan} \p{Nd}+\p{Xan} \p{Cs}+\p{Xan} \P{Lt}+\p{Xan} \p{Nl}+\p{Xan} \P{Cc}+\p{Xan} \p{Lt}+\P{Xan}/Bx,ucp /\w+\p{P} \w+\p{Po} \w+\s \p{Xan}+\s \s+\p{Xan} \s+\w/Bx,ucp /\w+\P{P} \W+\p{Po} \w+\S \P{Xan}+\s \s+\P{Xan} \s+\W/Bx,ucp /\w+\p{Po} \w+\p{Pc} \W+\p{Po} \W+\p{Pc} \w+\P{Po} \w+\P{Pc}/Bx,ucp /\p{Nl}+\p{Xan} \P{Nl}+\p{Xan} \p{Nl}+\P{Xan} \P{Nl}+\P{Xan}/Bx,ucp /\p{Xan}+\p{Nl} \P{Xan}+\p{Nl} \p{Xan}+\P{Nl} \P{Xan}+\P{Nl}/Bx,ucp /\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp # End auto-possessification tests /\w+/B,utf,ucp,auto_callout abcd /[\p{N}]?+/B,no_auto_possess /[\p{L}ab]{2,3}+/B,no_auto_possess /\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \C+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx /.+\X/Bsx /\X+$/Bmx /\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\C \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx /\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp /[RST]+/Bi,utf,ucp /[R-T]+/Bi,utf,ucp /[Q-U]+/Bi,utf,ucp /^s?c/Iim,utf scat /\X?abc/utf,no_start_optimize \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06 /\x{100}\x{200}\K\x{300}/utf,startchar \x{100}\x{200}\x{300} # Test UTF characters in a substitution /ábc/utf,replace=XሴZ 123ábc123 /(?<=abc)(|def)/g,utf,replace=<$0> 123abcáyzabcdef789abcሴqr /[^\xff]((?1))/utf,debug # End of testinput5