# This set of tests checks the API, internals, and non-Perl stuff for UTF # support, including Unicode properties. However, tests that give different # results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and # 12). #newline_default lf any anycrlf # PCRE2 and Perl disagree about the characteristics of certain Unicode # characters. For example, 061C was considered by Perl to be Arabic, though # it was not listed as such in the Unicode Scripts.txt file for Unicode 8. # However, it *is* in that file for Unicode 10, but when I came to re-check, # Perl had changed in the meantime, with 5.026 not recognizing it as Arabic. # 2066-2069 are graphic and printable according to Perl, though they are # actually "isolate" control characters. That is why the following tests are # here rather than in test 4. /^[\p{Arabic}]/utf \x{061c} 0: \x{61c} /^[[:graph:]]+$/utf,ucp \= Expect no match \x{61c} No match \x{2066} No match \x{2067} No match \x{2068} No match \x{2069} No match /^[[:print:]]+$/utf,ucp \= Expect no match \x{61c} No match \x{2066} No match \x{2067} No match \x{2068} No match \x{2069} No match /^[[:^graph:]]+$/utf,ucp \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680} 0: \x{09}\x{0a}\x{1d} \x{85}\x{a0}\x{61c}\x{1680} \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069} 0: \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069} /^[[:^print:]]+$/utf,ucp \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067} 0: \x{09}\x{1d}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067} \x{2068}\x{2069} 0: \x{2068}\x{2069} # Perl does not consider U+180e to be a space character. It is true that it # does not appear in the Unicode PropList.txt file as such, but in many other # sources it is listed as a space, and has been treated as such in PCRE for # a long time. /^>[[:blank:]]*/utf,ucp >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} 0: > \x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{09} /^A\s+Z/utf,ucp A\x{85}\x{180e}\x{2005}Z 0: A\x{85}\x{180e}\x{2005}Z /^A[\s]+Z/utf,ucp A\x{2005}Z 0: A\x{2005}Z A\x{85}\x{2005}Z 0: A\x{85}\x{2005}Z /^[[:graph:]]+$/utf,ucp \= Expect no match \x{180e} No match /^[[:print:]]+$/utf,ucp \x{180e} 0: \x{180e} /^[[:^graph:]]+$/utf,ucp \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e} 0: \x{09}\x{0a}\x{1d} \x{85}\x{a0}\x{61c}\x{1680}\x{180e} /^[[:^print:]]+$/utf,ucp \= Expect no match \x{180e} No match # End of U+180E tests. # --------------------------------------------------------------------- /\x{110000}/IB,utf Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large /\o{4200000}/IB,utf Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large /\x{ffffffff}/utf Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large /\o{37777777777}/utf Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large /\x{100000000}/utf Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large /\o{77777777777}/utf Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large /\x{d800}/utf Failed: error 173 at offset 7: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) /\o{154000}/utf Failed: error 173 at offset 9: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) /\x{dfff}/utf Failed: error 173 at offset 7: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) /\o{157777}/utf Failed: error 173 at offset 9: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) /\x{d7ff}/utf /\o{153777}/utf /\x{e000}/utf /\o{170000}/utf /^\x{100}a\x{1234}/utf \x{100}a\x{1234}bcd 0: \x{100}a\x{1234} /\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf ------------------------------------------------------------------ Bra A\x{2262}\x{391}. Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf First code unit = 'A' Last code unit = '.' Subject length lower bound = 4 \x{0041}\x{2262}\x{0391}\x{002e} 0: A\x{2262}\x{391}. /.{3,5}X/IB,utf ------------------------------------------------------------------ Bra Any{3} Any{0,2} X Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf Last code unit = 'X' Subject length lower bound = 4 \x{212ab}\x{212ab}\x{212ab}\x{861}X 0: \x{212ab}\x{212ab}\x{212ab}\x{861}X /.{3,5}?/IB,utf ------------------------------------------------------------------ Bra Any{3} Any{0,2}? Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf Subject length lower bound = 3 \x{212ab}\x{212ab}\x{212ab}\x{861} 0: \x{212ab}\x{212ab}\x{212ab} /^[ab]/IB,utf ------------------------------------------------------------------ Bra ^ [ab] Ket End ------------------------------------------------------------------ Capture group count = 0 Compile options: utf Overall options: anchored utf Starting code units: a b Subject length lower bound = 1 bar 0: b \= Expect no match c No match \x{ff} No match \x{100} No match /\x{100}*(\d+|"(?1)")/utf 1234 0: 1234 1: 1234 "1234" 0: "1234" 1: "1234" \x{100}1234 0: \x{100}1234 1: 1234 "\x{100}1234" 0: \x{100}1234 1: 1234 \x{100}\x{100}12ab 0: \x{100}\x{100}12 1: 12 \x{100}\x{100}"12" 0: \x{100}\x{100}"12" 1: "12" \= Expect no match \x{100}\x{100}abcd No match /\x{100}*/IB,utf ------------------------------------------------------------------ Bra \x{100}*+ Ket End ------------------------------------------------------------------ Capture group count = 0 May match empty string Options: utf Subject length lower bound = 0 /a\x{100}*/IB,utf ------------------------------------------------------------------ Bra a \x{100}*+ Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf First code unit = 'a' Subject length lower bound = 1 /ab\x{100}*/IB,utf ------------------------------------------------------------------ Bra ab \x{100}*+ Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf First code unit = 'a' Last code unit = 'b' Subject length lower bound = 2 /[\x{200}-\x{100}]/utf Failed: error 108 at offset 15: range out of order in character class /[Ā-Ą]/utf \x{100} 0: \x{100} \x{104} 0: \x{104} \= Expect no match \x{105} No match \x{ff} No match /[\xFF]/IB ------------------------------------------------------------------ Bra \x{ff} Ket End ------------------------------------------------------------------ Capture group count = 0 First code unit = \xff Subject length lower bound = 1 >\xff< 0: \xff /[^\xFF]/IB ------------------------------------------------------------------ Bra [^\x{ff}] Ket End ------------------------------------------------------------------ Capture group count = 0 Subject length lower bound = 1 /[Ä-Ü]/utf Ö # Matches without Study 0: \x{d6} \x{d6} 0: \x{d6} /[Ä-Ü]/utf Ö <-- Same with Study 0: \x{d6} \x{d6} 0: \x{d6} /[\x{c4}-\x{dc}]/utf Ö # Matches without Study 0: \x{d6} \x{d6} 0: \x{d6} /[\x{c4}-\x{dc}]/utf Ö <-- Same with Study 0: \x{d6} \x{d6} 0: \x{d6} /[^\x{100}]abc(xyz(?1))/IB,utf ------------------------------------------------------------------ Bra [^\x{100}] abc CBra 1 xyz Recurse Ket Ket End ------------------------------------------------------------------ Capture group count = 1 Options: utf Last code unit = 'z' Subject length lower bound = 7 /(\x{100}(b(?2)c))?/IB,utf ------------------------------------------------------------------ Bra Brazero CBra 1 \x{100} CBra 2 b Recurse c Ket Ket Ket End ------------------------------------------------------------------ Capture group count = 2 May match empty string Options: utf Subject length lower bound = 0 /(\x{100}(b(?2)c)){0,2}/IB,utf ------------------------------------------------------------------ Bra Brazero Bra CBra 1 \x{100} CBra 2 b Recurse c Ket Ket Brazero CBra 1 \x{100} CBra 2 b Recurse c Ket Ket Ket Ket End ------------------------------------------------------------------ Capture group count = 2 May match empty string Options: utf Subject length lower bound = 0 /(\x{100}(b(?1)c))?/IB,utf ------------------------------------------------------------------ Bra Brazero CBra 1 \x{100} CBra 2 b Recurse c Ket Ket Ket End ------------------------------------------------------------------ Capture group count = 2 May match empty string Options: utf Subject length lower bound = 0 /(\x{100}(b(?1)c)){0,2}/IB,utf ------------------------------------------------------------------ Bra Brazero Bra CBra 1 \x{100} CBra 2 b Recurse c Ket Ket Brazero CBra 1 \x{100} CBra 2 b Recurse c Ket Ket Ket Ket End ------------------------------------------------------------------ Capture group count = 2 May match empty string Options: utf Subject length lower bound = 0 /\W/utf A.B 0: . A\x{100}B 0: \x{100} /\w/utf \x{100}X 0: X # Use no_start_optimize because the first code unit is different in 8-bit from # the wider modes. /^\ሴ/IB,utf,no_start_optimize ------------------------------------------------------------------ Bra ^ \x{1234} Ket End ------------------------------------------------------------------ Capture group count = 0 Compile options: no_start_optimize utf Overall options: anchored no_start_optimize utf /()()()()()()()()()() ()()()()()()()()()() ()()()()()()()()()() ()()()()()()()()()() A (x) (?41) B/x,utf AxxB Matched, but too many substrings 0: AxxB 1: 2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: /^[\x{100}\E-\Q\E\x{150}]/B,utf ------------------------------------------------------------------ Bra ^ [\x{100}-\x{150}] Ket End ------------------------------------------------------------------ /^[\QĀ\E-\QŐ\E]/B,utf ------------------------------------------------------------------ Bra ^ [\x{100}-\x{150}] Ket End ------------------------------------------------------------------ /^abc./gmx,newline=any,utf abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK 0: abc1 0: abc2 0: abc3 0: abc4 0: abc5 0: abc6 0: abc7 0: abc8 0: abc9 /abc.$/gmx,newline=any,utf abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9 0: abc1 0: abc2 0: abc3 0: abc4 0: abc5 0: abc6 0: abc7 0: abc8 0: abc9 /^a\Rb/bsr=unicode,utf a\nb 0: a\x{0a}b a\rb 0: a\x{0d}b a\r\nb 0: a\x{0d}\x{0a}b a\x0bb 0: a\x{0b}b a\x0cb 0: a\x{0c}b a\x{85}b 0: a\x{85}b a\x{2028}b 0: a\x{2028}b a\x{2029}b 0: a\x{2029}b \= Expect no match a\n\rb No match /^a\R*b/bsr=unicode,utf ab 0: ab a\nb 0: a\x{0a}b a\rb 0: a\x{0d}b a\r\nb 0: a\x{0d}\x{0a}b a\x0bb 0: a\x{0b}b a\x0c\x{2028}\x{2029}b 0: a\x{0c}\x{2028}\x{2029}b a\x{85}b 0: a\x{85}b a\n\rb 0: a\x{0a}\x{0d}b a\n\r\x{85}\x0cb 0: a\x{0a}\x{0d}\x{85}\x{0c}b /^a\R+b/bsr=unicode,utf a\nb 0: a\x{0a}b a\rb 0: a\x{0d}b a\r\nb 0: a\x{0d}\x{0a}b a\x0bb 0: a\x{0b}b a\x0c\x{2028}\x{2029}b 0: a\x{0c}\x{2028}\x{2029}b a\x{85}b 0: a\x{85}b a\n\rb 0: a\x{0a}\x{0d}b a\n\r\x{85}\x0cb 0: a\x{0a}\x{0d}\x{85}\x{0c}b \= Expect no match ab No match /^a\R{1,3}b/bsr=unicode,utf a\nb 0: a\x{0a}b a\n\rb 0: a\x{0a}\x{0d}b a\n\r\x{85}b 0: a\x{0a}\x{0d}\x{85}b a\r\n\r\nb 0: a\x{0d}\x{0a}\x{0d}\x{0a}b a\r\n\r\n\r\nb 0: a\x{0d}\x{0a}\x{0d}\x{0a}\x{0d}\x{0a}b a\n\r\n\rb 0: a\x{0a}\x{0d}\x{0a}\x{0d}b a\n\n\r\nb 0: a\x{0a}\x{0a}\x{0d}\x{0a}b \= Expect no match a\n\n\n\rb No match a\r No match /\H\h\V\v/utf X X\x0a 0: X X\x{0a} X\x09X\x0b 0: X\x{09}X\x{0b} \= Expect no match \x{a0} X\x0a No match /\H*\h+\V?\v{3,4}/utf \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a 0: \x{09} \x{a0}X\x{0a}\x{0b}\x{0c}\x{0d} \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a 0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c}\x{0d} \x09\x20\x{a0}\x0a\x0b\x0c 0: \x{09} \x{a0}\x{0a}\x{0b}\x{0c} \= Expect no match \x09\x20\x{a0}\x0a\x0b No match /\H\h\V\v/utf \x{3001}\x{3000}\x{2030}\x{2028} 0: \x{3001}\x{3000}\x{2030}\x{2028} X\x{180e}X\x{85} 0: X\x{180e}X\x{85} \= Expect no match \x{2009} X\x0a No match /\H*\h+\V?\v{3,4}/utf \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a 0: \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x{0c}\x{0d} \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a 0: \x{09}\x{205f}\x{a0}\x{0a}\x{2029}\x{0c}\x{2028} \x09\x20\x{202f}\x0a\x0b\x0c 0: \x{09} \x{202f}\x{0a}\x{0b}\x{0c} \= Expect no match \x09\x{200a}\x{a0}\x{2028}\x0b No match /[\h]/B,utf ------------------------------------------------------------------ Bra [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] Ket End ------------------------------------------------------------------ >\x{1680} 0: \x{1680} /[\h]{3,}/B,utf ------------------------------------------------------------------ Bra [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]{3,}+ Ket End ------------------------------------------------------------------ >\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}< 0: \x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000} /[\v]/B,utf ------------------------------------------------------------------ Bra [\x0a-\x0d\x85\x{2028}-\x{2029}] Ket End ------------------------------------------------------------------ /[\H]/B,utf ------------------------------------------------------------------ Bra [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}] Ket End ------------------------------------------------------------------ /[\V]/B,utf ------------------------------------------------------------------ Bra [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}] Ket End ------------------------------------------------------------------ /.*$/newline=any,utf \x{1ec5} 0: \x{1ec5} /a\Rb/I,bsr=anycrlf,utf Capture group count = 0 Options: utf \R matches CR, LF, or CRLF First code unit = 'a' Last code unit = 'b' Subject length lower bound = 3 a\rb 0: a\x{0d}b a\nb 0: a\x{0a}b a\r\nb 0: a\x{0d}\x{0a}b \= Expect no match a\x{85}b No match a\x0bb No match /a\Rb/I,bsr=unicode,utf Capture group count = 0 Options: utf \R matches any Unicode newline First code unit = 'a' Last code unit = 'b' Subject length lower bound = 3 a\rb 0: a\x{0d}b a\nb 0: a\x{0a}b a\r\nb 0: a\x{0d}\x{0a}b a\x{85}b 0: a\x{85}b a\x0bb 0: a\x{0b}b /a\R?b/I,bsr=anycrlf,utf Capture group count = 0 Options: utf \R matches CR, LF, or CRLF First code unit = 'a' Last code unit = 'b' Subject length lower bound = 2 a\rb 0: a\x{0d}b a\nb 0: a\x{0a}b a\r\nb 0: a\x{0d}\x{0a}b \= Expect no match a\x{85}b No match a\x0bb No match /a\R?b/I,bsr=unicode,utf Capture group count = 0 Options: utf \R matches any Unicode newline First code unit = 'a' Last code unit = 'b' Subject length lower bound = 2 a\rb 0: a\x{0d}b a\nb 0: a\x{0a}b a\r\nb 0: a\x{0d}\x{0a}b a\x{85}b 0: a\x{85}b a\x0bb 0: a\x{0b}b /.*a.*=.b.*/utf,newline=any QQQ\x{2029}ABCaXYZ=!bPQR 0: ABCaXYZ=!bPQR \= Expect no match a\x{2029}b No match \x61\xe2\x80\xa9\x62 No match /[[:a\x{100}b:]]/utf Failed: error 130 at offset 3: unknown POSIX class name /a[^]b/utf,allow_empty_class,match_unset_backref a\x{1234}b 0: a\x{1234}b a\nb 0: a\x{0a}b \= Expect no match ab No match /a[^]+b/utf,allow_empty_class,match_unset_backref aXb 0: aXb a\nX\nX\x{1234}b 0: a\x{0a}X\x{0a}X\x{1234}b \= Expect no match ab No match /(\x{de})\1/ \x{de}\x{de} 0: \xde\xde 1: \xde /X/newline=any,utf,firstline A\x{1ec5}ABCXYZ 0: X /Xa{2,4}b/utf X\=ps Partial match: X Xa\=ps Partial match: Xa Xaa\=ps Partial match: Xaa Xaaa\=ps Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa /Xa{2,4}?b/utf X\=ps Partial match: X Xa\=ps Partial match: Xa Xaa\=ps Partial match: Xaa Xaaa\=ps Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa /Xa{2,4}+b/utf X\=ps Partial match: X Xa\=ps Partial match: Xa Xaa\=ps Partial match: Xaa Xaaa\=ps Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa /X\x{123}{2,4}b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} /X\x{123}{2,4}?b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} /X\x{123}{2,4}+b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} /X\x{123}{2,4}b/utf \= Expect no match Xx\=ps No match X\x{123}x\=ps No match X\x{123}\x{123}x\=ps No match X\x{123}\x{123}\x{123}x\=ps No match X\x{123}\x{123}\x{123}\x{123}x\=ps No match /X\x{123}{2,4}?b/utf \= Expect no match Xx\=ps No match X\x{123}x\=ps No match X\x{123}\x{123}x\=ps No match X\x{123}\x{123}\x{123}x\=ps No match X\x{123}\x{123}\x{123}\x{123}x\=ps No match /X\x{123}{2,4}+b/utf \= Expect no match Xx\=ps No match X\x{123}x\=ps No match X\x{123}\x{123}x\=ps No match X\x{123}\x{123}\x{123}x\=ps No match X\x{123}\x{123}\x{123}\x{123}x\=ps No match /X\d{2,4}b/utf X\=ps Partial match: X X3\=ps Partial match: X3 X33\=ps Partial match: X33 X333\=ps Partial match: X333 X3333\=ps Partial match: X3333 /X\d{2,4}?b/utf X\=ps Partial match: X X3\=ps Partial match: X3 X33\=ps Partial match: X33 X333\=ps Partial match: X333 X3333\=ps Partial match: X3333 /X\d{2,4}+b/utf X\=ps Partial match: X X3\=ps Partial match: X3 X33\=ps Partial match: X33 X333\=ps Partial match: X333 X3333\=ps Partial match: X3333 /X\D{2,4}b/utf X\=ps Partial match: X Xa\=ps Partial match: Xa Xaa\=ps Partial match: Xaa Xaaa\=ps Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa /X\D{2,4}?b/utf X\=ps Partial match: X Xa\=ps Partial match: Xa Xaa\=ps Partial match: Xaa Xaaa\=ps Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa /X\D{2,4}+b/utf X\=ps Partial match: X Xa\=ps Partial match: Xa Xaa\=ps Partial match: Xaa Xaaa\=ps Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa /X\D{2,4}b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} /X\D{2,4}?b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} /X\D{2,4}+b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} /X[abc]{2,4}b/utf X\=ps Partial match: X Xa\=ps Partial match: Xa Xaa\=ps Partial match: Xaa Xaaa\=ps Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa /X[abc]{2,4}?b/utf X\=ps Partial match: X Xa\=ps Partial match: Xa Xaa\=ps Partial match: Xaa Xaaa\=ps Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa /X[abc]{2,4}+b/utf X\=ps Partial match: X Xa\=ps Partial match: Xa Xaa\=ps Partial match: Xaa Xaaa\=ps Partial match: Xaaa Xaaaa\=ps Partial match: Xaaaa /X[abc\x{123}]{2,4}b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} /X[abc\x{123}]{2,4}?b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} /X[abc\x{123}]{2,4}+b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} /X[^a]{2,4}b/utf X\=ps Partial match: X Xz\=ps Partial match: Xz Xzz\=ps Partial match: Xzz Xzzz\=ps Partial match: Xzzz Xzzzz\=ps Partial match: Xzzzz /X[^a]{2,4}?b/utf X\=ps Partial match: X Xz\=ps Partial match: Xz Xzz\=ps Partial match: Xzz Xzzz\=ps Partial match: Xzzz Xzzzz\=ps Partial match: Xzzzz /X[^a]{2,4}+b/utf X\=ps Partial match: X Xz\=ps Partial match: Xz Xzz\=ps Partial match: Xzz Xzzz\=ps Partial match: Xzzz Xzzzz\=ps Partial match: Xzzzz /X[^a]{2,4}b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} /X[^a]{2,4}?b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} /X[^a]{2,4}+b/utf X\=ps Partial match: X X\x{123}\=ps Partial match: X\x{123} X\x{123}\x{123}\=ps Partial match: X\x{123}\x{123} X\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123} X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: X\x{123}\x{123}\x{123}\x{123} /(Y)X\1{2,4}b/utf YX\=ps Partial match: YX YXY\=ps Partial match: YXY YXYY\=ps Partial match: YXYY YXYYY\=ps Partial match: YXYYY YXYYYY\=ps Partial match: YXYYYY /(Y)X\1{2,4}?b/utf YX\=ps Partial match: YX YXY\=ps Partial match: YXY YXYY\=ps Partial match: YXYY YXYYY\=ps Partial match: YXYYY YXYYYY\=ps Partial match: YXYYYY /(Y)X\1{2,4}+b/utf YX\=ps Partial match: YX YXY\=ps Partial match: YXY YXYY\=ps Partial match: YXYY YXYYY\=ps Partial match: YXYYY YXYYYY\=ps Partial match: YXYYYY /(\x{123})X\1{2,4}b/utf \x{123}X\=ps Partial match: \x{123}X \x{123}X\x{123}\=ps Partial match: \x{123}X\x{123} \x{123}X\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123} \x{123}X\x{123}\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123}\x{123} \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123}\x{123}\x{123} /(\x{123})X\1{2,4}?b/utf \x{123}X\=ps Partial match: \x{123}X \x{123}X\x{123}\=ps Partial match: \x{123}X\x{123} \x{123}X\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123} \x{123}X\x{123}\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123}\x{123} \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123}\x{123}\x{123} /(\x{123})X\1{2,4}+b/utf \x{123}X\=ps Partial match: \x{123}X \x{123}X\x{123}\=ps Partial match: \x{123}X\x{123} \x{123}X\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123} \x{123}X\x{123}\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123}\x{123} \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps Partial match: \x{123}X\x{123}\x{123}\x{123}\x{123} /\bthe cat\b/utf the cat\=ps 0: the cat the cat\=ph Partial match: the cat /abcd*/utf xxxxabcd\=ps 0: abcd xxxxabcd\=ph Partial match: abcd /abcd*/i,utf xxxxabcd\=ps 0: abcd xxxxabcd\=ph Partial match: abcd XXXXABCD\=ps 0: ABCD XXXXABCD\=ph Partial match: ABCD /abc\d*/utf xxxxabc1\=ps 0: abc1 xxxxabc1\=ph Partial match: abc1 /(a)bc\1*/utf xxxxabca\=ps 0: abca 1: a xxxxabca\=ph Partial match: abca /abc[de]*/utf xxxxabcde\=ps 0: abcde xxxxabcde\=ph Partial match: abcde /X\W{3}X/utf X\=ps Partial match: X /\sxxx\s/utf,tables=2 AB\x{85}xxx\x{a0}XYZ 0: \x{85}xxx\x{a0} AB\x{a0}xxx\x{85}XYZ 0: \x{a0}xxx\x{85} /\S \S/utf,tables=2 \x{a2} \x{84} 0: \x{a2} \x{84} 'A#хц'Bx,newline=any,utf ------------------------------------------------------------------ Bra A Ket End ------------------------------------------------------------------ 'A#хц PQ'Bx,newline=any,utf ------------------------------------------------------------------ Bra APQ Ket End ------------------------------------------------------------------ /a+#хaa z#XX?/Bx,newline=any,utf ------------------------------------------------------------------ Bra a++ z Ket End ------------------------------------------------------------------ /a+#хaa z#х?/Bx,newline=any,utf ------------------------------------------------------------------ Bra a++ z Ket End ------------------------------------------------------------------ /\g{A}xxx#bXX(?'A'123) (?'A'456)/Bx,newline=any,utf ------------------------------------------------------------------ Bra \1 xxx CBra 1 456 Ket Ket End ------------------------------------------------------------------ /\g{A}xxx#bх(?'A'123) (?'A'456)/Bx,newline=any,utf ------------------------------------------------------------------ Bra \1 xxx CBra 1 456 Ket Ket End ------------------------------------------------------------------ /^\cģ/utf Failed: error 168 at offset 3: \c must be followed by a printable ASCII character /(\R*)(.)/s,utf \r\n 0: \x{0d} 1: 2: \x{0d} \r\r\n\n\r 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d} 1: \x{0d}\x{0d}\x{0a}\x{0a} 2: \x{0d} \r\r\n\n\r\n 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d} 1: \x{0d}\x{0d}\x{0a}\x{0a} 2: \x{0d} /(\R)*(.)/s,utf \r\n 0: \x{0d} 1: <unset> 2: \x{0d} \r\r\n\n\r 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d} 1: \x{0a} 2: \x{0d} \r\r\n\n\r\n 0: \x{0d}\x{0d}\x{0a}\x{0a}\x{0d} 1: \x{0a} 2: \x{0d} /[^\x{1234}]+/Ii,utf Capture group count = 0 Options: caseless utf Subject length lower bound = 1 /[^\x{1234}]+?/Ii,utf Capture group count = 0 Options: caseless utf Subject length lower bound = 1 /[^\x{1234}]++/Ii,utf Capture group count = 0 Options: caseless utf Subject length lower bound = 1 /[^\x{1234}]{2}/Ii,utf Capture group count = 0 Options: caseless utf Subject length lower bound = 2 /f.*/ for\=ph Partial match: for /f.*/s for\=ph Partial match: for /f.*/utf for\=ph Partial match: for /f.*/s,utf for\=ph Partial match: for /\x{d7ff}\x{e000}/utf /\x{d800}/utf Failed: error 173 at offset 7: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) /\x{dfff}/utf Failed: error 173 at offset 7: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) /\h+/utf \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} 0: \x{1680}\x{2000}\x{202f}\x{3000} \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000} 0: \x{200a}\x{a0}\x{2000} /[\h\x{e000}]+/B,utf ------------------------------------------------------------------ Bra [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}\x{e000}]++ Ket End ------------------------------------------------------------------ \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} 0: \x{1680}\x{2000}\x{202f}\x{3000} \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000} 0: \x{200a}\x{a0}\x{2000} /\H+/utf \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} 0: \x{167f}\x{1681}\x{180d}\x{180f} \x{2000}\x{200a}\x{1fff}\x{200b} 0: \x{1fff}\x{200b} \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} 0: \x{202e}\x{2030}\x{205e}\x{2060} \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001} 0: \x{9f}\x{a1}\x{2fff}\x{3001} /[\H\x{d7ff}]+/B,utf ------------------------------------------------------------------ Bra [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}\x{d7ff}]++ Ket End ------------------------------------------------------------------ \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} 0: \x{167f}\x{1681}\x{180d}\x{180f} \x{2000}\x{200a}\x{1fff}\x{200b} 0: \x{1fff}\x{200b} \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} 0: \x{202e}\x{2030}\x{205e}\x{2060} \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001} 0: \x{9f}\x{a1}\x{2fff}\x{3001} /\v+/utf \x{2027}\x{2030}\x{2028}\x{2029} 0: \x{2028}\x{2029} \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d 0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d} /[\v\x{e000}]+/B,utf ------------------------------------------------------------------ Bra [\x0a-\x0d\x85\x{2028}-\x{2029}\x{e000}]++ Ket End ------------------------------------------------------------------ \x{2027}\x{2030}\x{2028}\x{2029} 0: \x{2028}\x{2029} \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d 0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d} /\V+/utf \x{2028}\x{2029}\x{2027}\x{2030} 0: \x{2027}\x{2030} \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86} 0: \x{09}\x{0e}\x{84}\x{86} /[\V\x{d7ff}]+/B,utf ------------------------------------------------------------------ Bra [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}\x{d7ff}]++ Ket End ------------------------------------------------------------------ \x{2028}\x{2029}\x{2027}\x{2030} 0: \x{2027}\x{2030} \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86} 0: \x{09}\x{0e}\x{84}\x{86} /\R+/bsr=unicode,utf \x{2027}\x{2030}\x{2028}\x{2029} 0: \x{2028}\x{2029} \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d 0: \x{85}\x{0a}\x{0b}\x{0c}\x{0d} /(..)\1/utf ab\=ps Partial match: ab aba\=ps Partial match: aba abab\=ps 0: abab 1: ab /(..)\1/i,utf ab\=ps Partial match: ab abA\=ps Partial match: abA aBAb\=ps 0: aBAb 1: aB /(..)\1{2,}/utf ab\=ps Partial match: ab aba\=ps Partial match: aba abab\=ps Partial match: abab ababa\=ps Partial match: ababa ababab\=ps 0: ababab 1: ab ababab\=ph Partial match: ababab abababa\=ps 0: ababab 1: ab abababa\=ph Partial match: abababa /(..)\1{2,}/i,utf ab\=ps Partial match: ab aBa\=ps Partial match: aBa aBAb\=ps Partial match: aBAb AbaBA\=ps Partial match: AbaBA abABAb\=ps 0: abABAb 1: ab aBAbaB\=ph Partial match: aBAbaB abABabA\=ps 0: abABab 1: ab abaBABa\=ph Partial match: abaBABa /(..)\1{2,}?x/i,utf ab\=ps Partial match: ab abA\=ps Partial match: abA aBAb\=ps Partial match: aBAb abaBA\=ps Partial match: abaBA abAbaB\=ps Partial match: abAbaB abaBabA\=ps Partial match: abaBabA abAbABaBx\=ps 0: abAbABaBx 1: ab /./utf,newline=crlf \r\=ps 0: \x{0d} \r\=ph Partial match: \x{0d} /.{2,3}/utf,newline=crlf \r\=ps Partial match: \x{0d} \r\=ph Partial match: \x{0d} \r\r\=ps 0: \x{0d}\x{0d} \r\r\=ph Partial match: \x{0d}\x{0d} \r\r\r\=ps 0: \x{0d}\x{0d}\x{0d} \r\r\r\=ph Partial match: \x{0d}\x{0d}\x{0d} /.{2,3}?/utf,newline=crlf \r\=ps Partial match: \x{0d} \r\=ph Partial match: \x{0d} \r\r\=ps 0: \x{0d}\x{0d} \r\r\=ph Partial match: \x{0d}\x{0d} \r\r\r\=ps 0: \x{0d}\x{0d} \r\r\r\=ph 0: \x{0d}\x{0d} /[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/B,utf ------------------------------------------------------------------ Bra [^\x{100}] [^\x{1234}] [^\x{ffff}] [^\x{10000}] [^\x{10ffff}] Ket End ------------------------------------------------------------------ /[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/Bi,utf ------------------------------------------------------------------ Bra /i [^\x{100}] /i [^\x{1234}] /i [^\x{ffff}] /i [^\x{10000}] /i [^\x{10ffff}] Ket End ------------------------------------------------------------------ /[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/B,utf ------------------------------------------------------------------ Bra [^\x{100}]* [^\x{10000}]+ [^\x{10ffff}]?? [^\x{8000}]{4} [^\x{8000}]* [^\x{7fff}]{2} [^\x{7fff}]{0,7}? [^\x{fffff}]{5} [^\x{fffff}]?+ Ket End ------------------------------------------------------------------ /[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf ------------------------------------------------------------------ Bra /i [^\x{100}]* /i [^\x{10000}]+ /i [^\x{10ffff}]?? /i [^\x{8000}]{4} /i [^\x{8000}]* /i [^\x{7fff}]{2} /i [^\x{7fff}]{0,7}? /i [^\x{fffff}]{5} /i [^\x{fffff}]?+ Ket End ------------------------------------------------------------------ /(?<=\x{1234}\x{1234})\bxy/I,utf Capture group count = 0 Max lookbehind = 2 Options: utf First code unit = 'x' Last code unit = 'y' Subject length lower bound = 2 /(?<!^)ETA/utf \= Expect no match ETA No match /\u0100/B,utf,alt_bsux,allow_empty_class,match_unset_backref ------------------------------------------------------------------ Bra \x{100} Ket End ------------------------------------------------------------------ /[\u0100-\u0200]/B,utf,alt_bsux,allow_empty_class,match_unset_backref ------------------------------------------------------------------ Bra [\x{100}-\x{200}] Ket End ------------------------------------------------------------------ /\ud800/utf,alt_bsux,allow_empty_class,match_unset_backref Failed: error 173 at offset 6: disallowed Unicode code point (>= 0xd800 && <= 0xdfff) /^\u{0000000000010ffff}/utf,extra_alt_bsux \x{10ffff} 0: \x{10ffff} /\u/utf,alt_bsux \\u 0: u /^a+[a\x{200}]/B,utf ------------------------------------------------------------------ Bra ^ a+ [a\x{200}] Ket End ------------------------------------------------------------------ aa 0: aa /[b-d\x{200}-\x{250}]*[ae-h]?#[\x{200}-\x{250}]{0,8}[\x00-\xff]*#[\x{200}-\x{250}]+[a-z]/B,utf ------------------------------------------------------------------ Bra [b-d\x{200}-\x{250}]*+ [ae-h]?+ # [\x{200}-\x{250}]{0,8}+ [\x00-\xff]* # [\x{200}-\x{250}]++ [a-z] Ket End ------------------------------------------------------------------ /[\p{L}]/IB ------------------------------------------------------------------ Bra [\p{L}] Ket End ------------------------------------------------------------------ Capture group count = 0 Subject length lower bound = 1 /[\p{^L}]/IB ------------------------------------------------------------------ Bra [\P{L}] Ket End ------------------------------------------------------------------ Capture group count = 0 Subject length lower bound = 1 /[\P{L}]/IB ------------------------------------------------------------------ Bra [\P{L}] Ket End ------------------------------------------------------------------ Capture group count = 0 Subject length lower bound = 1 /[\P{^L}]/IB ------------------------------------------------------------------ Bra [\p{L}] Ket End ------------------------------------------------------------------ Capture group count = 0 Subject length lower bound = 1 /[abc\p{L}\x{0660}]/IB,utf ------------------------------------------------------------------ Bra [a-c\p{L}\x{660}] Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf Subject length lower bound = 1 /[\p{Nd}]/IB,utf ------------------------------------------------------------------ Bra [\p{Nd}] Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf Subject length lower bound = 1 1234 0: 1 /[\p{Nd}+-]+/IB,utf ------------------------------------------------------------------ Bra [+\-\p{Nd}]++ Ket End ------------------------------------------------------------------ Capture group count = 0 Options: utf Subject length lower bound = 1 1234 0: 1234 12-34 0: 12-34 12+\x{661}-34 0: 12+\x{661}-34 \= Expect no match abcd No match /(?:[\PPa*]*){8,}/ /[\P{Any}]/B ------------------------------------------------------------------ Bra [\P{Any}] Ket End ------------------------------------------------------------------ /[\P{Any}\E]/B ------------------------------------------------------------------ Bra [\P{Any}] Ket End ------------------------------------------------------------------ /(\P{Yi}+\277)/ /(\P{Yi}+\277)?/ /(?<=\P{Yi}{3}A)X/ /\p{Yi}+(\P{Yi}+)(?1)/ /(\P{Yi}{2}\277)?/ /[\P{Yi}A]/ /[\P{Yi}\P{Yi}\P{Yi}A]/ /[^\P{Yi}A]/ /[^\P{Yi}\P{Yi}\P{Yi}A]/ /(\P{Yi}*\277)*/ /(\P{Yi}*?\277)*/ /(\p{Yi}*+\277)*/ /(\P{Yi}?\277)*/ /(\P{Yi}??\277)*/ /(\p{Yi}?+\277)*/ /(\P{Yi}{0,3}\277)*/ /(\P{Yi}{0,3}?\277)*/ /(\p{Yi}{0,3}+\277)*/ /\p{Zl}{2,3}+/B,utf ------------------------------------------------------------------ Bra prop Zl {2} prop Zl ?+ Ket End ------------------------------------------------------------------ 0: \x{2028}\x{2028} \x{2028}\x{2028}\x{2028} 0: \x{2028}\x{2028}\x{2028} /\p{Zl}/B,utf ------------------------------------------------------------------ Bra prop Zl Ket End ------------------------------------------------------------------ /\p{Lu}{3}+/B,utf ------------------------------------------------------------------ Bra prop Lu {3} Ket End ------------------------------------------------------------------ /\pL{2}+/B,utf ------------------------------------------------------------------ Bra prop L {2} Ket End ------------------------------------------------------------------ /\p{Cc}{2}+/B,utf ------------------------------------------------------------------ Bra prop Cc {2} Ket End ------------------------------------------------------------------ /^\p{Cf}/utf \x{180e} 0: \x{180e} \x{061c} 0: \x{61c} \x{2066} 0: \x{2066} \x{2067} 0: \x{2067} \x{2068} 0: \x{2068} \x{2069} 0: \x{2069} /^\p{Cs}/utf \x{dfff}\=no_utf_check 0: \x{dfff} \= Expect no match \x{09f} No match /^\p{Mn}/utf \x{1a1b} 0: \x{1a1b} /^\p{Pe}/utf \x{2309} 0: \x{2309} \x{230b} 0: \x{230b} /^\p{Ps}/utf \x{2308} 0: \x{2308} \x{230a} 0: \x{230a} /^\p{Sc}+/utf $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6} 0: $\x{a2}\x{a3}\x{a4}\x{a5} \x{9f2} 0: \x{9f2} \= Expect no match X No match \x{2c2} No match /^\p{Zs}/utf \ \ 0: \x{a0} 0: \x{a0} \x{1680} 0: \x{1680} \x{2000} 0: \x{2000} \x{2001} 0: \x{2001} \= Expect no match \x{2028} No match \x{200d} No match # These are here because Perl has problems with the negative versions of the # properties and has changed how it behaves for caseless matching. /\p{^Lu}/i,utf 1234 0: 1 \= Expect no match ABC No match /\P{Lu}/i,utf 1234 0: 1 \= Expect no match ABC No match /\p{Ll}/i,utf a 0: a Az 0: z \= Expect no match ABC No match /\p{Lu}/i,utf A 0: A a\x{10a0}B 0: \x{10a0} \= Expect no match a No match \x{1d00} No match /\p{Lu}/i,utf A 0: A aZ 0: Z \= Expect no match abc No match /[\x{c0}\x{391}]/i,utf \x{c0} 0: \x{c0} \x{e0} 0: \x{e0} # The next two are special cases where the lengths of the different cases of # the same character differ. The first went wrong with heap frame storage; the # second was broken in all cases. /^\x{023a}+?(\x{0130}+)/i,utf \x{023a}\x{2c65}\x{0130} 0: \x{23a}\x{2c65}\x{130} 1: \x{130} /^\x{023a}+([^X])/i,utf \x{023a}\x{2c65}X 0: \x{23a}\x{2c65} 1: \x{2c65} /\x{c0}+\x{116}+/i,utf \x{c0}\x{e0}\x{116}\x{117} 0: \x{c0}\x{e0}\x{116}\x{117} /[\x{c0}\x{116}]+/i,utf \x{c0}\x{e0}\x{116}\x{117} 0: \x{c0}\x{e0}\x{116}\x{117} /(\x{de})\1/i,utf \x{de}\x{de} 0: \x{de}\x{de} 1: \x{de} \x{de}\x{fe} 0: \x{de}\x{fe} 1: \x{de} \x{fe}\x{fe} 0: \x{fe}\x{fe} 1: \x{fe} \x{fe}\x{de} 0: \x{fe}\x{de} 1: \x{fe} /^\x{c0}$/i,utf \x{c0} 0: \x{c0} \x{e0} 0: \x{e0} /^\x{e0}$/i,utf \x{c0} 0: \x{c0} \x{e0} 0: \x{e0} # The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE # will match it only with UCP support, because without that it has no notion # of case for anything other than the ASCII letters. /((?i)[\x{c0}])/utf \x{c0} 0: \x{c0} 1: \x{c0} \x{e0} 0: \x{e0} 1: \x{e0} /(?i:[\x{c0}])/utf \x{c0} 0: \x{c0} \x{e0} 0: \x{e0} # These are PCRE's extra properties to help with Unicodizing \d etc. /^\p{Xan}/utf ABCD 0: A 1234 0: 1 \x{6ca} 0: \x{6ca} \x{a6c} 0: \x{a6c} \x{10a7} 0: \x{10a7} \= Expect no match _ABC No match /^\p{Xan}+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} \= Expect no match _ABC No match /^\p{Xan}+?/utf \x{6ca}\x{a6c}\x{10a7}_ 0: \x{6ca} /^\p{Xan}*/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} /^\p{Xan}{2,9}/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca} /^\p{Xan}{2,9}?/utf \x{6ca}\x{a6c}\x{10a7}_ 0: \x{6ca}\x{a6c} /^[\p{Xan}]/utf ABCD1234_ 0: A 1234abcd_ 0: 1 \x{6ca} 0: \x{6ca} \x{a6c} 0: \x{a6c} \x{10a7} 0: \x{10a7} \= Expect no match _ABC No match /^[\p{Xan}]+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7} \= Expect no match _ABC No match /^>\p{Xsp}/utf >\x{1680}\x{2028}\x{0b} 0: >\x{1680} >\x{a0} 0: >\x{a0} \= Expect no match \x{0b} No match /^>\p{Xsp}+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^>\p{Xsp}+?/utf >\x{1680}\x{2028}\x{0b} 0: >\x{1680} /^>\p{Xsp}*/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^>\p{Xsp}{2,9}/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^>\p{Xsp}{2,9}?/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09} /^>[\p{Xsp}]/utf >\x{2028}\x{0b} 0: >\x{2028} /^>[\p{Xsp}]+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^>\p{Xps}/utf >\x{1680}\x{2028}\x{0b} 0: >\x{1680} >\x{a0} 0: >\x{a0} \= Expect no match \x{0b} No match /^>\p{Xps}+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^>\p{Xps}+?/utf >\x{1680}\x{2028}\x{0b} 0: >\x{1680} /^>\p{Xps}*/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^>\p{Xps}{2,9}/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^>\p{Xps}{2,9}?/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09} /^>[\p{Xps}]/utf >\x{2028}\x{0b} 0: >\x{2028} /^>[\p{Xps}]+/utf > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} 0: > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} /^\p{Xwd}/utf ABCD 0: A 1234 0: 1 \x{6ca} 0: \x{6ca} \x{a6c} 0: \x{a6c} \x{10a7} 0: \x{10a7} _ABC 0: _ \= Expect no match [] No match /^\p{Xwd}+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_ /^\p{Xwd}+?/utf \x{6ca}\x{a6c}\x{10a7}_ 0: \x{6ca} /^\p{Xwd}*/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_ /^\p{Xwd}{2,9}/utf A_B12\x{6ca}\x{a6c}\x{10a7} 0: A_B12\x{6ca}\x{a6c}\x{10a7} /^\p{Xwd}{2,9}?/utf \x{6ca}\x{a6c}\x{10a7}_ 0: \x{6ca}\x{a6c} /^[\p{Xwd}]/utf ABCD1234_ 0: A 1234abcd_ 0: 1 \x{6ca} 0: \x{6ca} \x{a6c} 0: \x{a6c} \x{10a7} 0: \x{10a7} _ABC 0: _ \= Expect no match [] No match /^[\p{Xwd}]+/utf ABCD1234\x{6ca}\x{a6c}\x{10a7}_ 0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_ # A check not in UTF-8 mode /^[\p{Xwd}]+/ ABCD1234_ 0: ABCD1234_ # Some negative checks /^[\P{Xwd}]+/utf !.+\x{019}\x{35a}AB 0: !.+\x{19}\x{35a} /^[\p{^Xwd}]+/utf !.+\x{019}\x{35a}AB 0: !.+\x{19}\x{35a} /[\D]/B,utf,ucp ------------------------------------------------------------------ Bra [\P{Nd}] Ket End ------------------------------------------------------------------ 1\x{3c8}2 0: \x{3c8} /[\d]/B,utf,ucp ------------------------------------------------------------------ Bra [\p{Nd}] Ket End ------------------------------------------------------------------ >\x{6f4}< 0: \x{6f4} /[\S]/B,utf,ucp ------------------------------------------------------------------ Bra [\P{Xsp}] Ket End ------------------------------------------------------------------ \x{1680}\x{6f4}\x{1680} 0: \x{6f4} /[\s]/B,utf,ucp ------------------------------------------------------------------ Bra [\p{Xsp}] Ket End ------------------------------------------------------------------ >\x{1680}< 0: \x{1680} /[\W]/B,utf,ucp ------------------------------------------------------------------ Bra [\P{Xwd}] Ket End ------------------------------------------------------------------ A\x{1712}B 0: \x{1712} /[\w]/B,utf,ucp ------------------------------------------------------------------ Bra [\p{Xwd}] Ket End ------------------------------------------------------------------ >\x{1723}< 0: \x{1723} /\D/B,utf,ucp ------------------------------------------------------------------ Bra notprop Nd Ket End ------------------------------------------------------------------ 1\x{3c8}2 0: \x{3c8} /\d/B,utf,ucp ------------------------------------------------------------------ Bra prop Nd Ket End ------------------------------------------------------------------ >\x{6f4}< 0: \x{6f4} /\S/B,utf,ucp ------------------------------------------------------------------ Bra notprop Xsp Ket End ------------------------------------------------------------------ \x{1680}\x{6f4}\x{1680} 0: \x{6f4} /\s/B,utf,ucp ------------------------------------------------------------------ Bra prop Xsp Ket End ------------------------------------------------------------------ >\x{1680}> 0: \x{1680} /\W/B,utf,ucp ------------------------------------------------------------------ Bra notprop Xwd Ket End ------------------------------------------------------------------ A\x{1712}B 0: \x{1712} /\w/B,utf,ucp ------------------------------------------------------------------ Bra prop Xwd Ket End ------------------------------------------------------------------ >\x{1723}< 0: \x{1723} /[[:alpha:]]/B,ucp ------------------------------------------------------------------ Bra [\p{L}] Ket End ------------------------------------------------------------------ /[[:lower:]]/B,ucp ------------------------------------------------------------------ Bra [\p{Ll}] Ket End ------------------------------------------------------------------ /[[:upper:]]/B,ucp ------------------------------------------------------------------ Bra [\p{Lu}] Ket End ------------------------------------------------------------------ /[[:alnum:]]/B,ucp ------------------------------------------------------------------ Bra [\p{Xan}] Ket End ------------------------------------------------------------------ /[[:ascii:]]/B,ucp ------------------------------------------------------------------ Bra [\x00-\x7f] Ket End ------------------------------------------------------------------ /[[:cntrl:]]/B,ucp ------------------------------------------------------------------ Bra [\p{Cc}] Ket End ------------------------------------------------------------------ /[[:digit:]]/B,ucp ------------------------------------------------------------------ Bra [\p{Nd}] Ket End ------------------------------------------------------------------ /[[:graph:]]/B,ucp ------------------------------------------------------------------ Bra [[:graph:]] Ket End ------------------------------------------------------------------ /[[:print:]]/B,ucp ------------------------------------------------------------------ Bra [[:print:]] Ket End ------------------------------------------------------------------ /[[:punct:]]/B,ucp ------------------------------------------------------------------ Bra [[:punct:]] Ket End ------------------------------------------------------------------ /[[:space:]]/B,ucp ------------------------------------------------------------------ Bra [\p{Xps}] Ket End ------------------------------------------------------------------ /[[:word:]]/B,ucp ------------------------------------------------------------------ Bra [\p{Xwd}] Ket End ------------------------------------------------------------------ /[[:xdigit:]]/B,ucp ------------------------------------------------------------------ Bra [0-9A-Fa-f] Ket End ------------------------------------------------------------------ # Unicode properties for \b abd \B /\b...\B/utf,ucp abc_ 0: abc \x{37e}abc\x{376} 0: abc \x{37e}\x{376}\x{371}\x{393}\x{394} 0: \x{376}\x{371}\x{393} !\x{c0}++\x{c1}\x{c2} 0: ++\x{c1} !\x{c0}+++++ 0: \x{c0}++ # Without PCRE_UCP, non-ASCII always fail, even if < 256 /\b...\B/utf abc_ 0: abc \= Expect no match \x{37e}abc\x{376} No match \x{37e}\x{376}\x{371}\x{393}\x{394} No match !\x{c0}++\x{c1}\x{c2} No match !\x{c0}+++++ No match # With PCRE_UCP, non-UTF8 chars that are < 256 still check properties /\b...\B/ucp abc_ 0: abc !\x{c0}++\x{c1}\x{c2} 0: ++\xc1 !\x{c0}+++++ 0: \xc0++ # Some of these are silly, but they check various combinations /[[:^alpha:][:^cntrl:]]+/B,utf,ucp ------------------------------------------------------------------ Bra [\P{L}\P{Cc}]++ Ket End ------------------------------------------------------------------ 123 0: 123 abc 0: abc /[[:^cntrl:][:^alpha:]]+/B,utf,ucp ------------------------------------------------------------------ Bra [\P{Cc}\P{L}]++ Ket End ------------------------------------------------------------------ 123 0: 123 abc 0: abc /[[:alpha:]]+/B,utf,ucp ------------------------------------------------------------------ Bra [\p{L}]++ Ket End ------------------------------------------------------------------ abc 0: abc /[[:^alpha:]\S]+/B,utf,ucp ------------------------------------------------------------------ Bra [\P{L}\P{Xsp}]++ Ket End ------------------------------------------------------------------ 123 0: 123 abc 0: abc /[^\d]+/B,utf,ucp ------------------------------------------------------------------ Bra [^\p{Nd}]++ Ket End ------------------------------------------------------------------ abc123 0: abc abc\x{123} 0: abc\x{123} \x{660}abc 0: abc /\p{Lu}+9\p{Lu}+B\p{Lu}+b/B ------------------------------------------------------------------ Bra prop Lu ++ 9 prop Lu + B prop Lu ++ b Ket End ------------------------------------------------------------------ /\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/B ------------------------------------------------------------------ Bra notprop Lu + 9 notprop Lu ++ B notprop Lu + b Ket End ------------------------------------------------------------------ /\P{Lu}+9\P{Lu}+B\P{Lu}+b/B ------------------------------------------------------------------ Bra notprop Lu + 9 notprop Lu ++ B notprop Lu + b Ket End ------------------------------------------------------------------ /\p{Han}+X\p{Greek}+\x{370}/B,utf ------------------------------------------------------------------ Bra prop Han ++ X prop Greek + \x{370} Ket End ------------------------------------------------------------------ /\p{Xan}+!\p{Xan}+A/B ------------------------------------------------------------------ Bra prop Xan ++ ! prop Xan + A Ket End ------------------------------------------------------------------ /\p{Xsp}+!\p{Xsp}\t/B ------------------------------------------------------------------ Bra prop Xsp ++ ! prop Xsp \x09 Ket End ------------------------------------------------------------------ /\p{Xps}+!\p{Xps}\t/B ------------------------------------------------------------------ Bra prop Xps ++ ! prop Xps \x09 Ket End ------------------------------------------------------------------ /\p{Xwd}+!\p{Xwd}_/B ------------------------------------------------------------------ Bra prop Xwd ++ ! prop Xwd _ Ket End ------------------------------------------------------------------ /A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp ------------------------------------------------------------------ Bra A++ prop N A++ prop Nd B+ prop N *+ B++ prop Nd *+ Ket End ------------------------------------------------------------------ # These behaved oddly in Perl, so they are kept in this test /(\x{23a}\x{23a}\x{23a})?\1/i,utf \= Expect no match \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} No match /(ȺȺȺ)?\1/i,utf \= Expect no match ȺȺȺⱥⱥ No match /(\x{23a}\x{23a}\x{23a})?\1/i,utf \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} 1: \x{23a}\x{23a}\x{23a} /(ȺȺȺ)?\1/i,utf ȺȺȺⱥⱥⱥ 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} 1: \x{23a}\x{23a}\x{23a} /(\x{23a}\x{23a}\x{23a})\1/i,utf \= Expect no match \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} No match /(ȺȺȺ)\1/i,utf \= Expect no match ȺȺȺⱥⱥ No match /(\x{23a}\x{23a}\x{23a})\1/i,utf \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} 1: \x{23a}\x{23a}\x{23a} /(ȺȺȺ)\1/i,utf ȺȺȺⱥⱥⱥ 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} 1: \x{23a}\x{23a}\x{23a} /(\x{2c65}\x{2c65})\1/i,utf \x{2c65}\x{2c65}\x{23a}\x{23a} 0: \x{2c65}\x{2c65}\x{23a}\x{23a} 1: \x{2c65}\x{2c65} /(ⱥⱥ)\1/i,utf ⱥⱥȺȺ 0: \x{2c65}\x{2c65}\x{23a}\x{23a} 1: \x{2c65}\x{2c65} /(\x{23a}\x{23a}\x{23a})\1Y/i,utf X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ 0: \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}Y 1: \x{23a}\x{23a}\x{23a} /(\x{2c65}\x{2c65})\1Y/i,utf X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ 0: \x{2c65}\x{2c65}\x{23a}\x{23a}Y 1: \x{2c65}\x{2c65} # These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE /^[\p{Batak}]/utf \x{1bc0} 0: \x{1bc0} \x{1bff} 0: \x{1bff} \= Expect no match \x{1bf4} No match /^[\p{Brahmi}]/utf \x{11000} 0: \x{11000} \x{1106f} 0: \x{1106f} \= Expect no match \x{1104e} No match /^[\p{Mandaic}]/utf \x{840} 0: \x{840} \x{85e} 0: \x{85e} \= Expect no match \x{85c} No match \x{85d} No match /(\X*)(.)/s,utf A\x{300} 0: A 1: 2: A /^S(\X*)e(\X*)$/utf Stéréo 0: Ste\x{301}re\x{301}o 1: te\x{301}r 2: \x{301}o /^\X/utf ́réo 0: \x{301} /^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames aX41z 0: aX41z \= Expect no match aAz No match /\X/ a\=ps 0: a a\=ph Partial match: a /\Xa/ aa\=ps 0: aa aa\=ph 0: aa /\X{2}/ aa\=ps 0: aa aa\=ph Partial match: aa /\X+a/ a\=ps Partial match: a aa\=ps 0: aa aa\=ph Partial match: aa /\X+?a/ a\=ps Partial match: a ab\=ps Partial match: ab aa\=ps 0: aa aa\=ph 0: aa aba\=ps 0: aba # These Unicode 6.1.0 scripts are not known to Perl. /\p{Chakma}\d/utf,ucp \x{11100}\x{1113c} 0: \x{11100}\x{1113c} /\p{Takri}\d/utf,ucp \x{11680}\x{116c0} 0: \x{11680}\x{116c0} /^\X/utf A\=ps 0: A A\=ph Partial match: A A\x{300}\x{301}\=ps 0: A\x{300}\x{301} A\x{300}\x{301}\=ph Partial match: A\x{300}\x{301} A\x{301}\=ps 0: A\x{301} A\x{301}\=ph Partial match: A\x{301} /^\X{2,3}/utf A\=ps Partial match: A A\=ph Partial match: A AA\=ps 0: AA AA\=ph Partial match: AA A\x{300}\x{301}\=ps Partial match: A\x{300}\x{301} A\x{300}\x{301}\=ph Partial match: A\x{300}\x{301} A\x{300}\x{301}A\x{300}\x{301}\=ps 0: A\x{300}\x{301}A\x{300}\x{301} A\x{300}\x{301}A\x{300}\x{301}\=ph Partial match: A\x{300}\x{301}A\x{300}\x{301} /^\X{2}/utf AA\=ps 0: AA AA\=ph Partial match: AA A\x{300}\x{301}A\x{300}\x{301}\=ps 0: A\x{300}\x{301}A\x{300}\x{301} A\x{300}\x{301}A\x{300}\x{301}\=ph Partial match: A\x{300}\x{301}A\x{300}\x{301} /^\X+/utf AA\=ps 0: AA AA\=ph Partial match: AA /^\X+?Z/utf AA\=ps Partial match: AA AA\=ph Partial match: AA /A\x{3a3}B/IBi,utf ------------------------------------------------------------------ Bra /i A clist 03a3 03c2 03c3 /i B Ket End ------------------------------------------------------------------ Capture group count = 0 Options: caseless utf First code unit = 'A' (caseless) Last code unit = 'B' (caseless) Subject length lower bound = 3 /[\x{3a3}]/Bi,utf ------------------------------------------------------------------ Bra clist 03a3 03c2 03c3 Ket End ------------------------------------------------------------------ /[^\x{3a3}]/Bi,utf ------------------------------------------------------------------ Bra not clist 03a3 03c2 03c3 Ket End ------------------------------------------------------------------ /[\x{3a3}]+/Bi,utf ------------------------------------------------------------------ Bra clist 03a3 03c2 03c3 ++ Ket End ------------------------------------------------------------------ /[^\x{3a3}]+/Bi,utf ------------------------------------------------------------------ Bra not clist 03a3 03c2 03c3 ++ Ket End ------------------------------------------------------------------ /a*\x{3a3}/Bi,utf ------------------------------------------------------------------ Bra /i a*+ clist 03a3 03c2 03c3 Ket End ------------------------------------------------------------------ /\x{3a3}+a/Bi,utf ------------------------------------------------------------------ Bra clist 03a3 03c2 03c3 ++ /i a Ket End ------------------------------------------------------------------ /\x{3a3}*\x{3c2}/Bi,utf ------------------------------------------------------------------ Bra clist 03a3 03c2 03c3 * clist 03a3 03c2 03c3 Ket End ------------------------------------------------------------------ /\x{3a3}{3}/i,utf,aftertext \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 0: \x{3a3}\x{3c3}\x{3c2} 0+ \x{3a3}\x{3c3}\x{3c2} /\x{3a3}{2,4}/i,utf,aftertext \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 0: \x{3a3}\x{3c3}\x{3c2}\x{3a3} 0+ \x{3c3}\x{3c2} /\x{3a3}{2,4}?/i,utf,aftertext \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 0: \x{3a3}\x{3c3} 0+ \x{3c2}\x{3a3}\x{3c3}\x{3c2} /\x{3a3}+./i,utf,aftertext \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 0: \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} 0+ /\x{3a3}++./i,utf,aftertext \= Expect no match \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} No match /\x{3a3}*\x{3c2}/Bi,utf ------------------------------------------------------------------ Bra clist 03a3 03c2 03c3 * clist 03a3 03c2 03c3 Ket End ------------------------------------------------------------------ /[^\x{3a3}]*\x{3c2}/Bi,utf ------------------------------------------------------------------ Bra not clist 03a3 03c2 03c3 *+ clist 03a3 03c2 03c3 Ket End ------------------------------------------------------------------ /[^a]*\x{3c2}/Bi,utf ------------------------------------------------------------------ Bra /i [^a]* clist 03a3 03c2 03c3 Ket End ------------------------------------------------------------------ /ist/Bi,utf ------------------------------------------------------------------ Bra /i i clist 0053 0073 017f /i t Ket End ------------------------------------------------------------------ \= Expect no match ikt No match /is+t/i,utf iSs\x{17f}t 0: iSs\x{17f}t \= Expect no match ikt No match /is+?t/i,utf \= Expect no match ikt No match /is?t/i,utf \= Expect no match ikt No match /is{2}t/i,utf \= Expect no match iskt No match # This property is a PCRE special /^\p{Xuc}/utf $abc 0: $ @abc 0: @ `abc 0: ` \x{1234}abc 0: \x{1234} \= Expect no match abc No match /^\p{Xuc}+/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234}\x{e000} \= Expect no match \x{9f} No match /^\p{Xuc}+?/utf $@`\x{a0}\x{1234}\x{e000}** 0: $ \= Expect no match \x{9f} No match /^\p{Xuc}+?\*/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234}\x{e000}* \= Expect no match \x{9f} No match /^\p{Xuc}++/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234}\x{e000} \= Expect no match \x{9f} No match /^\p{Xuc}{3,5}/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234} \= Expect no match \x{9f} No match /^\p{Xuc}{3,5}?/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@` \= Expect no match \x{9f} No match /^[\p{Xuc}]/utf $@`\x{a0}\x{1234}\x{e000}** 0: $ \= Expect no match \x{9f} No match /^[\p{Xuc}]+/utf $@`\x{a0}\x{1234}\x{e000}** 0: $@`\x{a0}\x{1234}\x{e000} \= Expect no match \x{9f} No match /^\P{Xuc}/utf abc 0: a \= Expect no match $abc No match @abc No match `abc No match \x{1234}abc No match /^[\P{Xuc}]/utf abc 0: a \= Expect no match $abc No match @abc No match `abc No match \x{1234}abc No match # Some auto-possessification tests /\pN+\z/B ------------------------------------------------------------------ Bra prop N ++ \z Ket End ------------------------------------------------------------------ /\PN+\z/B ------------------------------------------------------------------ Bra notprop N ++ \z Ket End ------------------------------------------------------------------ /\pN+/B ------------------------------------------------------------------ Bra prop N ++ Ket End ------------------------------------------------------------------ /\PN+/B ------------------------------------------------------------------ Bra notprop N ++ Ket End ------------------------------------------------------------------ /\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra AllAny+ AllAny AllAny+ notprop Any AllAny+ prop Lc AllAny+ prop L AllAny+ prop Lu AllAny+ prop Han AllAny+ prop Xan AllAny+ prop Xsp AllAny+ prop Xps prop Xwd + AllAny AllAny+ prop Xuc Ket End ------------------------------------------------------------------ /\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra prop Lc + AllAny prop Lc + prop Lc notprop Lc ++ prop Lc prop Lc + prop L prop Lc + prop Lu prop Lc + prop Han prop Lc + prop Xan prop Lc ++ notprop Xan prop Lc ++ prop Xsp prop Lc ++ prop Xps prop Xwd + prop Lc prop Lc + prop Xuc Ket End ------------------------------------------------------------------ /\p{N}+\p{Any} \p{N}+\p{L&} \p{N}+\p{L} \p{N}+\P{L} \p{N}+\P{N} \p{N}+\p{Lu} \p{N}+\p{Han} \p{N}+\p{Xan} \p{N}+\p{Xsp} \p{N}+\p{Xps} \p{Xwd}+\p{N} \p{N}+\p{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra prop N + AllAny prop N + prop Lc prop N ++ prop L prop N + notprop L prop N ++ notprop N prop N ++ prop Lu prop N + prop Han prop N + prop Xan prop N ++ prop Xsp prop N ++ prop Xps prop Xwd + prop N prop N + prop Xuc Ket End ------------------------------------------------------------------ /\p{Lu}+\p{Any} \p{Lu}+\p{L&} \p{Lu}+\p{L} \p{Lu}+\p{Lu} \P{Lu}+\p{Lu} \p{Lu}+\p{Nd} \p{Lu}+\P{Nd} \p{Lu}+\p{Han} \p{Lu}+\p{Xan} \p{Lu}+\p{Xsp} \p{Lu}+\p{Xps} \p{Xwd}+\p{Lu} \p{Lu}+\p{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra prop Lu + AllAny prop Lu + prop Lc prop Lu + prop L prop Lu + prop Lu notprop Lu ++ prop Lu prop Lu ++ prop Nd prop Lu + notprop Nd prop Lu + prop Han prop Lu + prop Xan prop Lu ++ prop Xsp prop Lu ++ prop Xps prop Xwd + prop Lu prop Lu + prop Xuc Ket End ------------------------------------------------------------------ /\p{Han}+\p{Lu} \p{Han}+\p{L&} \p{Han}+\p{L} \p{Han}+\p{Lu} \p{Han}+\p{Arabic} \p{Arabic}+\p{Arabic} \p{Han}+\p{Xan} \p{Han}+\p{Xsp} \p{Han}+\p{Xps} \p{Xwd}+\p{Han} \p{Han}+\p{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra prop Han + prop Lu prop Han + prop Lc prop Han + prop L prop Han + prop Lu prop Han ++ prop Arabic prop Arabic + prop Arabic prop Han + prop Xan prop Han + prop Xsp prop Han + prop Xps prop Xwd + prop Han prop Han + prop Xuc Ket End ------------------------------------------------------------------ /\p{Xan}+\p{Any} \p{Xan}+\p{L&} \P{Xan}+\p{L&} \p{Xan}+\p{L} \p{Xan}+\p{Lu} \p{Xan}+\p{Han} \p{Xan}+\p{Xan} \p{Xan}+\P{Xan} \p{Xan}+\p{Xsp} \p{Xan}+\p{Xps} \p{Xwd}+\p{Xan} \p{Xan}+\p{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra prop Xan + AllAny prop Xan + prop Lc notprop Xan ++ prop Lc prop Xan + prop L prop Xan + prop Lu prop Xan + prop Han prop Xan + prop Xan prop Xan ++ notprop Xan prop Xan ++ prop Xsp prop Xan ++ prop Xps prop Xwd + prop Xan prop Xan + prop Xuc Ket End ------------------------------------------------------------------ /\p{Xsp}+\p{Any} \p{Xsp}+\p{L&} \p{Xsp}+\p{L} \p{Xsp}+\p{Lu} \p{Xsp}+\p{Han} \p{Xsp}+\p{Xan} \p{Xsp}+\p{Xsp} \P{Xsp}+\p{Xsp} \p{Xsp}+\p{Xps} \p{Xwd}+\p{Xsp} \p{Xsp}+\p{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra prop Xsp + AllAny prop Xsp ++ prop Lc prop Xsp ++ prop L prop Xsp ++ prop Lu prop Xsp + prop Han prop Xsp ++ prop Xan prop Xsp + prop Xsp notprop Xsp ++ prop Xsp prop Xsp + prop Xps prop Xwd ++ prop Xsp prop Xsp + prop Xuc Ket End ------------------------------------------------------------------ /\p{Xwd}+\p{Any} \p{Xwd}+\p{L&} \p{Xwd}+\p{L} \p{Xwd}+\p{Lu} \p{Xwd}+\p{Han} \p{Xwd}+\p{Xan} \p{Xwd}+\p{Xsp} \p{Xwd}+\p{Xps} \p{Xwd}+\p{Xwd} \p{Xwd}+\P{Xwd} \p{Xwd}+\p{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra prop Xwd + AllAny prop Xwd + prop Lc prop Xwd + prop L prop Xwd + prop Lu prop Xwd + prop Han prop Xwd + prop Xan prop Xwd ++ prop Xsp prop Xwd ++ prop Xps prop Xwd + prop Xwd prop Xwd ++ notprop Xwd prop Xwd + prop Xuc Ket End ------------------------------------------------------------------ /\p{Xuc}+\p{Any} \p{Xuc}+\p{L&} \p{Xuc}+\p{L} \p{Xuc}+\p{Lu} \p{Xuc}+\p{Han} \p{Xuc}+\p{Xan} \p{Xuc}+\p{Xsp} \p{Xuc}+\p{Xps} \p{Xwd}+\p{Xuc} \p{Xuc}+\p{Xuc} \p{Xuc}+\P{Xuc}/Bx,ucp ------------------------------------------------------------------ Bra prop Xuc + AllAny prop Xuc + prop Lc prop Xuc + prop L prop Xuc + prop Lu prop Xuc + prop Han prop Xuc + prop Xan prop Xuc + prop Xsp prop Xuc + prop Xps prop Xwd + prop Xuc prop Xuc + prop Xuc prop Xuc ++ notprop Xuc Ket End ------------------------------------------------------------------ /\p{N}+\p{Ll} \p{N}+\p{Nd} \p{N}+\P{Nd}/Bx,ucp ------------------------------------------------------------------ Bra prop N ++ prop Ll prop N + prop Nd prop N + notprop Nd Ket End ------------------------------------------------------------------ /\p{Xan}+\p{L} \p{Xan}+\p{N} \p{Xan}+\p{C} \p{Xan}+\P{L} \P{Xan}+\p{N} \p{Xan}+\P{C}/Bx,ucp ------------------------------------------------------------------ Bra prop Xan + prop L prop Xan + prop N prop Xan ++ prop C prop Xan + notprop L notprop Xan ++ prop N prop Xan + notprop C Ket End ------------------------------------------------------------------ /\p{L}+\p{Xan} \p{N}+\p{Xan} \p{C}+\p{Xan} \P{L}+\p{Xan} \p{N}+\p{Xan} \P{C}+\p{Xan} \p{L}+\P{Xan}/Bx,ucp ------------------------------------------------------------------ Bra prop L + prop Xan prop N + prop Xan prop C ++ prop Xan notprop L + prop Xan prop N + prop Xan notprop C + prop Xan prop L ++ notprop Xan Ket End ------------------------------------------------------------------ /\p{Xan}+\p{Lu} \p{Xan}+\p{Nd} \p{Xan}+\p{Cc} \p{Xan}+\P{Ll} \P{Xan}+\p{No} \p{Xan}+\P{Cf}/Bx,ucp ------------------------------------------------------------------ Bra prop Xan + prop Lu prop Xan + prop Nd prop Xan ++ prop Cc prop Xan + notprop Ll notprop Xan ++ prop No prop Xan + notprop Cf Ket End ------------------------------------------------------------------ /\p{Lu}+\p{Xan} \p{Nd}+\p{Xan} \p{Cs}+\p{Xan} \P{Lt}+\p{Xan} \p{Nl}+\p{Xan} \P{Cc}+\p{Xan} \p{Lt}+\P{Xan}/Bx,ucp ------------------------------------------------------------------ Bra prop Lu + prop Xan prop Nd + prop Xan prop Cs ++ prop Xan notprop Lt + prop Xan prop Nl + prop Xan notprop Cc + prop Xan prop Lt ++ notprop Xan Ket End ------------------------------------------------------------------ /\w+\p{P} \w+\p{Po} \w+\s \p{Xan}+\s \s+\p{Xan} \s+\w/Bx,ucp ------------------------------------------------------------------ Bra prop Xwd + prop P prop Xwd + prop Po prop Xwd ++ prop Xsp prop Xan ++ prop Xsp prop Xsp ++ prop Xan prop Xsp ++ prop Xwd Ket End ------------------------------------------------------------------ /\w+\P{P} \W+\p{Po} \w+\S \P{Xan}+\s \s+\P{Xan} \s+\W/Bx,ucp ------------------------------------------------------------------ Bra prop Xwd + notprop P notprop Xwd + prop Po prop Xwd + notprop Xsp notprop Xan + prop Xsp prop Xsp + notprop Xan prop Xsp + notprop Xwd Ket End ------------------------------------------------------------------ /\w+\p{Po} \w+\p{Pc} \W+\p{Po} \W+\p{Pc} \w+\P{Po} \w+\P{Pc}/Bx,ucp ------------------------------------------------------------------ Bra prop Xwd + prop Po prop Xwd ++ prop Pc notprop Xwd + prop Po notprop Xwd + prop Pc prop Xwd + notprop Po prop Xwd + notprop Pc Ket End ------------------------------------------------------------------ /\p{Nl}+\p{Xan} \P{Nl}+\p{Xan} \p{Nl}+\P{Xan} \P{Nl}+\P{Xan}/Bx,ucp ------------------------------------------------------------------ Bra prop Nl + prop Xan notprop Nl + prop Xan prop Nl ++ notprop Xan notprop Nl + notprop Xan Ket End ------------------------------------------------------------------ /\p{Xan}+\p{Nl} \P{Xan}+\p{Nl} \p{Xan}+\P{Nl} \P{Xan}+\P{Nl}/Bx,ucp ------------------------------------------------------------------ Bra prop Xan + prop Nl notprop Xan ++ prop Nl prop Xan + notprop Nl notprop Xan + notprop Nl Ket End ------------------------------------------------------------------ /\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp ------------------------------------------------------------------ Bra prop Xan + prop Nd notprop Xan ++ prop Nd prop Xan + notprop Nd notprop Xan + notprop Nd Ket End ------------------------------------------------------------------ # End auto-possessification tests /\w+/B,utf,ucp,auto_callout ------------------------------------------------------------------ Bra Callout 255 0 3 prop Xwd ++ Callout 255 3 0 Ket End ------------------------------------------------------------------ abcd --->abcd +0 ^ \w+ +3 ^ ^ End of pattern 0: abcd /[\p{N}]?+/B,no_auto_possess ------------------------------------------------------------------ Bra [\p{N}]?+ Ket End ------------------------------------------------------------------ /[\p{L}ab]{2,3}+/B,no_auto_possess ------------------------------------------------------------------ Bra [ab\p{L}]{2,3}+ Ket End ------------------------------------------------------------------ /\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx ------------------------------------------------------------------ Bra \D+ extuni \d+ extuni \S+ extuni \s+ extuni \W+ extuni \w+ extuni \R+ extuni \H+ extuni \h+ extuni \V+ extuni \v+ extuni a+ extuni \x0a+ extuni Any+ extuni Ket End ------------------------------------------------------------------ /.+\X/Bsx ------------------------------------------------------------------ Bra AllAny+ extuni Ket End ------------------------------------------------------------------ /\X+$/Bmx ------------------------------------------------------------------ Bra extuni+ /m $ Ket End ------------------------------------------------------------------ /\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx ------------------------------------------------------------------ Bra extuni+ \D extuni+ \d extuni+ \S extuni+ \s extuni+ \W extuni+ \w extuni+ Any extuni+ \R extuni+ \H extuni+ \h extuni+ \V extuni+ \v extuni+ extuni extuni+ \Z extuni++ \z extuni+ $ Ket End ------------------------------------------------------------------ /\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp ------------------------------------------------------------------ Bra prop Nd ++ prop Xsp {0,5}+ = prop Xsp *+ notprop Xsp ? = prop Xwd {0,4}+ notprop Xwd *+ Ket End ------------------------------------------------------------------ /[RST]+/Bi,utf,ucp ------------------------------------------------------------------ Bra [R-Tr-t\x{17f}]++ Ket End ------------------------------------------------------------------ /[R-T]+/Bi,utf,ucp ------------------------------------------------------------------ Bra [R-Tr-t\x{17f}]++ Ket End ------------------------------------------------------------------ /[Q-U]+/Bi,utf,ucp ------------------------------------------------------------------ Bra [Q-Uq-u\x{17f}]++ Ket End ------------------------------------------------------------------ /^s?c/Iim,utf Capture group count = 0 Options: caseless multiline utf First code unit at start or follows newline Last code unit = 'c' (caseless) Subject length lower bound = 1 scat 0: sc /\X?abc/utf,no_start_optimize \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06 0: A\x{300}abc /\x{100}\x{200}\K\x{300}/utf,startchar \x{100}\x{200}\x{300} 0: \x{100}\x{200}\x{300} ^^^^^^^^^^^^^^ # Test UTF characters in a substitution /ábc/utf,replace=XሴZ 123ábc123 1: 123X\x{1234}Z123 /(?<=abc)(|def)/g,utf,replace=<$0> 123abcáyzabcdef789abcሴqr 4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr /[A-`]/iB,utf ------------------------------------------------------------------ Bra [A-z\x{212a}\x{17f}] Ket End ------------------------------------------------------------------ abcdefghijklmno 0: a /(?<=\K\x{17f})/g,utf,aftertext,allow_lookaround_bsk \x{17f}\x{17f}\x{17f}\x{17f}\x{17f} 0: \x{17f} 0+ \x{17f}\x{17f}\x{17f}\x{17f} 0: \x{17f} 0+ \x{17f}\x{17f}\x{17f} 0: \x{17f} 0+ \x{17f}\x{17f} 0: \x{17f} 0+ \x{17f} 0: \x{17f} 0+ /(?<=\K\x{17f})/altglobal,utf,aftertext,allow_lookaround_bsk \x{17f}\x{17f}\x{17f}\x{17f}\x{17f} 0: \x{17f} 0+ \x{17f}\x{17f}\x{17f}\x{17f} 0: \x{17f} 0+ \x{17f}\x{17f}\x{17f} 0: \x{17f} 0+ \x{17f}\x{17f} 0: \x{17f} 0+ \x{17f} 0: \x{17f} 0+ "\xa\xf<(.\pZ*\P{Xwd}+^\xa8\3'3yq.::?(?J:()\xd1+!~:3'(8?:)':(?'d'(?'d'^u]!.+.+\\A\Ah(n+?9){7}+\K;(?'X'u'(?'c'(?'z'(?<y>\xb::\xf0'|\xd3(\xae?'w(z\x8?P>l)\x8?P>a)'\H\R\xd1+!!~:3'(?:h$N{26875}\W+?\\=D{2}\x89(?i:Uy0\N({2\xa(\v\x85*){y*\A(()\p{L}+?\P{^Xan}'+?\xff\+pS\?|).{;y*\A(()\p{L}+?\8}\d?1(|)(/1){7}.+[Lp{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(\xbf(R))\x8a\X*?\x8a\xb\xd1^9\3*+(\xc1,\k'R'\xb4)\xcc(z\z(?J)(?'X'\x1b(\xb\xd1^9\?'3*+P{^Xan}+?\xff\+(\xc1.]k+\xb'Pm'\xb4)\xcc4f\xa7'\xd1V(?i:U,{2,2})'(?'X'))?-%--\x95$9*\4'|\xd1(\x9c''%\x94$9)#(?'R')3\x7?('P\xed7'\xa8\xb1^u\xeaw\1\0\0\(|(?1){7}.+[\p{Me}].\s\xdcC*^\x14?(?(<y>))(?<!^)$C((;*?(R*?))+(?(R)\x8a\X*?\x8a\xb\xd1^9\3*+|(\xc1,\k'R'\xb4)\xcc! z)\z(?JJ)(?'X';(\xb\xd1^9\?'3*+(\xc1.]k+\xb'Pm'\xb4))':(?'d')(?'RD'(d')|)|$)'|(?<x>\g{d});\g{x}\x11\g{d}\x81\|$((?'X'\'X'(?'W''\x92()'9'\x83*))\xba*\!?^ <){)':;\xcc4'\xd1'(?'X'28))?-%--\x95$9*\4'|\xd1((''e\x94*$9:)*#(?'R')3)\x7?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+0!~:(?)'d'E:yD!\s(?'R'\x1e;\x10:U))|'\x9g!\xb0*){)\\x16:;()\x1e\x10\x87*:(?<y>)\xd1+!~:(?)'}'\d'E:yD!\s(?'R'\x1e;\x10:U))|'))|)g!\xb0*R+9{29+)#(?'P'})*?pS\{3,}\x85,{0,}l{*UTF)(\xe{7}){3722,{9,}d{2,?|))|{)\(A?&d}}{\xa,}2}){3,}7,l{)22}(,}l:7{2,4}}29\x19+)#?'P'})*v?))\x5" Failed: error 122 at offset 1227: unmatched closing parenthesis /$(&.+[\p{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(?(R)){0,6}?|){12\x8a\X*?\x8a\x0b\xd1^9\3*+(\xc1,\k'P'\xb4)\xcc(z\z(?JJ)(?'X'8};(\x0b\xd1^9\?'3*+(\xc1.]k+\x0b'Pm'\xb4\xcc4'\xd1'(?'X'))?-%--\x95$9*\4'|\xd1(''%\x95*$9)#(?'R')3\x07?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/ "(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'" Failed: error 162 at offset 113: subpattern name expected /[\pS#moq]/ = 0: = /(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark cxxxz 0: xxx MK: a\x{12345}b\x{09}(d)c /abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended abcd 1: x\x{824}y\x{6db}z(12\$34$$\x345$) /a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended a\x{e0}\x{101}\x{c0}\x{102} 1: a\x{c0}\x{101}\x{c0}\x{100}\x{e0}\x{101}\x{e0}\x{102}\x{e0}\x{103}ab\x{c0}\x{100}\x{f0}\x{161}Done /((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}> ab12cde 7: <not digit; letter><not digit; letter><digit; not a letter><digit; not a letter><not digit; letter><not digit; letter><not digit; letter> /(*UCP)(*UTF)[[:>:]]X/B ------------------------------------------------------------------ Bra \b Assert back Reverse prop Xwd Ket X Ket End ------------------------------------------------------------------ /abc/utf,replace=xyz abc\=zero_terminate 1: xyz /a[[:punct:]b]/ucp,bincode ------------------------------------------------------------------ Bra a [b[:punct:]] Ket End ------------------------------------------------------------------ /a[[:punct:]b]/utf,ucp,bincode ------------------------------------------------------------------ Bra a [b[:punct:]] Ket End ------------------------------------------------------------------ /a[b[:punct:]]/utf,ucp,bincode ------------------------------------------------------------------ Bra a [b[:punct:]] Ket End ------------------------------------------------------------------ /[[:^ascii:]]/utf,ucp,bincode ------------------------------------------------------------------ Bra [\x80-\xff] (neg) Ket End ------------------------------------------------------------------ /[[:^ascii:]\w]/utf,ucp,bincode ------------------------------------------------------------------ Bra [\x80-\xff\p{Xwd}\x{100}-\x{10ffff}] Ket End ------------------------------------------------------------------ /[\w[:^ascii:]]/utf,ucp,bincode ------------------------------------------------------------------ Bra [\x80-\xff\p{Xwd}\x{100}-\x{10ffff}] Ket End ------------------------------------------------------------------ /[^[:ascii:]\W]/utf,ucp,bincode ------------------------------------------------------------------ Bra [^\x00-\x7f\P{Xwd}] Ket End ------------------------------------------------------------------ \x{de} 0: \x{de} \x{200} 0: \x{200} \= Expect no match \x{300} No match \x{37e} No match /[[:^ascii:]a]/utf,ucp,bincode ------------------------------------------------------------------ Bra [a\x80-\xff] (neg) Ket End ------------------------------------------------------------------ /L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout ------------------------------------------------------------------ Bra Callout 255 0 14 L? Callout 255 14 0 Ket End ------------------------------------------------------------------ /L(?#(|++<!(2)?/B,utf,ucp,auto_callout ------------------------------------------------------------------ Bra Callout 255 0 14 L?+ Callout 255 14 0 Ket End ------------------------------------------------------------------ /(*UTF)C\x09((?<!'(?x)!*H? #\xcc\x9a[^$]/ Failed: error 114 at offset 39: missing closing parenthesis /[\D]/utf \x{1d7cf} 0: \x{1d7cf} /[\D\P{Nd}]/utf \x{1d7cf} 0: \x{1d7cf} /[^\D]/utf a9b 0: 9 \= Expect no match \x{1d7cf} No match /[^\D\P{Nd}]/utf a9b 0: 9 \x{1d7cf} 0: \x{1d7cf} \= Expect no match \x{10000} No match # Hex uses pattern length, not zero-terminated. This tests for overrunning # the given length of a pattern. /'(*UTF)'/hex /'#('/hex,extended,utf /a(?<=A\XB)/utf Failed: error 125 at offset 1: lookbehind assertion is not fixed length /ab(?<=A\RB)/utf Failed: error 125 at offset 2: lookbehind assertion is not fixed length /../utf,auto_callout \n\x{123}\x{123}\x{123}\x{123} --->\x{0a}\x{123}\x{123}\x{123}\x{123} +0 ^ . +0 ^ . +1 ^ ^ . +2 ^ ^ End of pattern 0: \x{123}\x{123} # This tests processing wide characters in extended mode. /XȀ/x,utf # These three test a bug fix that was not clearing up after a locale setting # when the test or a subsequent one matched a wide character. //locale=C /[\P{Yi}]/utf \x{2f000} 0: \x{2f000} /[\P{Yi}]/utf,locale=C \x{2f000} 0: \x{2f000} /^(?<!(?=))/B,utf ------------------------------------------------------------------ Bra ^ Assert back not Assert \x{10385c} Ket Ket Ket End ------------------------------------------------------------------ # Horizontal and vertical space lists ignore caseless /[\HH]/Bi,utf ------------------------------------------------------------------ Bra [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}] Ket End ------------------------------------------------------------------ /[^\HH]/Bi,utf ------------------------------------------------------------------ Bra [^\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{10ffff}] Ket End ------------------------------------------------------------------ //g,utf \=zero_terminate 0: /^(?1)\p{Nd}{3}(a)/ a123a 0: a123a 1: a /\p{Nd}{0,3}[\pL](*:abc)(?C1)xxx/callout_info Callout 1 x # --------------------------------------------------------------------------- # A bunch of tests that hit lines of code that others do not (at least when # these were created). /^[^a]{3,}?x/i,utf,no_start_optimize,no_auto_possess \= Expect no match bbb No match cc No match /^[ac]{3,}?x/i,utf,no_start_optimize,no_auto_possess \= Expect no match aaa\x{100} No match /^X\X/no_start_optimize,no_auto_possess \= Expect no match X No match /^X\p{L&}+?/no_start_optimize,no_auto_possess \= Expect no match X No match /^X\p{L}+?/no_start_optimize,no_auto_possess \= Expect no match X No match /^X\p{Lu}+?/no_start_optimize,no_auto_possess \= Expect no match X No match /^X\p{Arabic}+?/no_start_optimize,no_auto_possess \= Expect no match X No match /^X\p{Xan}+?/ucp,no_start_optimize,no_auto_possess \= Expect no match X No match /^X\s+?/ucp,no_start_optimize,no_auto_possess \= Expect no match X No match XX No match /^X\S+?/ucp,no_start_optimize,no_auto_possess XX 0: XX \= Expect no match X No match /^X\w+?/ucp,no_start_optimize,no_auto_possess \= Expect no match X No match /^X[^\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess \= Expect no match X No match /^X[\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess \= Expect no match X No match /^X\p{Xuc}+?/utf,no_start_optimize,no_auto_possess \= Expect no match X No match /^X.+?Z/s,utf,no_start_optimize,no_auto_possess \= Expect no match X No match /^X\R+?/utf,no_start_optimize,no_auto_possess \= Expect no match X No match /^X\H+?/utf,no_start_optimize,no_auto_possess \= Expect no match X No match /^X\V+?/utf,no_start_optimize,no_auto_possess \= Expect no match X No match /^X\s+?/utf,no_start_optimize,no_auto_possess \= Expect no match X No match XX No match /^X\S+?/utf,no_start_optimize,no_auto_possess \= Expect no match X No match /^X\p{Any}{1,3}?Z/s,no_start_optimize,no_auto_possess XYYYZ 0: XYYYZ \= Expect no match XY No match XYY No match XYYY No match XYYYYZ No match /^X\p{L&}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match XY No match XY! No match /^X\p{L}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match XY No match XY! No match /^X\p{Lu}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match XY No match XY! No match /^X\P{Han}{1,3}?Z/s,utf,no_start_optimize,no_auto_possess \= Expect no match XY No match XY! No match XY\x{2f00}! No match /^X\p{Xan}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match XY No match XY! No match /^X\p{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match X\n No match X\n! No match X\n\n! No match /^X\P{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match XYY\n No match /^X\p{Xwd}{1,3}?Z/s,no_start_optimize,no_auto_possess \= Expect no match XY No match XY! No match XYY! No match /^X\x{b5}+?Z/i,utf,no_start_optimize,no_auto_possess \= Expect no match X No match X\x{b5} No match X\x{b5}\x{b5}Y No match /^X\p{Xuc}+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X No match X$ No match X@@Y No match /(*CRLF)^X.+?Z/utf,no_start_optimize,no_auto_possess \= Expect partial match XYY\r\=ph Partial match: XYY\x{0d} \= Expect no match X No match /^X.+?Z/s,utf,no_start_optimize,no_auto_possess \= Expect no match X No match XYY No match /^X\R+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X\nX No match X\n\rX No match X\n\r\nX No match X\n\n No match X\n\x{0c} No match /(*BSR_ANYCRLF)^X\R+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X\nX No match X\n\rX No match X\n\r\nX No match X\n\n No match X\n\x{0c} No match /^X\H+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match XY\t No match XYY No match /^X\h+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X\t\t No match X\tY No match /^X\V+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match XY\n No match XYY No match /^X\v+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X\n\n No match X\nY No match /^X\D+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match XY9 No match XYY No match /^X\d+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X99 No match X9Y No match /^X\S+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match XY\n No match XYY No match /^X\s+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X\n\n No match X\nY No match /^X\W+?Z/utf,no_start_optimize,no_auto_possess \= Expect no match X.A No match X++ No match /^X\p{L&}{1,3}Z/no_start_optimize,no_auto_possess \= Expect no match XY No match XY! No match /^X\p{L}{1,3}Z/no_start_optimize,no_auto_possess \= Expect no match XY No match /^X\p{Xan}{1,3}Z/no_start_optimize,no_auto_possess \= Expect no match XY No match /^X\P{Xsp}{1,3}Z/no_start_optimize,no_auto_possess \= Expect no match XYY No match /^X\p{Xuc}+Z/utf,no_start_optimize,no_auto_possess \= Expect no match X$ No match # ---------------------------------------------------------------------- # These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option. /\x{d800}/B,utf,bad_escape_is_literal ------------------------------------------------------------------ Bra x{d800} Ket End ------------------------------------------------------------------ /\ud800/B,utf,alt_bsux,bad_escape_is_literal ------------------------------------------------------------------ Bra ud800 Ket End ------------------------------------------------------------------ # ---------------------------------------------------------------------- /Aሴ+B/literal,utf,no_utf_check Aሴ+B 0: A\x{1234}+B # These are here because I upgraded to Unicode 10.0.0 before Perl did, so it # doesn't recognize all these scripts. In time these three tests can be moved # to test 4. /^(\p{Adlam}+)(\p{Bhaiksuki}+)(\p{Marchen}+)(\p{Newa}+)(\p{Osage}+) (\p{Tangut}+)(\p{Masaram_Gondi}+)(\p{Nushu}+)(\p{Soyombo}+) (\p{Zanabazar_Square}+)/x,utf \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47} 0: \x{1e900}\x{1e924}\x{1e953}\x{11c00}\x{11c2d}\x{11c3e}\x{11c70}\x{11c77}\x{11cab}\x{11400}\x{1142f}\x{11455}\x{104b0}\x{104d8}\x{104fb}\x{16fe0}\x{18800}\x{18af2}\x{11d00}\x{11d3a}\x{11d59}\x{16fe1}\x{1b170}\x{1b2fb}\x{11a50}\x{11a58}\x{11aa2}\x{11a00}\x{11a07}\x{11a47} 1: \x{1e900}\x{1e924}\x{1e953} 2: \x{11c00}\x{11c2d}\x{11c3e} 3: \x{11c70}\x{11c77}\x{11cab} 4: \x{11400}\x{1142f}\x{11455} 5: \x{104b0}\x{104d8}\x{104fb} 6: \x{16fe0}\x{18800}\x{18af2} 7: \x{11d00}\x{11d3a}\x{11d59} 8: \x{16fe1}\x{1b170}\x{1b2fb} 9: \x{11a50}\x{11a58}\x{11aa2} 10: \x{11a00}\x{11a07}\x{11a47} /^\x{1E900}\x{104B0}/i,utf \x{1E900}\x{104B0} 0: \x{1e900}\x{104b0} \x{1E922}\x{104D8} 0: \x{1e922}\x{104d8} /^(?:(\X)(?C))+$/utf \x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}\=callout_capture,callout_no_where Callout 0: last capture = 1 1: \x{1e900} Callout 0: last capture = 1 1: \x{1e924} Callout 0: last capture = 1 1: \x{1e953} Callout 0: last capture = 1 1: \x{11c00} Callout 0: last capture = 1 1: \x{11c2d}\x{11c3e} Callout 0: last capture = 1 1: \x{11c70} Callout 0: last capture = 1 1: \x{11c77}\x{11cab} Callout 0: last capture = 1 1: \x{11400} Callout 0: last capture = 1 1: \x{1142f} Callout 0: last capture = 1 1: \x{11455} Callout 0: last capture = 1 1: \x{104b0} Callout 0: last capture = 1 1: \x{104d8} Callout 0: last capture = 1 1: \x{104fb} Callout 0: last capture = 1 1: \x{16fe0} Callout 0: last capture = 1 1: \x{18800} Callout 0: last capture = 1 1: \x{18af2} Callout 0: last capture = 1 1: \x{11d00}\x{11d3a} Callout 0: last capture = 1 1: \x{11d59} Callout 0: last capture = 1 1: \x{16fe1} Callout 0: last capture = 1 1: \x{1b170} Callout 0: last capture = 1 1: \x{1b2fb} Callout 0: last capture = 1 1: \x{11a50}\x{11a58} Callout 0: last capture = 1 1: \x{11aa2} Callout 0: last capture = 1 1: \x{11a00}\x{11a07}\x{11a47} 0: \x{1e900}\x{1e924}\x{1e953}\x{11c00}\x{11c2d}\x{11c3e}\x{11c70}\x{11c77}\x{11cab}\x{11400}\x{1142f}\x{11455}\x{104b0}\x{104d8}\x{104fb}\x{16fe0}\x{18800}\x{18af2}\x{11d00}\x{11d3a}\x{11d59}\x{16fe1}\x{1b170}\x{1b2fb}\x{11a50}\x{11a58}\x{11aa2}\x{11a00}\x{11a07}\x{11a47} 1: \x{11a00}\x{11a07}\x{11a47} # Similarly for Unicode 11.0.0 /^(\p{Dogra}+)(\p{Gunjala_Gondi}+)(\p{Hanifi_Rohingya}+)(\p{Makasar}+) (\p{Medefaidrin}+)(\p{Old_Sogdian}+)(\p{Sogdian}+)/x,utf \x{11800}\x{11da9}\x{10d27}\x{11ee0}\x{16e48}\x{10f27}\x{10f30} 0: \x{11800}\x{11da9}\x{10d27}\x{11ee0}\x{16e48}\x{10f27}\x{10f30} 1: \x{11800} 2: \x{11da9} 3: \x{10d27} 4: \x{11ee0} 5: \x{16e48} 6: \x{10f27} 7: \x{10f30} # These two are here because of differences from Perl. /^\X/utf A\x{200d}B A ZWJ 0: A\x{200d} \x{261d}\x{261d}B Extended_Pictographic Extended_Pictographic 0: \x{261d}\x{261d} \x{261D}\x{1F3FB}B Extended_Pictographic Extend 0: \x{261d}\x{1f3fb} \x{1F1E6}\x{1F1E7}B RegionalIndicator RegionalIndicator 0: \x{1f1e6}\x{1f1e7} \x{261D}\x{1F3FB}\x{261d}B Extended_Pictographic Extend E-P 0: \x{261d}\x{1f3fb}\x{261d} \x{261D}\x{1F3FB}\x{200d}\x{261d}B Extended_Pictographic Extend ZWJ E-P 0: \x{261d}\x{1f3fb}\x{200d}\x{261d} # Regional indicators /^(\X)(\X)/utf,aftertext \x{1F1E6}\x{1F1E7}\x{1F1E7}B 0: \x{1f1e6}\x{1f1e7}\x{1f1e7} 0+ B 1: \x{1f1e6}\x{1f1e7} 2: \x{1f1e7} \x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B 0: \x{1f1e6}\x{1f1e7}\x{1f1e7}\x{1f1e6} 0+ B 1: \x{1f1e6}\x{1f1e7} 2: \x{1f1e7}\x{1f1e6} # More differences from Perl /^[\p{Arabic}]/utf \= Expect no match \x{650} No match \x{651} No match \x{652} No match \x{653} No match \x{654} No match \x{655} No match /^\p{Common}/utf \x{60c} 0: \x{60c} \x{61f} 0: \x{61f} \x{964} 0: \x{964} \x{965} 0: \x{965} /^\p{Inherited}/utf \x{64b} 0: \x{64b} \x{654} 0: \x{654} \x{655} 0: \x{655} \x{1D1AA} 0: \x{1d1aa} /\N{U+}/ Failed: error 193 at offset 2: \N{U+dddd} is supported only in Unicode (UTF) mode /\N{U+}/utf Failed: error 178 at offset 5: digits missing in \x{} or \o{} or \N{U+} /\N{U}/ Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u # This tests the non-UTF Unicode NEL pattern whitespace character, only # recognized by PCRE2 with /x when there is Unicode support. /A �B/x AB 0: AB # This tests Unicode Pattern White Space characters in verb names when they # are being processed with PCRE2_EXTENDED. Note: there are UTF-8 characters # with code points greater than 255 between A, B, and C in the pattern. /(*: AB C)abc/x,utf,mark,alt_verbnames abc 0: abc MK: ABC # Script run tests: auto-possessification /^(*sr:.*)/B,utf ------------------------------------------------------------------ Bra ^ Script run Any* Ket Ket End ------------------------------------------------------------------ paypаl.com A classic example of why script run checks are a good thing 0: payp /^(*sr:.*(*ACCEPT))/utf paypаl.com But *ACCEPT breaks things 0: payp\x{430}l.com But *ACCEPT breaks things /^(*sr:\x{2e80}*)/B,utf ------------------------------------------------------------------ Bra ^ Script run \x{2e80}*+ Ket Ket End ------------------------------------------------------------------ /^(*sr:\x{2e80}*)\x{2e80}/B,utf ------------------------------------------------------------------ Bra ^ Script run \x{2e80}* Ket \x{2e80} Ket End ------------------------------------------------------------------ /(?<!)(*sr:)/B ------------------------------------------------------------------ Bra Assert back not Ket Script run Ket Ket End ------------------------------------------------------------------ /(?<=abc(?=X(*sr:BXY)CCC)XBXYCCC)./B ------------------------------------------------------------------ Bra Assert back Reverse abc Assert X Script run BXY Ket CCC Ket XBXYCCC Ket Any Ket End ------------------------------------------------------------------ abcXBXYCCC! 0: ! # Some script run patterns are broken in Perl 5.28.0. These can be moved into # test 4 when a mended version of Perl is released. /^(*sr:.{4})/utf \x{0980}12\x{0993} Bengali Common-digits Bengali 0: \x{980}12\x{993} \x{0780}12\x{07b1} Thaana Common-digits Thaana 0: \x{780}12\x{7b1} \x{0e01}12\x{0e5b} Thai Common-digits Thai 0: \x{e01}12\x{e5b} \x{1780}12\x{19ff} Khmer Common-digits Khmer 0: \x{1780}12\x{19ff} \x{0904}12\x{0939} Devanagari Common-digits Devanagari 0: \x{904}12\x{939} A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin 0: A\x{ff10}\x{ff19}B A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin 0: A\x{1d7ce}\x{1d7cf}B # These ones involve non-ASCII but nevertheless Common digits. As of October # 2018 even blead Perl wasn't handling all of these - but is going to. /^(*sr:.{4})/utf A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin 0: A\x{ff10}\x{ff19}B \x{ff10}\x{ff19}.. Common-notascii-digits Common Common 0: \x{ff10}\x{ff19}.. A\x{ff10}BC Latin Common-notascii-digit Latin Latin 0: A\x{ff10}BC A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin 0: A\x{1d7ce}\x{1d7cf}B \x{1d7ce}\x{1d7cf},, fancy-common-digits Common Common 0: \x{1d7ce}\x{1d7cf},, A\x{1d7ce}BC Latin fancy-common-digit Latin Latin 0: A\x{1d7ce}BC # Some Unicode 12.1.0 new script characters /\p{Elymaic}\p{Nandinagari}\p{Nyiakeng_Puachue_Hmong}\p{Wancho}/utf \x{10fe5}\x{119AC}\x{1E10E}\x{1E2D1} 0: \x{10fe5}\x{119ac}\x{1e10e}\x{1e2d1} # Some Unicode 13.0.0 new script characters /\p{Chorasmian}\p{Dives_Akuru}\p{Khitan_Small_Script}\p{Yezidi}/utf \x{10FB0}\x{11900}\x{18B00}\x{10E80} 0: \x{10fb0}\x{11900}\x{18b00}\x{10e80} # ------- # Test reference and errors in non-ASCII characters in group names /(?'𑠅ABC'...)/I,utf Capture group count = 1 Named capture groups: 𑠅ABC 1 Options: utf Subject length lower bound = 3 abcde\=copy=𑠅ABC 0: abc 1: abc C abc (3) 𑠅ABC (group 1) # Bad ones /(?'AB၌C'...)\g{AB၌C}/utf Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?) /(?'٠ABC'...)/utf Failed: error 144 at offset 3: subpattern name must start with a non-digit /(?'²ABC'...)/utf Failed: error 162 at offset 3: subpattern name expected /(?'X²ABC'...)/utf Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?) # ------- /\p{Any}*xyz/I Capture group count = 0 Compile options: <none> Overall options: anchored Last code unit = 'z' Subject length lower bound = 3 /(|�)7/caseless,ucp /(\xc1)\1/i,ucp \xc1\xe1\=no_jit 0: \xc1\xe1 1: \xc1 /\p{L&}+\p{bidi_control}/B ------------------------------------------------------------------ Bra prop Lc ++ prop Bidicontrol Ket End ------------------------------------------------------------------ /\p{bidi_control}+\p{L&}/B ------------------------------------------------------------------ Bra prop Bidicontrol ++ prop Lc Ket End ------------------------------------------------------------------ # End of testinput5