diff --git a/testdata/testinput10 b/testdata/testinput10 index effdd1b..d14e222 100644 --- a/testdata/testinput10 +++ b/testdata/testinput10 @@ -395,4 +395,62 @@ /s+/Ii,utf SSss\x{17f} +/\x{100}*A/IB,utf + A + +/\x{100}*\d(?R)/IB,utf + +/[Z\x{100}]/IB,utf + Z\x{100} + \x{100} + \x{100}Z + *** Failers + +/[z-\x{100}]/IB,utf + +/[z\Qa-d]Ä€\E]/IB,utf + \x{100} + Ä€ + +/[ab\x{100}]abc(xyz(?1))/IB,utf + +/\x{100}*\s/IB,utf + +/\x{100}*\d/IB,utf + +/\x{100}*\w/IB,utf + +/\x{100}*\D/IB,utf + +/\x{100}*\S/IB,utf + +/\x{100}*\W/IB,utf + +/[\x{105}-\x{109}]/IBi,utf + \x{104} + \x{105} + \x{109} + ** Failers + \x{100} + \x{10a} + +/[z-\x{100}]/IBi,utf + Z + z + \x{39c} + \x{178} + | + \x{80} + \x{ff} + \x{100} + \x{101} + ** Failers + \x{102} + Y + y + +/[z-\x{100}]/IBi,utf + +/\x{3a3}B/IBi,utf + # End of testinput10 diff --git a/testdata/testinput12 b/testdata/testinput12 index e47c9d2..73b18bc 100644 --- a/testdata/testinput12 +++ b/testdata/testinput12 @@ -1,6 +1,6 @@ -# This set of tests is for UTF-16 and UTF-32 support, and is relevant only to -# the 16-bit and 32-bit libraries. The output is different for each library, -# so there are separate output files. +# This set of tests is for UTF-16 and UTF-32 support, including Unicode +# properties. It is relevant only to the 16-bit and 32-bit libraries. The +# output is different for each library, so there are separate output files. /ÃÃÃxxx/IB,utf,no_utf_check @@ -329,4 +329,62 @@ /\C/utf \x{110000} +/\x{100}*A/IB,utf + A + +/\x{100}*\d(?R)/IB,utf + +/[Z\x{100}]/IB,utf + Z\x{100} + \x{100} + \x{100}Z + *** Failers + +/[z-\x{100}]/IB,utf + +/[z\Qa-d]Ä€\E]/IB,utf + \x{100} + Ä€ + +/[ab\x{100}]abc(xyz(?1))/IB,utf + +/\x{100}*\s/IB,utf + +/\x{100}*\d/IB,utf + +/\x{100}*\w/IB,utf + +/\x{100}*\D/IB,utf + +/\x{100}*\S/IB,utf + +/\x{100}*\W/IB,utf + +/[\x{105}-\x{109}]/IBi,utf + \x{104} + \x{105} + \x{109} + ** Failers + \x{100} + \x{10a} + +/[z-\x{100}]/IBi,utf + Z + z + \x{39c} + \x{178} + | + \x{80} + \x{ff} + \x{100} + \x{101} + ** Failers + \x{102} + Y + y + +/[z-\x{100}]/IBi,utf + +/\x{3a3}B/IBi,utf + # End of testinput12 diff --git a/testdata/testinput2 b/testdata/testinput2 index bdcd801..dde243d 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -10,6 +10,20 @@ #forbid_utf +# Test binary zeroes in the pattern + +# /a\0B/ where 0 is a binary zero +/61 5c 00 62/B,hex + a\x{0}b + +# /a0b/ where 0 is a binary zero +/61 00 62/B,hex + a\x{0}b + +# /(?#B0C)DE/ where 0 is a binary zero +/28 3f 23 42 00 43 29 44 45/B,hex + DE + /(a)b|/I /abc/I diff --git a/testdata/testinput5 b/testdata/testinput5 index 935cf98..a2431b1 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -1,6 +1,7 @@ # This set of tests checks the API, internals, and non-Perl stuff for UTF -# support, excluding Unicode properties. However, tests that give different -# results in 8-bit and 16-bit modes are excluded (see tests 16 and 17). +# support, including Unicode properties. However, tests that give different +# results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and +# 12). /\x{110000}/IB,utf @@ -1566,4 +1567,7 @@ /^s?c/Iim,utf scat +/\X?abc/utf,no_start_optimize +\xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06 + # End of testinput5 diff --git a/testdata/testoutput10 b/testdata/testoutput10 index 5e83693..f5e273d 100644 --- a/testdata/testoutput10 +++ b/testdata/testoutput10 @@ -1270,4 +1270,333 @@ Subject length lower bound = 1 SSss\x{17f} 0: SSss\x{17f} +/\x{100}*A/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + A + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: A \xc4 +Last code unit = 'A' +Subject length lower bound = 1 + A + 0: A + +/\x{100}*\d(?R)/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \d + Recurse + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4 +No last code unit +Subject length lower bound = 1 + +/[Z\x{100}]/IB,utf +------------------------------------------------------------------ + Bra + [Z\x{100}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: Z \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd + \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc + \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb + \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa + \xfb \xfc \xfd \xfe \xff +No last code unit +Subject length lower bound = 1 + Z\x{100} + 0: Z + \x{100} + 0: \x{100} + \x{100}Z + 0: \x{100} + *** Failers +No match + +/[z-\x{100}]/IB,utf +------------------------------------------------------------------ + Bra + [z-\xff\x{100}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 + \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 + \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 + \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 + \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +No last code unit +Subject length lower bound = 1 + +/[z\Qa-d]Ä€\E]/IB,utf +------------------------------------------------------------------ + Bra + [\-\]adz\x{100}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: - ] a d z \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc + \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb + \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea + \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 + \xfa \xfb \xfc \xfd \xfe \xff +No last code unit +Subject length lower bound = 1 + \x{100} + 0: \x{100} + Ä€ + 0: \x{100} + +/[ab\x{100}]abc(xyz(?1))/IB,utf +------------------------------------------------------------------ + Bra + [ab\x{100}] + abc + CBra 1 + xyz + Recurse + Ket + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Options: utf +Starting code units: a b \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd + \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc + \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb + \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa + \xfb \xfc \xfd \xfe \xff +Last code unit = 'z' +Subject length lower bound = 7 + +/\x{100}*\s/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \s + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc4 +No last code unit +Subject length lower bound = 1 + +/\x{100}*\d/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \d + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4 +No last code unit +Subject length lower bound = 1 + +/\x{100}*\w/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \w + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z + \xc4 +No last code unit +Subject length lower bound = 1 + +/\x{100}*\D/IB,utf +------------------------------------------------------------------ + Bra + \x{100}* + \D + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c + d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff +No last code unit +Subject length lower bound = 1 + +/\x{100}*\S/IB,utf +------------------------------------------------------------------ + Bra + \x{100}* + \S + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 + \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 + \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 + \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 + \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +No last code unit +Subject length lower bound = 1 + +/\x{100}*\W/IB,utf +------------------------------------------------------------------ + Bra + \x{100}* + \W + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? @ [ \ ] ^ ` { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 + \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 + \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 + \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 + \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +No last code unit +Subject length lower bound = 1 + +/[\x{105}-\x{109}]/IBi,utf +------------------------------------------------------------------ + Bra + [\x{104}-\x{109}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: caseless utf +Starting code units: \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce + \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd + \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec + \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb + \xfc \xfd \xfe \xff +No last code unit +Subject length lower bound = 1 + \x{104} + 0: \x{104} + \x{105} + 0: \x{105} + \x{109} + 0: \x{109} + ** Failers +No match + \x{100} +No match + \x{10a} +No match + +/[z-\x{100}]/IBi,utf +------------------------------------------------------------------ + Bra + [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: caseless utf +Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 + \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 + \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 + \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 + \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +No last code unit +Subject length lower bound = 1 + Z + 0: Z + z + 0: z + \x{39c} + 0: \x{39c} + \x{178} + 0: \x{178} + | + 0: | + \x{80} + 0: \x{80} + \x{ff} + 0: \x{ff} + \x{100} + 0: \x{100} + \x{101} + 0: \x{101} + ** Failers +No match + \x{102} +No match + Y +No match + y +No match + +/[z-\x{100}]/IBi,utf +------------------------------------------------------------------ + Bra + [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: caseless utf +Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 + \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 + \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 + \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 + \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +No last code unit +Subject length lower bound = 1 + +/\x{3a3}B/IBi,utf +------------------------------------------------------------------ + Bra + clist 03a3 03c2 03c3 + /i B + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: caseless utf +Starting code units: \xce \xcf +Last code unit = 'B' (caseless) +Subject length lower bound = 2 + # End of testinput10 diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 index 78dfd62..7eeb728 100644 --- a/testdata/testoutput12-16 +++ b/testdata/testoutput12-16 @@ -1,6 +1,6 @@ -# This set of tests is for UTF-16 and UTF-32 support, and is relevant only to -# the 16-bit and 32-bit libraries. The output is different for each library, -# so there are separate output files. +# This set of tests is for UTF-16 and UTF-32 support, including Unicode +# properties. It is relevant only to the 16-bit and 32-bit libraries. The +# output is different for each library, so there are separate output files. /ÃÃÃxxx/IB,utf,no_utf_check ** Failed: invalid UTF-8 string cannot be converted to 16-bit string @@ -1156,4 +1156,344 @@ Failed: error 134 at offset 10: character code point value in \x{} or \o{} is to \x{110000} ** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16 +/\x{100}*A/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + A + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: A \xff +Last code unit = 'A' +Subject length lower bound = 1 + A + 0: A + +/\x{100}*\d(?R)/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \d + Recurse + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff +No last code unit +Subject length lower bound = 1 + +/[Z\x{100}]/IB,utf +------------------------------------------------------------------ + Bra + [Z\x{100}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: Z \xff +No last code unit +Subject length lower bound = 1 + Z\x{100} + 0: Z + \x{100} + 0: \x{100} + \x{100}Z + 0: \x{100} + *** Failers +No match + +/[z-\x{100}]/IB,utf +------------------------------------------------------------------ + Bra + [z-\xff\x{100}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 + \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 + \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 + \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 + \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 + \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 + \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 + \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 + \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +No last code unit +Subject length lower bound = 1 + +/[z\Qa-d]Ä€\E]/IB,utf +------------------------------------------------------------------ + Bra + [\-\]adz\x{100}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: - ] a d z \xff +No last code unit +Subject length lower bound = 1 + \x{100} + 0: \x{100} + Ä€ + 0: \x{100} + +/[ab\x{100}]abc(xyz(?1))/IB,utf +------------------------------------------------------------------ + Bra + [ab\x{100}] + abc + CBra 1 + xyz + Recurse + Ket + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Options: utf +Starting code units: a b \xff +Last code unit = 'z' +Subject length lower bound = 7 + +/\x{100}*\s/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \s + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff +No last code unit +Subject length lower bound = 1 + +/\x{100}*\d/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \d + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff +No last code unit +Subject length lower bound = 1 + +/\x{100}*\w/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \w + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z + \xff +No last code unit +Subject length lower bound = 1 + +/\x{100}*\D/IB,utf +------------------------------------------------------------------ + Bra + \x{100}* + \D + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c + d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 + \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 + \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 + \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf + \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe + \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd + \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc + \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb + \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa + \xfb \xfc \xfd \xfe \xff +No last code unit +Subject length lower bound = 1 + +/\x{100}*\S/IB,utf +------------------------------------------------------------------ + Bra + \x{100}* + \S + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 + \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 + \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 + \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 + \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf + \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde + \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed + \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc + \xfd \xfe \xff +No last code unit +Subject length lower bound = 1 + +/\x{100}*\W/IB,utf +------------------------------------------------------------------ + Bra + \x{100}* + \W + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? @ [ \ ] ^ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 + \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 + \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 + \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 + \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 + \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 + \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 + \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 + \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +No last code unit +Subject length lower bound = 1 + +/[\x{105}-\x{109}]/IBi,utf +------------------------------------------------------------------ + Bra + [\x{104}-\x{109}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: caseless utf +Starting code units: \xff +No last code unit +Subject length lower bound = 1 + \x{104} + 0: \x{104} + \x{105} + 0: \x{105} + \x{109} + 0: \x{109} + ** Failers +No match + \x{100} +No match + \x{10a} +No match + +/[z-\x{100}]/IBi,utf +------------------------------------------------------------------ + Bra + [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: caseless utf +Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 + \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 + \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 + \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 + \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff +No last code unit +Subject length lower bound = 1 + Z + 0: Z + z + 0: z + \x{39c} + 0: \x{39c} + \x{178} + 0: \x{178} + | + 0: | + \x{80} + 0: \x{80} + \x{ff} + 0: \x{ff} + \x{100} + 0: \x{100} + \x{101} + 0: \x{101} + ** Failers +No match + \x{102} +No match + Y +No match + y +No match + +/[z-\x{100}]/IBi,utf +------------------------------------------------------------------ + Bra + [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: caseless utf +Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 + \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 + \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 + \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 + \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff +No last code unit +Subject length lower bound = 1 + +/\x{3a3}B/IBi,utf +------------------------------------------------------------------ + Bra + clist 03a3 03c2 03c3 + /i B + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: caseless utf +Starting code units: \xff +Last code unit = 'B' (caseless) +Subject length lower bound = 2 + # End of testinput12 diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 index 9f7393e..70642c8 100644 --- a/testdata/testoutput12-32 +++ b/testdata/testoutput12-32 @@ -1,6 +1,6 @@ -# This set of tests is for UTF-16 and UTF-32 support, and is relevant only to -# the 16-bit and 32-bit libraries. The output is different for each library, -# so there are separate output files. +# This set of tests is for UTF-16 and UTF-32 support, including Unicode +# properties. It is relevant only to the 16-bit and 32-bit libraries. The +# output is different for each library, so there are separate output files. /ÃÃÃxxx/IB,utf,no_utf_check ** Failed: invalid UTF-8 string cannot be converted to 32-bit string @@ -1154,4 +1154,344 @@ Failed: error 134 at offset 10: character code point value in \x{} or \o{} is to \x{110000} Failed: error -28: UTF-32 error: code points greater than 0x10ffff are not defined +/\x{100}*A/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + A + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: A \xff +Last code unit = 'A' +Subject length lower bound = 1 + A + 0: A + +/\x{100}*\d(?R)/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \d + Recurse + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff +No last code unit +Subject length lower bound = 1 + +/[Z\x{100}]/IB,utf +------------------------------------------------------------------ + Bra + [Z\x{100}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: Z \xff +No last code unit +Subject length lower bound = 1 + Z\x{100} + 0: Z + \x{100} + 0: \x{100} + \x{100}Z + 0: \x{100} + *** Failers +No match + +/[z-\x{100}]/IB,utf +------------------------------------------------------------------ + Bra + [z-\xff\x{100}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 + \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 + \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 + \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 + \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 + \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 + \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 + \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 + \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +No last code unit +Subject length lower bound = 1 + +/[z\Qa-d]Ä€\E]/IB,utf +------------------------------------------------------------------ + Bra + [\-\]adz\x{100}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: - ] a d z \xff +No last code unit +Subject length lower bound = 1 + \x{100} + 0: \x{100} + Ä€ + 0: \x{100} + +/[ab\x{100}]abc(xyz(?1))/IB,utf +------------------------------------------------------------------ + Bra + [ab\x{100}] + abc + CBra 1 + xyz + Recurse + Ket + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 1 +Options: utf +Starting code units: a b \xff +Last code unit = 'z' +Subject length lower bound = 7 + +/\x{100}*\s/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \s + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff +No last code unit +Subject length lower bound = 1 + +/\x{100}*\d/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \d + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff +No last code unit +Subject length lower bound = 1 + +/\x{100}*\w/IB,utf +------------------------------------------------------------------ + Bra + \x{100}*+ + \w + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P + Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z + \xff +No last code unit +Subject length lower bound = 1 + +/\x{100}*\D/IB,utf +------------------------------------------------------------------ + Bra + \x{100}* + \D + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c + d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 + \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 + \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 + \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf + \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe + \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd + \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc + \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb + \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa + \xfb \xfc \xfd \xfe \xff +No last code unit +Subject length lower bound = 1 + +/\x{100}*\S/IB,utf +------------------------------------------------------------------ + Bra + \x{100}* + \S + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f + \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e + \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C + D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h + i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 + \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 + \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 + \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 + \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 + \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf + \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde + \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed + \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc + \xfd \xfe \xff +No last code unit +Subject length lower bound = 1 + +/\x{100}*\W/IB,utf +------------------------------------------------------------------ + Bra + \x{100}* + \W + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: utf +Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a + \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 + \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = > + ? @ [ \ ] ^ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 + \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 + \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 + \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 + \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 + \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 + \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 + \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 + \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff +No last code unit +Subject length lower bound = 1 + +/[\x{105}-\x{109}]/IBi,utf +------------------------------------------------------------------ + Bra + [\x{104}-\x{109}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: caseless utf +Starting code units: \xff +No last code unit +Subject length lower bound = 1 + \x{104} + 0: \x{104} + \x{105} + 0: \x{105} + \x{109} + 0: \x{109} + ** Failers +No match + \x{100} +No match + \x{10a} +No match + +/[z-\x{100}]/IBi,utf +------------------------------------------------------------------ + Bra + [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: caseless utf +Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 + \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 + \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 + \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 + \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff +No last code unit +Subject length lower bound = 1 + Z + 0: Z + z + 0: z + \x{39c} + 0: \x{39c} + \x{178} + 0: \x{178} + | + 0: | + \x{80} + 0: \x{80} + \x{ff} + 0: \x{ff} + \x{100} + 0: \x{100} + \x{101} + 0: \x{101} + ** Failers +No match + \x{102} +No match + Y +No match + y +No match + +/[z-\x{100}]/IBi,utf +------------------------------------------------------------------ + Bra + [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: caseless utf +Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 + \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 + \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 + \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 + \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 + \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 + \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 + \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef + \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe + \xff +No last code unit +Subject length lower bound = 1 + +/\x{3a3}B/IBi,utf +------------------------------------------------------------------ + Bra + clist 03a3 03c2 03c3 + /i B + Ket + End +------------------------------------------------------------------ +Capturing subpattern count = 0 +Options: caseless utf +Starting code units: \xff +Last code unit = 'B' (caseless) +Subject length lower bound = 2 + # End of testinput12 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 3cb8a6c..e649e11 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -10,6 +10,41 @@ #forbid_utf +# Test binary zeroes in the pattern + +# /a\0B/ where 0 is a binary zero +/61 5c 00 62/B,hex +------------------------------------------------------------------ + Bra + a\x00b + Ket + End +------------------------------------------------------------------ + a\x{0}b + 0: a\x00b + +# /a0b/ where 0 is a binary zero +/61 00 62/B,hex +------------------------------------------------------------------ + Bra + a\x00b + Ket + End +------------------------------------------------------------------ + a\x{0}b + 0: a\x00b + +# /(?#B0C)DE/ where 0 is a binary zero +/28 3f 23 42 00 43 29 44 45/B,hex +------------------------------------------------------------------ + Bra + DE + Ket + End +------------------------------------------------------------------ + DE + 0: DE + /(a)b|/I Capturing subpattern count = 1 May match empty string diff --git a/testdata/testoutput5 b/testdata/testoutput5 index d58cdb8..d1bb20a 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -1,6 +1,7 @@ # This set of tests checks the API, internals, and non-Perl stuff for UTF -# support, excluding Unicode properties. However, tests that give different -# results in 8-bit and 16-bit modes are excluded (see tests 16 and 17). +# support, including Unicode properties. However, tests that give different +# results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and +# 12). /\x{110000}/IB,utf @@ -3960,4 +3961,8 @@ Subject length lower bound = 1 scat 0: sc +/\X?abc/utf,no_start_optimize +\xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06 + 0: A\x{300}abc + # End of testinput5