Adjust tests for PCRE/Perl anomalies in character properties & fix one bug.
This commit is contained in:
parent
fd555f266c
commit
f40fba5dc8
|
@ -470,6 +470,9 @@ general substitute of a Unicode property escape (\p or \P). However, for some
|
|||
POSIX classes (e.g. graph, print, punct) a special property code is compiled
|
||||
directly. */
|
||||
|
||||
static const PCRE2_UCHAR string_pCc[] = {
|
||||
CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
|
||||
CHAR_C, CHAR_c, CHAR_RIGHT_CURLY_BRACKET, '\0' };
|
||||
static const PCRE2_UCHAR string_pL[] = {
|
||||
CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
|
||||
CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
|
||||
|
@ -487,6 +490,9 @@ static const PCRE2_UCHAR string_h[] = {
|
|||
static const PCRE2_UCHAR string_pXps[] = {
|
||||
CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
|
||||
CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
|
||||
static const PCRE2_UCHAR string_PCc[] = {
|
||||
CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
|
||||
CHAR_C, CHAR_c, CHAR_RIGHT_CURLY_BRACKET, '\0' };
|
||||
static const PCRE2_UCHAR string_PL[] = {
|
||||
CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
|
||||
CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
|
||||
|
@ -512,7 +518,7 @@ static PCRE2_SPTR posix_substitutes[] = {
|
|||
string_pXan, /* alnum */
|
||||
NULL, /* ascii */
|
||||
string_h, /* blank */
|
||||
NULL, /* cntrl */
|
||||
string_pCc, /* cntrl */
|
||||
string_pNd, /* digit */
|
||||
NULL, /* graph */
|
||||
NULL, /* print */
|
||||
|
@ -527,7 +533,7 @@ static PCRE2_SPTR posix_substitutes[] = {
|
|||
string_PXan, /* ^alnum */
|
||||
NULL, /* ^ascii */
|
||||
string_H, /* ^blank */
|
||||
NULL, /* ^cntrl */
|
||||
string_PCc, /* ^cntrl */
|
||||
string_PNd, /* ^digit */
|
||||
NULL, /* ^graph */
|
||||
NULL, /* ^print */
|
||||
|
|
|
@ -389,6 +389,11 @@ other. NOTE: The values also appear in pcre2_jit_compile.c. */
|
|||
|
||||
#ifndef EBCDIC
|
||||
|
||||
/* Character U+180E (Mongolian Vowel Separator) is not included in the list of
|
||||
spaces in the Unicode file PropList.txt, and Perl does not recognize it as a
|
||||
space. However, in many other sources it is listed as a space and has been in
|
||||
PCRE for a long time. */
|
||||
|
||||
#define HSPACE_LIST \
|
||||
CHAR_HT, CHAR_SPACE, 0xa0, \
|
||||
0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \
|
||||
|
|
|
@ -1139,7 +1139,6 @@
|
|||
\x{06e9}
|
||||
\x{060b}
|
||||
** Failers
|
||||
\x{061c}
|
||||
X\x{06e9}
|
||||
|
||||
/^[\P{Yi}]/utf
|
||||
|
@ -1492,7 +1491,7 @@
|
|||
>\x{20}\x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{9}\x{b}
|
||||
|
||||
/^>[[:blank:]]*/utf,ucp
|
||||
>\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
|
||||
>\x{20}\x{a0}\x{1680}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
|
||||
|
||||
/^[[:alpha:]]*/utf,ucp
|
||||
Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}
|
||||
|
@ -2045,11 +2044,11 @@
|
|||
|
||||
/^A\s+Z/utf,ucp
|
||||
A\x{2005}Z
|
||||
A\x{85}\x{180e}\x{2005}Z
|
||||
A\x{85}\x{2005}Z
|
||||
|
||||
/^A[\s]+Z/utf,ucp
|
||||
A\x{2005}Z
|
||||
A\x{85}\x{180e}\x{2005}Z
|
||||
A\x{85}\x{2005}Z
|
||||
|
||||
/^[[:graph:]]+$/utf,ucp
|
||||
Letter:ABC
|
||||
|
@ -2075,17 +2074,11 @@
|
|||
\x{20}
|
||||
\x{85}
|
||||
\x{a0}
|
||||
\x{61c}
|
||||
\x{1680}
|
||||
\x{180e}
|
||||
\x{2028}
|
||||
\x{2029}
|
||||
\x{202f}
|
||||
\x{2065}
|
||||
\x{2066}
|
||||
\x{2067}
|
||||
\x{2068}
|
||||
\x{2069}
|
||||
\x{3000}
|
||||
\x{e0002}
|
||||
\x{e001f}
|
||||
|
@ -2103,7 +2096,6 @@
|
|||
Punctuation:\x{66a},;
|
||||
Symbol:\x{6de}<>\x{fffc}
|
||||
Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f}
|
||||
\x{180e}
|
||||
\x{200b}\x{200c}\x{200d}\x{200e}\x{200f}
|
||||
\x{202a}\x{202b}\x{202c}\x{202d}\x{202e}
|
||||
\x{202f}
|
||||
|
@ -2119,14 +2111,9 @@
|
|||
\x{09}
|
||||
\x{1D}
|
||||
\x{85}
|
||||
\x{61c}
|
||||
\x{2028}
|
||||
\x{2029}
|
||||
\x{2065}
|
||||
\x{2066}
|
||||
\x{2067}
|
||||
\x{2068}
|
||||
\x{2069}
|
||||
\x{e0002}
|
||||
\x{e001f}
|
||||
\x{e0080}
|
||||
|
@ -2140,8 +2127,8 @@
|
|||
abcde
|
||||
|
||||
/^[[:^graph:]]+$/utf,ucp
|
||||
\x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e}
|
||||
\x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
|
||||
\x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{1680}
|
||||
\x{2028}\x{2029}\x{202f}\x{2065}
|
||||
\x{3000}\x{e0002}\x{e001f}\x{e0080}
|
||||
** Failers
|
||||
Letter:ABC
|
||||
|
@ -2162,8 +2149,8 @@
|
|||
\x{e0020}\x{e0030}\x{e0040}\x{e0050}\x{e0060}\x{e0070}\x{e007f}
|
||||
|
||||
/^[[:^print:]]+$/utf,ucp
|
||||
\x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
|
||||
\x{2068}\x{2069}\x{e0002}\x{e001f}\x{e0080}
|
||||
\x{09}\x{1D}\x{85}\x{2028}\x{2029}\x{2065}
|
||||
\x{e0002}\x{e001f}\x{e0080}
|
||||
** Failers
|
||||
Space: \x{a0}
|
||||
\x{1680}\x{2000}\x{2001}\x{2002}\x{2003}\x{2004}\x{2005}
|
||||
|
@ -2176,7 +2163,6 @@
|
|||
Punctuation:\x{66a},;
|
||||
Symbol:\x{6de}<>\x{fffc}
|
||||
Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f}
|
||||
\x{180e}
|
||||
\x{200b}\x{200c}\x{200d}\x{200e}\x{200f}
|
||||
\x{202a}\x{202b}\x{202c}\x{202d}\x{202e}
|
||||
\x{202f}
|
||||
|
|
|
@ -2,7 +2,72 @@
|
|||
# support, including Unicode properties. However, tests that give different
|
||||
# results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and
|
||||
# 12).
|
||||
|
||||
# PCRE2 and Perl disagree about the characteristics of certain Unicode
|
||||
# characters. For example, 061C is considered by Perl to be Arabic, though
|
||||
# it is not listed as such in the Unicode Scripts.txt file, and 2066-2069 are
|
||||
# graphic and printable according to Perl, though they are actually "isolate"
|
||||
# control characters. That is why the following tests are here rather than in
|
||||
# test 4.
|
||||
|
||||
/^[\p{Arabic}]/utf
|
||||
** Failers
|
||||
\x{061c}
|
||||
|
||||
/^[[:graph:]]+$/utf,ucp
|
||||
** Failers
|
||||
\x{61c}
|
||||
\x{2066}
|
||||
\x{2067}
|
||||
\x{2068}
|
||||
\x{2069}
|
||||
|
||||
/^[[:print:]]+$/utf,ucp
|
||||
** Failers
|
||||
\x{61c}
|
||||
\x{2066}
|
||||
\x{2067}
|
||||
\x{2068}
|
||||
\x{2069}
|
||||
|
||||
/^[[:^graph:]]+$/utf,ucp
|
||||
\x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}
|
||||
\x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
|
||||
|
||||
/^[[:^print:]]+$/utf,ucp
|
||||
\x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
|
||||
\x{2068}\x{2069}
|
||||
|
||||
# Perl does not consider U+180e to be a space character. It is true that it
|
||||
# does not appear in the Unicode PropList.txt file as such, but in many other
|
||||
# sources it is listed as a space, and has been treated as such in PCRE for
|
||||
# a long time.
|
||||
|
||||
/^>[[:blank:]]*/utf,ucp
|
||||
>\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
|
||||
|
||||
/^A\s+Z/utf,ucp
|
||||
A\x{85}\x{180e}\x{2005}Z
|
||||
|
||||
/^A[\s]+Z/utf,ucp
|
||||
A\x{2005}Z
|
||||
A\x{85}\x{2005}Z
|
||||
|
||||
/^[[:graph:]]+$/utf,ucp
|
||||
\x{180e}
|
||||
|
||||
/^[[:print:]]+$/utf,ucp
|
||||
\x{180e}
|
||||
|
||||
/^[[:^graph:]]+$/utf,ucp
|
||||
\x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e}
|
||||
|
||||
/^[[:^print:]]+$/utf,ucp
|
||||
\x{180e}
|
||||
|
||||
# End of U+180E tests.
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
/\x{110000}/IB,utf
|
||||
|
||||
|
@ -872,9 +937,8 @@
|
|||
\x{2028}
|
||||
\x{200d}
|
||||
|
||||
# These are here rather than in test 6 because Perl has problems with
|
||||
# the negative versions of the properties and behaves has changed how
|
||||
# it behaves for caseless matching.
|
||||
# These are here because Perl has problems with the negative versions of the
|
||||
# properties and has changed how it behaves for caseless matching.
|
||||
|
||||
/\p{^Lu}/i,utf
|
||||
1234
|
||||
|
@ -1264,8 +1328,6 @@
|
|||
/(\x{2c65}\x{2c65})\1Y/i,utf
|
||||
X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ
|
||||
|
||||
#
|
||||
|
||||
# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE
|
||||
|
||||
/^[\p{Batak}]/utf
|
||||
|
@ -1287,8 +1349,6 @@
|
|||
\x{85c}
|
||||
\x{85d}
|
||||
|
||||
#
|
||||
|
||||
/(\X*)(.)/s,utf
|
||||
A\x{300}
|
||||
|
||||
|
|
|
@ -1983,8 +1983,6 @@ No match
|
|||
\x{060b}
|
||||
0: \x{60b}
|
||||
** Failers
|
||||
No match
|
||||
\x{061c}
|
||||
No match
|
||||
X\x{06e9}
|
||||
No match
|
||||
|
@ -2578,8 +2576,8 @@ No match
|
|||
0: > \x{a0}\x{1680}\x{2028}\x{2029}\x{202f}\x{09}\x{0b}
|
||||
|
||||
/^>[[:blank:]]*/utf,ucp
|
||||
>\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
|
||||
0: > \x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{09}
|
||||
>\x{20}\x{a0}\x{1680}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
|
||||
0: > \x{a0}\x{1680}\x{2000}\x{202f}\x{09}
|
||||
|
||||
/^[[:alpha:]]*/utf,ucp
|
||||
Az\x{aa}\x{c0}\x{1c5}\x{2b0}\x{3b6}\x{1d7c9}\x{2fa1d}
|
||||
|
@ -2591,7 +2589,7 @@ No match
|
|||
|
||||
/^[[:cntrl:]]*/utf,ucp
|
||||
\x{0}\x{09}\x{1f}\x{7f}\x{9f}
|
||||
0: \x{00}\x{09}\x{1f}\x{7f}
|
||||
0: \x{00}\x{09}\x{1f}\x{7f}\x{9f}
|
||||
|
||||
/^[[:graph:]]*/utf,ucp
|
||||
A\x{a1}\x{a0}
|
||||
|
@ -3414,14 +3412,14 @@ No match
|
|||
/^A\s+Z/utf,ucp
|
||||
A\x{2005}Z
|
||||
0: A\x{2005}Z
|
||||
A\x{85}\x{180e}\x{2005}Z
|
||||
0: A\x{85}\x{180e}\x{2005}Z
|
||||
A\x{85}\x{2005}Z
|
||||
0: A\x{85}\x{2005}Z
|
||||
|
||||
/^A[\s]+Z/utf,ucp
|
||||
A\x{2005}Z
|
||||
0: A\x{2005}Z
|
||||
A\x{85}\x{180e}\x{2005}Z
|
||||
0: A\x{85}\x{180e}\x{2005}Z
|
||||
A\x{85}\x{2005}Z
|
||||
0: A\x{85}\x{2005}Z
|
||||
|
||||
/^[[:graph:]]+$/utf,ucp
|
||||
Letter:ABC
|
||||
|
@ -3469,12 +3467,8 @@ No match
|
|||
\x{85}
|
||||
No match
|
||||
\x{a0}
|
||||
No match
|
||||
\x{61c}
|
||||
No match
|
||||
\x{1680}
|
||||
No match
|
||||
\x{180e}
|
||||
No match
|
||||
\x{2028}
|
||||
No match
|
||||
|
@ -3483,14 +3477,6 @@ No match
|
|||
\x{202f}
|
||||
No match
|
||||
\x{2065}
|
||||
No match
|
||||
\x{2066}
|
||||
No match
|
||||
\x{2067}
|
||||
No match
|
||||
\x{2068}
|
||||
No match
|
||||
\x{2069}
|
||||
No match
|
||||
\x{3000}
|
||||
No match
|
||||
|
@ -3524,8 +3510,6 @@ No match
|
|||
0: Symbol:\x{6de}<>\x{fffc}
|
||||
Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f}
|
||||
0: Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f}
|
||||
\x{180e}
|
||||
0: \x{180e}
|
||||
\x{200b}\x{200c}\x{200d}\x{200e}\x{200f}
|
||||
0: \x{200b}\x{200c}\x{200d}\x{200e}\x{200f}
|
||||
\x{202a}\x{202b}\x{202c}\x{202d}\x{202e}
|
||||
|
@ -3555,22 +3539,12 @@ No match
|
|||
\x{1D}
|
||||
No match
|
||||
\x{85}
|
||||
No match
|
||||
\x{61c}
|
||||
No match
|
||||
\x{2028}
|
||||
No match
|
||||
\x{2029}
|
||||
No match
|
||||
\x{2065}
|
||||
No match
|
||||
\x{2066}
|
||||
No match
|
||||
\x{2067}
|
||||
No match
|
||||
\x{2068}
|
||||
No match
|
||||
\x{2069}
|
||||
No match
|
||||
\x{e0002}
|
||||
No match
|
||||
|
@ -3594,10 +3568,10 @@ No match
|
|||
No match
|
||||
|
||||
/^[[:^graph:]]+$/utf,ucp
|
||||
\x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e}
|
||||
0: \x{09}\x{0a}\x{1d} \x{85}\x{a0}\x{61c}\x{1680}\x{180e}
|
||||
\x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
|
||||
0: \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
|
||||
\x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{1680}
|
||||
0: \x{09}\x{0a}\x{1d} \x{85}\x{a0}\x{1680}
|
||||
\x{2028}\x{2029}\x{202f}\x{2065}
|
||||
0: \x{2028}\x{2029}\x{202f}\x{2065}
|
||||
\x{3000}\x{e0002}\x{e001f}\x{e0080}
|
||||
0: \x{3000}\x{e0002}\x{e001f}\x{e0080}
|
||||
** Failers
|
||||
|
@ -3636,10 +3610,10 @@ No match
|
|||
No match
|
||||
|
||||
/^[[:^print:]]+$/utf,ucp
|
||||
\x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
|
||||
0: \x{09}\x{1d}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
|
||||
\x{2068}\x{2069}\x{e0002}\x{e001f}\x{e0080}
|
||||
0: \x{2068}\x{2069}\x{e0002}\x{e001f}\x{e0080}
|
||||
\x{09}\x{1D}\x{85}\x{2028}\x{2029}\x{2065}
|
||||
0: \x{09}\x{1d}\x{85}\x{2028}\x{2029}\x{2065}
|
||||
\x{e0002}\x{e001f}\x{e0080}
|
||||
0: \x{e0002}\x{e001f}\x{e0080}
|
||||
** Failers
|
||||
No match
|
||||
Space: \x{a0}
|
||||
|
@ -3663,8 +3637,6 @@ No match
|
|||
Symbol:\x{6de}<>\x{fffc}
|
||||
No match
|
||||
Cf-property:\x{ad}\x{600}\x{601}\x{602}\x{603}\x{604}\x{6dd}\x{70f}
|
||||
No match
|
||||
\x{180e}
|
||||
No match
|
||||
\x{200b}\x{200c}\x{200d}\x{200e}\x{200f}
|
||||
No match
|
||||
|
|
|
@ -2,7 +2,98 @@
|
|||
# support, including Unicode properties. However, tests that give different
|
||||
# results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and
|
||||
# 12).
|
||||
|
||||
# PCRE2 and Perl disagree about the characteristics of certain Unicode
|
||||
# characters. For example, 061C is considered by Perl to be Arabic, though
|
||||
# it is not listed as such in the Unicode Scripts.txt file, and 2066-2069 are
|
||||
# graphic and printable according to Perl, though they are actually "isolate"
|
||||
# control characters. That is why the following tests are here rather than in
|
||||
# test 4.
|
||||
|
||||
/^[\p{Arabic}]/utf
|
||||
** Failers
|
||||
No match
|
||||
\x{061c}
|
||||
No match
|
||||
|
||||
/^[[:graph:]]+$/utf,ucp
|
||||
** Failers
|
||||
No match
|
||||
\x{61c}
|
||||
No match
|
||||
\x{2066}
|
||||
No match
|
||||
\x{2067}
|
||||
No match
|
||||
\x{2068}
|
||||
No match
|
||||
\x{2069}
|
||||
No match
|
||||
|
||||
/^[[:print:]]+$/utf,ucp
|
||||
** Failers
|
||||
0: ** Failers
|
||||
\x{61c}
|
||||
No match
|
||||
\x{2066}
|
||||
No match
|
||||
\x{2067}
|
||||
No match
|
||||
\x{2068}
|
||||
No match
|
||||
\x{2069}
|
||||
No match
|
||||
|
||||
/^[[:^graph:]]+$/utf,ucp
|
||||
\x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}
|
||||
0: \x{09}\x{0a}\x{1d} \x{85}\x{a0}\x{61c}\x{1680}
|
||||
\x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
|
||||
0: \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
|
||||
|
||||
/^[[:^print:]]+$/utf,ucp
|
||||
\x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
|
||||
0: \x{09}\x{1d}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
|
||||
\x{2068}\x{2069}
|
||||
0: \x{2068}\x{2069}
|
||||
|
||||
# Perl does not consider U+180e to be a space character. It is true that it
|
||||
# does not appear in the Unicode PropList.txt file as such, but in many other
|
||||
# sources it is listed as a space, and has been treated as such in PCRE for
|
||||
# a long time.
|
||||
|
||||
/^>[[:blank:]]*/utf,ucp
|
||||
>\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
|
||||
0: > \x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{09}
|
||||
|
||||
/^A\s+Z/utf,ucp
|
||||
A\x{85}\x{180e}\x{2005}Z
|
||||
0: A\x{85}\x{180e}\x{2005}Z
|
||||
|
||||
/^A[\s]+Z/utf,ucp
|
||||
A\x{2005}Z
|
||||
0: A\x{2005}Z
|
||||
A\x{85}\x{2005}Z
|
||||
0: A\x{85}\x{2005}Z
|
||||
|
||||
/^[[:graph:]]+$/utf,ucp
|
||||
\x{180e}
|
||||
No match
|
||||
|
||||
/^[[:print:]]+$/utf,ucp
|
||||
\x{180e}
|
||||
0: \x{180e}
|
||||
|
||||
/^[[:^graph:]]+$/utf,ucp
|
||||
\x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e}
|
||||
0: \x{09}\x{0a}\x{1d} \x{85}\x{a0}\x{61c}\x{1680}\x{180e}
|
||||
|
||||
/^[[:^print:]]+$/utf,ucp
|
||||
\x{180e}
|
||||
No match
|
||||
|
||||
# End of U+180E tests.
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
/\x{110000}/IB,utf
|
||||
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
|
||||
|
@ -2015,9 +2106,8 @@ No match
|
|||
\x{200d}
|
||||
No match
|
||||
|
||||
# These are here rather than in test 6 because Perl has problems with
|
||||
# the negative versions of the properties and behaves has changed how
|
||||
# it behaves for caseless matching.
|
||||
# These are here because Perl has problems with the negative versions of the
|
||||
# properties and has changed how it behaves for caseless matching.
|
||||
|
||||
/\p{^Lu}/i,utf
|
||||
1234
|
||||
|
@ -2520,7 +2610,7 @@ No match
|
|||
/[[:cntrl:]]/B,ucp
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[\x00-\x1f\x7f]
|
||||
[\p{Cc}]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
@ -2626,7 +2716,7 @@ No match
|
|||
/[[:^alpha:][:^cntrl:]]+/B,utf,ucp
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[ -~\x80-\xff\P{L}]++
|
||||
[\P{L}\P{Cc}]++
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
@ -2638,7 +2728,7 @@ No match
|
|||
/[[:^cntrl:][:^alpha:]]+/B,utf,ucp
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
[ -~\x80-\xff\P{L}]++
|
||||
[\P{Cc}\P{L}]++
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
@ -2850,8 +2940,6 @@ No match
|
|||
0: \x{2c65}\x{2c65}\x{23a}\x{23a}Y
|
||||
1: \x{2c65}\x{2c65}
|
||||
|
||||
#
|
||||
|
||||
# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE
|
||||
|
||||
/^[\p{Batak}]/utf
|
||||
|
@ -2886,8 +2974,6 @@ No match
|
|||
\x{85d}
|
||||
No match
|
||||
|
||||
#
|
||||
|
||||
/(\X*)(.)/s,utf
|
||||
A\x{300}
|
||||
0: A
|
||||
|
|
|
@ -659,18 +659,18 @@ Memory allocation (code space): 14
|
|||
|
||||
/[[:^alpha:][:^cntrl:]]+/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 26 Bra
|
||||
2 [ -~\x80-\xff\P{L}]++
|
||||
26 26 Ket
|
||||
28 End
|
||||
0 13 Bra
|
||||
2 [\P{L}\P{Cc}]++
|
||||
13 13 Ket
|
||||
15 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[[:^cntrl:][:^alpha:]]+/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 26 Bra
|
||||
2 [ -~\x80-\xff\P{L}]++
|
||||
26 26 Ket
|
||||
28 End
|
||||
0 13 Bra
|
||||
2 [\P{Cc}\P{L}]++
|
||||
13 13 Ket
|
||||
15 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[[:alpha:]]+/utf,ucp
|
||||
|
|
|
@ -659,18 +659,18 @@ Memory allocation (code space): 28
|
|||
|
||||
/[[:^alpha:][:^cntrl:]]+/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 18 Bra
|
||||
2 [ -~\x80-\xff\P{L}]++
|
||||
18 18 Ket
|
||||
20 End
|
||||
0 13 Bra
|
||||
2 [\P{L}\P{Cc}]++
|
||||
13 13 Ket
|
||||
15 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[[:^cntrl:][:^alpha:]]+/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 18 Bra
|
||||
2 [ -~\x80-\xff\P{L}]++
|
||||
18 18 Ket
|
||||
20 End
|
||||
0 13 Bra
|
||||
2 [\P{Cc}\P{L}]++
|
||||
13 13 Ket
|
||||
15 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[[:alpha:]]+/utf,ucp
|
||||
|
|
|
@ -659,18 +659,18 @@ Memory allocation (code space): 10
|
|||
|
||||
/[[:^alpha:][:^cntrl:]]+/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 44 Bra
|
||||
3 [ -~\x80-\xff\P{L}]++
|
||||
44 44 Ket
|
||||
47 End
|
||||
0 15 Bra
|
||||
3 [\P{L}\P{Cc}]++
|
||||
15 15 Ket
|
||||
18 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[[:^cntrl:][:^alpha:]]+/utf,ucp
|
||||
------------------------------------------------------------------
|
||||
0 44 Bra
|
||||
3 [ -~\x80-\xff\P{L}]++
|
||||
44 44 Ket
|
||||
47 End
|
||||
0 15 Bra
|
||||
3 [\P{Cc}\P{L}]++
|
||||
15 15 Ket
|
||||
18 End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/[[:alpha:]]+/utf,ucp
|
||||
|
|
Loading…
Reference in New Issue