Allow anchored patterns to use "first code unit" optimization.

This commit is contained in:
Philip.Hazel 2017-06-30 16:00:33 +00:00
parent cc089cf971
commit b7d5cee61f
15 changed files with 673 additions and 273 deletions

View File

@ -205,6 +205,11 @@ JIT.
subjects from 1000 to 2000 for 8-bit searches, since they use memchr() and are subjects from 1000 to 2000 for 8-bit searches, since they use memchr() and are
much faster. much faster.
46. Arrange for anchored patterns to record and use "first code unit" data,
because this can give a fast "no match" without searching for a "required code
unit". Previously only non-anchored patterns did this.
Version 10.23 14-February-2017 Version 10.23 14-February-2017
------------------------------ ------------------------------

View File

@ -9632,14 +9632,19 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
is_anchored(codestart, 0, &cb, 0, FALSE)) is_anchored(codestart, 0, &cb, 0, FALSE))
re->overall_options |= PCRE2_ANCHORED; re->overall_options |= PCRE2_ANCHORED;
/* If the pattern is still not anchored and we do not have a first code unit, /* Set up the first code unit or startline flag, the required code unit, and
see if there is one that is asserted (these are not saved during the compile then study the pattern. This code need not be obeyed if PCRE2_NO_START_OPTIMIZE
because they can cause conflicts with actual literals that follow). This code is set, as the data it would create will not be used. Note that a first code
need not be obeyed if PCRE2_NO_START_OPTIMIZE is set, as the data it would unit (but not the startline flag) is useful for anchored patterns because it
create will not be used. */ can still give a quick "no match" and also avoid searching for a last code
unit. */
if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0) if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
{ {
/* If we do not have a first code unit, see if there is one that is asserted
(these are not saved during the compile because they can cause conflicts with
actual literals that follow). */
if (firstcuflags < 0) if (firstcuflags < 0)
firstcu = find_firstassertedcu(codestart, &firstcuflags, FALSE); firstcu = find_firstassertedcu(codestart, &firstcuflags, FALSE);
@ -9672,22 +9677,21 @@ if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0)
} }
} }
/* When there is no first code unit, see if we can set the PCRE2_STARTLINE /* When there is no first code unit, for non-anchored patterns, see if we can
flag. This is helpful for multiline matches when all branches start with ^ set the PCRE2_STARTLINE flag. This is helpful for multiline matches when all
and also when all branches start with non-atomic .* for non-DOTALL matches branches start with ^ and also when all branches start with non-atomic .* for
when *PRUNE and SKIP are not present. (There is an option that disables this non-DOTALL matches when *PRUNE and SKIP are not present. (There is an option
case.) */ that disables this case.) */
else if (is_startline(codestart, 0, &cb, 0, FALSE)) else if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
is_startline(codestart, 0, &cb, 0, FALSE))
re->flags |= PCRE2_STARTLINE; re->flags |= PCRE2_STARTLINE;
}
/* Handle the "required code unit", if one is set. In the case of an anchored /* Handle the "required code unit", if one is set. In the case of an anchored
pattern, do this only if it follows a variable length item in the pattern. pattern, do this only if it follows a variable length item in the pattern. */
Again, skip this if PCRE2_NO_START_OPTIMIZE is set. */
if (reqcuflags >= 0 && if (reqcuflags >= 0 &&
((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0 || ((re->overall_options & PCRE2_ANCHORED) == 0 ||
(reqcuflags & REQ_VARY) != 0)) (reqcuflags & REQ_VARY) != 0))
{ {
re->last_codeunit = reqcu; re->last_codeunit = reqcu;
@ -9708,16 +9712,15 @@ if (reqcuflags >= 0 &&
} }
} }
/* Finally, unless PCRE2_NO_START_OPTIMIZE is set, study the compiled pattern /* Finally, study the compiled pattern to set up information such as a bitmap
to set up information such as a bitmap of starting code units and a minimum of starting code units and a minimum matching length. */
matching length. */
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && if (PRIV(study)(re) != 0)
PRIV(study)(re) != 0)
{ {
errorcode = ERR31; errorcode = ERR31;
goto HAD_CB_ERROR; goto HAD_CB_ERROR;
} }
} /* End of start-of-match optimizations. */
/* Control ends up here in all cases. When running under valgrind, make a /* Control ends up here in all cases. When running under valgrind, make a
pattern's terminating zero defined again. If memory was obtained for the parsed pattern's terminating zero defined again. If memory was obtained for the parsed

View File

@ -3341,15 +3341,10 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
} }
#endif /* SUPPORT_UNICODE */ #endif /* SUPPORT_UNICODE */
/* Set up the first code unit to match, if available. The first_codeunit value /* Set up the first code unit to match, if available. If there's no first code
is never set for an anchored regular expression, but the anchoring may be unit there may be a bitmap of possible first characters. */
forced at run time, so we have to test for anchoring. The first code unit may
be unset for an unanchored pattern, of course. If there's no first code unit
there may be a bitmap of possible first characters. */
if (!anchored) if ((re->flags & PCRE2_FIRSTSET) != 0)
{
if ((re->flags & PCRE2_FIRSTSET) != 0)
{ {
has_first_cu = TRUE; has_first_cu = TRUE;
first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit); first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
@ -3362,13 +3357,11 @@ if (!anchored)
#endif #endif
} }
} }
else else
if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0) if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
start_bits = re->start_bitmap; start_bits = re->start_bitmap;
}
/* For anchored or unanchored matches, there may be a "last known required /* There may be a "last known required code unit" set. */
character" set. */
if ((re->flags & PCRE2_LASTSET) != 0) if ((re->flags & PCRE2_LASTSET) != 0)
{ {
@ -3414,8 +3407,8 @@ for (;;)
/* If firstline is TRUE, the start of the match is constrained to the first /* If firstline is TRUE, the start of the match is constrained to the first
line of a multiline string. That is, the match must be before or at the line of a multiline string. That is, the match must be before or at the
first newline. Implement this by temporarily adjusting end_subject so that first newline. Implement this by temporarily adjusting end_subject so that
we stop the optimization scans at a newline. If the match fails at the we stop the optimization scans for a first code unit at a newline. If the
newline, later code breaks this loop. */ match fails at the newline, later code breaks this loop. */
if (firstline) if (firstline)
{ {
@ -3435,21 +3428,87 @@ for (;;)
end_subject = t; end_subject = t;
} }
/* Advance to a unique first code unit if there is one. */ /* Anchored: check the first code unit if one is recorded. This may seem
pointless but it can help in detecting a no match case without scanning for
the required code unit. */
if (has_first_cu) if (anchored)
{ {
PCRE2_UCHAR smc; if (has_first_cu || start_bits != NULL)
if (first_cu != first_cu2) {
while (start_match < end_subject && BOOL ok = start_match < end_subject;
(smc = UCHAR21TEST(start_match)) != first_cu && smc != first_cu2) if (ok)
start_match++; {
else PCRE2_UCHAR c = UCHAR21TEST(start_match);
while (start_match < end_subject && UCHAR21TEST(start_match) != first_cu) ok = has_first_cu && (c == first_cu || c == first_cu2);
start_match++; if (!ok && start_bits != NULL)
{
#if PCRE2_CODE_UNIT_WIDTH != 8
if (c > 255) c = 255;
#endif
ok = (start_bits[c/8] & (1 << (c&7))) != 0;
}
}
if (!ok) break;
}
} }
/* Or to just after a linebreak for a multiline match */ /* Not anchored. Advance to a unique first code unit if there is one. In
8-bit mode, the use of memchr() gives a big speed up, even though we have
to call it twice in caseless mode, in order to find the earliest occurrence
of the character in either of its cases. */
else
{
if (has_first_cu)
{
if (first_cu != first_cu2) /* Caseless */
{
#if PCRE2_CODE_UNIT_WIDTH != 8
PCRE2_UCHAR smc;
while (start_match < end_subject &&
(smc = UCHAR21TEST(start_match)) != first_cu &&
smc != first_cu2)
start_match++;
#else /* 8-bit code units */
PCRE2_SPTR pp1 =
memchr(start_match, first_cu, end_subject-start_match);
PCRE2_SPTR pp2 =
memchr(start_match, first_cu2, end_subject-start_match);
if (pp1 == NULL)
start_match = (pp2 == NULL)? end_subject : pp2;
else
start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
#endif
}
/* The caseful case */
else
{
#if PCRE2_CODE_UNIT_WIDTH != 8
while (start_match < end_subject && UCHAR21TEST(start_match) !=
first_cu)
start_match++;
#else
start_match = memchr(start_match, first_cu, end_subject - start_match);
if (start_match == NULL) start_match = end_subject;
#endif
}
/* If we can't find the first code unit, break the bumpalong loop,
to force a match failure, except when doing partial matching, when we
let the next cycle run at the end of the subject. To see why, consider
the pattern /(?<=abc)def/, which partially matches "abc", even though
the string does not contain the starting character "d". */
if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0 &&
start_match >= end_subject)
break;
}
/* If there's no first code unit, advance to just after a linebreak for a
multiline match if required. */
else if (startline) else if (startline)
{ {
@ -3471,8 +3530,8 @@ for (;;)
start_match++; start_match++;
/* If we have just passed a CR and the newline option is ANY or /* If we have just passed a CR and the newline option is ANY or
ANYCRLF, and we are now at a LF, advance the match position by one more ANYCRLF, and we are now at a LF, advance the match position by one
code unit. */ more code unit. */
if (start_match[-1] == CHAR_CR && if (start_match[-1] == CHAR_CR &&
(mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) && (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
@ -3482,9 +3541,10 @@ for (;;)
} }
} }
/* Or to a non-unique first code unit if any have been identified. The /* If there's no first code unit or a requirement for a multiline line
bitmap contains only 256 bits. When code units are 16 or 32 bits wide, all start, advance to a non-unique first code unit if any have been
code units greater than 254 set the 255 bit. */ identified. The bitmap contains only 256 bits. When code units are 16 or
32 bits wide, all code units greater than 254 set the 255 bit. */
else if (start_bits != NULL) else if (start_bits != NULL)
{ {
@ -3498,6 +3558,7 @@ for (;;)
start_match++; start_match++;
} }
} }
} /* End of first code unit handling */
/* Restore fudged end_subject */ /* Restore fudged end_subject */

View File

@ -6333,15 +6333,10 @@ mb->lcc = re->tables + lcc_offset;
mb->fcc = re->tables + fcc_offset; mb->fcc = re->tables + fcc_offset;
mb->ctypes = re->tables + ctypes_offset; mb->ctypes = re->tables + ctypes_offset;
/* Set up the first code unit to match, if available. The first_codeunit value /* Set up the first code unit to match, if available. If there's no first code
is never set for an anchored regular expression, but the anchoring may be unit there may be a bitmap of possible first characters. */
forced at run time, so we have to test for anchoring. The first code unit may
be unset for an unanchored pattern, of course. If there's no first code unit
there may be a bitmap of possible first characters. */
if (!anchored) if ((re->flags & PCRE2_FIRSTSET) != 0)
{
if ((re->flags & PCRE2_FIRSTSET) != 0)
{ {
has_first_cu = TRUE; has_first_cu = TRUE;
first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit); first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
@ -6353,13 +6348,11 @@ if (!anchored)
#endif #endif
} }
} }
else else
if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0) if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
start_bits = re->start_bitmap; start_bits = re->start_bitmap;
}
/* For anchored or unanchored matches, there may be a "last known required /* There may also be a "last known required character" set. */
character" set. */
if ((re->flags & PCRE2_LASTSET) != 0) if ((re->flags & PCRE2_LASTSET) != 0)
{ {
@ -6398,8 +6391,8 @@ for(;;)
/* If firstline is TRUE, the start of the match is constrained to the first /* If firstline is TRUE, the start of the match is constrained to the first
line of a multiline string. That is, the match must be before or at the line of a multiline string. That is, the match must be before or at the
first newline. Implement this by temporarily adjusting end_subject so that first newline. Implement this by temporarily adjusting end_subject so that
we stop the optimization scans at a newline. If the match fails at the we stop the optimization scans for a first code unit at a newline. If the
newline, later code breaks this loop. */ match fails at the newline, later code breaks this loop. */
if (firstline) if (firstline)
{ {
@ -6419,11 +6412,42 @@ for(;;)
end_subject = t; end_subject = t;
} }
/* Advance to a unique first code unit if there is one. In 8-bit mode, the /* Anchored: check the first code unit if one is recorded. This may seem
use of memchr() gives a big speed up, even though we have to call it twice pointless but it can help in detecting a no match case without scanning for
in caseless mode, in order to find the first occurrence of the character in the required code unit. */
either of its cases. */
if (anchored)
{
if (has_first_cu || start_bits != NULL)
{
BOOL ok = start_match < end_subject;
if (ok)
{
PCRE2_UCHAR c = UCHAR21TEST(start_match);
ok = has_first_cu && (c == first_cu || c == first_cu2);
if (!ok && start_bits != NULL)
{
#if PCRE2_CODE_UNIT_WIDTH != 8
if (c > 255) c = 255;
#endif
ok = (start_bits[c/8] & (1 << (c&7))) != 0;
}
}
if (!ok)
{
rc = MATCH_NOMATCH;
break;
}
}
}
/* Not anchored. Advance to a unique first code unit if there is one. In
8-bit mode, the use of memchr() gives a big speed up, even though we have
to call it twice in caseless mode, in order to find the earliest occurrence
of the character in either of its cases. */
else
{
if (has_first_cu) if (has_first_cu)
{ {
if (first_cu != first_cu2) /* Caseless */ if (first_cu != first_cu2) /* Caseless */
@ -6431,11 +6455,14 @@ for(;;)
#if PCRE2_CODE_UNIT_WIDTH != 8 #if PCRE2_CODE_UNIT_WIDTH != 8
PCRE2_UCHAR smc; PCRE2_UCHAR smc;
while (start_match < end_subject && while (start_match < end_subject &&
(smc = UCHAR21TEST(start_match)) != first_cu && smc != first_cu2) (smc = UCHAR21TEST(start_match)) != first_cu &&
smc != first_cu2)
start_match++; start_match++;
#else /* 8-bit code units */ #else /* 8-bit code units */
PCRE2_SPTR pp1 = memchr(start_match, first_cu, end_subject-start_match); PCRE2_SPTR pp1 =
PCRE2_SPTR pp2 = memchr(start_match, first_cu2, end_subject-start_match); memchr(start_match, first_cu, end_subject-start_match);
PCRE2_SPTR pp2 =
memchr(start_match, first_cu2, end_subject-start_match);
if (pp1 == NULL) if (pp1 == NULL)
start_match = (pp2 == NULL)? end_subject : pp2; start_match = (pp2 == NULL)? end_subject : pp2;
else else
@ -6448,7 +6475,8 @@ for(;;)
else else
{ {
#if PCRE2_CODE_UNIT_WIDTH != 8 #if PCRE2_CODE_UNIT_WIDTH != 8
while (start_match < end_subject && UCHAR21TEST(start_match) != first_cu) while (start_match < end_subject && UCHAR21TEST(start_match) !=
first_cu)
start_match++; start_match++;
#else #else
start_match = memchr(start_match, first_cu, end_subject - start_match); start_match = memchr(start_match, first_cu, end_subject - start_match);
@ -6456,11 +6484,11 @@ for(;;)
#endif #endif
} }
/* If we can't find the required code unit, break the bumpalong loop, to /* If we can't find the required code unit, break the bumpalong loop,
force a match failure, except when doing partial matching, when we let to force a match failure, except when doing partial matching, when we
the next cycle run at the end of the subject. To see why, consider the let the next cycle run at the end of the subject. To see why, consider
pattern /(?<=abc)def/, which partially matches "abc", even though the the pattern /(?<=abc)def/, which partially matches "abc", even though
string does not contain the starting character "d". */ the string does not contain the starting character "d". */
if (!mb->partial && start_match >= end_subject) if (!mb->partial && start_match >= end_subject)
{ {
@ -6492,8 +6520,8 @@ for(;;)
start_match++; start_match++;
/* If we have just passed a CR and the newline option is ANY or /* If we have just passed a CR and the newline option is ANY or
ANYCRLF, and we are now at a LF, advance the match position by one more ANYCRLF, and we are now at a LF, advance the match position by one
code unit. */ more code unit. */
if (start_match[-1] == CHAR_CR && if (start_match[-1] == CHAR_CR &&
(mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) && (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
@ -6504,9 +6532,9 @@ for(;;)
} }
/* If there's no first code unit or a requirement for a multiline line /* If there's no first code unit or a requirement for a multiline line
start, advance to a non-unique first code unit if any have been identified. start, advance to a non-unique first code unit if any have been
The bitmap contains only 256 bits. When code units are 16 or 32 bits wide, identified. The bitmap contains only 256 bits. When code units are 16 or
all code units greater than 254 set the 255 bit. */ 32 bits wide, all code units greater than 254 set the 255 bit. */
else if (start_bits != NULL) else if (start_bits != NULL)
{ {
@ -6520,6 +6548,7 @@ for(;;)
start_match++; start_match++;
} }
} }
} /* End first code unit handling */
/* Restore fudged end_subject */ /* Restore fudged end_subject */

View File

@ -953,7 +953,6 @@ do
case OP_ALLANY: case OP_ALLANY:
case OP_ANY: case OP_ANY:
case OP_ANYBYTE: case OP_ANYBYTE:
case OP_CIRC:
case OP_CIRCM: case OP_CIRCM:
case OP_CLOSE: case OP_CLOSE:
case OP_COMMIT: case OP_COMMIT:
@ -1021,6 +1020,13 @@ do
case OP_THEN_ARG: case OP_THEN_ARG:
return SSB_FAIL; return SSB_FAIL;
/* OP_CIRC happens only at the start of an anchored branch (multiline ^
uses OP_CIRCM). Skip over it. */
case OP_CIRC:
tcode += PRIV(OP_lengths)[OP_CIRC];
break;
/* A "real" property test implies no starting bits, but the fake property /* A "real" property test implies no starting bits, but the fake property
PT_CLIST identifies a list of characters. These lists are short, as they PT_CLIST identifies a list of characters. These lists are short, as they
are used for characters with more than one "other case", so there is no are used for characters with more than one "other case", so there is no
@ -1579,12 +1585,11 @@ BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) + code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
re->name_entry_size * re->name_count; re->name_entry_size * re->name_count;
/* For an anchored pattern, or an unanchored pattern that has a first code /* For a pattern that has a first code unit, or a multiline pattern that
unit, or a multiline pattern that matches only at "line start", there is no matches only at "line start", there is no point in seeking a list of starting
point in seeking a list of starting code units. */ code units. */
if ((re->overall_options & PCRE2_ANCHORED) == 0 && if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
(re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
{ {
int rc = set_start_bits(re, code, utf); int rc = set_start_bits(re, code, utf);
if (rc == SSB_UNKNOWN) return 1; if (rc == SSB_UNKNOWN) return 1;

View File

@ -467,4 +467,13 @@
/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes /\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
\x{dfff}\x{df01}\=no_utf_check \x{dfff}\x{df01}\=no_utf_check
# This has different starting code units in 8-bit mode.
/^[^ab]/IB,utf
c
\x{ff}
\x{100}
\= Expect no match
aaa
# End of testinput10 # End of testinput10

View File

@ -373,4 +373,13 @@
/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes /\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
\x{dfff}\x{df01}\=no_utf_check \x{dfff}\x{df01}\=no_utf_check
# This has different starting code units in 8-bit mode.
/^[^ab]/IB,utf
c
\x{ff}
\x{100}
\= Expect no match
aaa
# End of testinput12 # End of testinput12

3
testdata/testinput2 vendored
View File

@ -5256,6 +5256,9 @@ a)"xI
XAB XAB
/^(?!A(?C1)B)C/ /^(?!A(?C1)B)C/
ABC\=callout_error=1,no_jit
/^(?!A(?C1)B)C/no_start_optimize
ABC\=callout_error=1 ABC\=callout_error=1
/^(?(?!A(?C1)B)C)/ /^(?(?!A(?C1)B)C)/

12
testdata/testinput5 vendored
View File

@ -120,13 +120,6 @@
\x{ff} \x{ff}
\x{100} \x{100}
/^[^ab]/IB,utf
c
\x{ff}
\x{100}
\= Expect no match
aaa
/\x{100}*(\d+|"(?1)")/utf /\x{100}*(\d+|"(?1)")/utf
1234 1234
"1234" "1234"
@ -190,7 +183,10 @@
/\w/utf /\w/utf
\x{100}X \x{100}X
/^\ሴ/IB,utf # Use no_start_optimize because the first code unit is different in 8-bit from
# the wider modes.
/^\ሴ/IB,utf,no_start_optimize
/()()()()()()()()()() /()()()()()()()()()()
()()()()()()()()()() ()()()()()()()()()()

34
testdata/testoutput10 vendored
View File

@ -1586,4 +1586,38 @@ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP),
\x{dfff}\x{df01}\=no_utf_check \x{dfff}\x{df01}\=no_utf_check
0: \x{dfff}\x{df01} 0: \x{dfff}\x{df01}
# This has different starting code units in 8-bit mode.
/^[^ab]/IB,utf
------------------------------------------------------------------
Bra
^
[\x00-`c-\xff] (neg)
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
Compile options: utf
Overall options: anchored utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
\xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0
\xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf
\xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee
\xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd
\xfe \xff
Subject length lower bound = 1
c
0: c
\x{ff}
0: \x{ff}
\x{100}
0: \x{100}
\= Expect no match
aaa
No match
# End of testinput10 # End of testinput10

View File

@ -1433,4 +1433,42 @@ Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowe
Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
\x{dfff}\x{df01}\=no_utf_check \x{dfff}\x{df01}\=no_utf_check
# This has different starting code units in 8-bit mode.
/^[^ab]/IB,utf
------------------------------------------------------------------
Bra
^
[\x00-`c-\xff] (neg)
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
Compile options: utf
Overall options: anchored utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
\x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e
\x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d
\x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac
\xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb
\xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca
\xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9
\xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8
\xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7
\xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
Subject length lower bound = 1
c
0: c
\x{ff}
0: \x{ff}
\x{100}
0: \x{100}
\= Expect no match
aaa
No match
# End of testinput12 # End of testinput12

View File

@ -1425,4 +1425,42 @@ No match
\x{dfff}\x{df01}\=no_utf_check \x{dfff}\x{df01}\=no_utf_check
0: \x{dfff}\x{df01} 0: \x{dfff}\x{df01}
# This has different starting code units in 8-bit mode.
/^[^ab]/IB,utf
------------------------------------------------------------------
Bra
^
[\x00-`c-\xff] (neg)
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
Compile options: utf
Overall options: anchored utf
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
\x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e
\x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d
\x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac
\xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb
\xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca
\xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9
\xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8
\xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7
\xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
Subject length lower bound = 1
c
0: c
\x{ff}
0: \x{ff}
\x{100}
0: \x{100}
\= Expect no match
aaa
No match
# End of testinput12 # End of testinput12

View File

@ -368,6 +368,7 @@ No match
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 6 Subject length lower bound = 6
JIT compilation was successful JIT compilation was successful
#pop jitverify #pop jitverify
@ -379,6 +380,7 @@ JIT compilation was successful
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 6 Subject length lower bound = 6
JIT compilation was successful JIT compilation was successful
#save testsaved1 #save testsaved1

188
testdata/testoutput2 vendored
View File

@ -72,6 +72,7 @@ No match
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 3 Subject length lower bound = 3
abc abc
0: abc 0: abc
@ -110,6 +111,7 @@ Subject length lower bound = 2
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 3 Subject length lower bound = 3
abc abc
0: abc 0: abc
@ -339,6 +341,7 @@ Subject length lower bound = 19
/the quick brown fox/I,anchored /the quick brown fox/I,anchored
Capturing subpattern count = 0 Capturing subpattern count = 0
Options: anchored Options: anchored
First code unit = 't'
Subject length lower bound = 19 Subject length lower bound = 19
the quick brown fox the quick brown fox
0: the quick brown fox 0: the quick brown fox
@ -351,6 +354,7 @@ Failed: error 111 at offset 4: unrecognized character after (? or (?-
/^abc|def/I /^abc|def/I
Capturing subpattern count = 0 Capturing subpattern count = 0
Starting code units: a d
Subject length lower bound = 3 Subject length lower bound = 3
abcdef abcdef
0: abc 0: abc
@ -495,12 +499,14 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = '1'
Subject length lower bound = 4 Subject length lower bound = 4
/(^b|(?i)^d)/I /(^b|(?i)^d)/I
Capturing subpattern count = 1 Capturing subpattern count = 1
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: D b d
Subject length lower bound = 1 Subject length lower bound = 1
/(?s).*/I /(?s).*/I
@ -624,6 +630,7 @@ Capturing subpattern count = 0
Max lookbehind = 1 Max lookbehind = 1
Compile options: multiline Compile options: multiline
Overall options: anchored multiline Overall options: anchored multiline
First code unit = 'a'
Subject length lower bound = 3 Subject length lower bound = 3
/^abc/Im /^abc/Im
@ -637,6 +644,7 @@ Subject length lower bound = 3
Capturing subpattern count = 5 Capturing subpattern count = 5
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 3 Subject length lower bound = 3
aaaaabbbbbcccccdef aaaaabbbbbcccccdef
0: aaaaabbbbbcccccdef 0: aaaaabbbbbcccccdef
@ -808,6 +816,7 @@ Capturing subpattern count = 1
Max back reference = 1 Max back reference = 1
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: a
Subject length lower bound = 4 Subject length lower bound = 4
\= Expect no match \= Expect no match
aaaa aaaa
@ -1004,6 +1013,7 @@ Subject length lower bound = 16
Capturing subpattern count = 3 Capturing subpattern count = 3
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: a b
Subject length lower bound = 4 Subject length lower bound = 4
adef\=get=1,get=2,get=3,get=4,getall adef\=get=1,get=2,get=3,get=4,getall
0: adef 0: adef
@ -1042,6 +1052,7 @@ Get substring 4 failed (-49): unknown substring
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 7 Subject length lower bound = 7
abc\00def\=copy=0,getall abc\00def\=copy=0,getall
0: abc\x00def 0: abc\x00def
@ -1227,6 +1238,7 @@ Subject length lower bound = 3
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'i'
Subject length lower bound = 3 Subject length lower bound = 3
ississippi ississippi
0: iss 0: iss
@ -1286,6 +1298,7 @@ Capturing subpattern count = 0
Contains explicit CR or LF match Contains explicit CR or LF match
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 3 Subject length lower bound = 3
ab\nab\ncd ab\nab\ncd
0: ab\x0a 0: ab\x0a
@ -1776,6 +1789,8 @@ Subject length lower bound = 2
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z
Subject length lower bound = 1 Subject length lower bound = 1
/^[[:^alnum:]]/IB /^[[:^alnum:]]/IB
@ -1789,6 +1804,18 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
? @ [ \ ] ^ _ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88
\x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97
\x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6
\xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5
\xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4
\xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3
\xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2
\xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1
\xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
Subject length lower bound = 1 Subject length lower bound = 1
/^[[:alpha:]]/IB /^[[:alpha:]]/IB
@ -1802,6 +1829,8 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
a b c d e f g h i j k l m n o p q r s t u v w x y z
Subject length lower bound = 1 Subject length lower bound = 1
/^[[:^alpha:]]/IB /^[[:^alpha:]]/IB
@ -1815,6 +1844,19 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
5 6 7 8 9 : ; < = > ? @ [ \ ] ^ _ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
\x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93
\x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2
\xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1
\xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0
\xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf
\xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde
\xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed
\xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc
\xfd \xfe \xff
Subject length lower bound = 1 Subject length lower bound = 1
/[_[:alpha:]]/I /[_[:alpha:]]/I
@ -1834,6 +1876,12 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~
\x7f
Subject length lower bound = 1 Subject length lower bound = 1
/^[[:^ascii:]]/IB /^[[:^ascii:]]/IB
@ -1847,6 +1895,15 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a
\x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99
\x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8
\xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7
\xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6
\xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5
\xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4
\xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3
\xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
Subject length lower bound = 1 Subject length lower bound = 1
/^[[:blank:]]/IB /^[[:blank:]]/IB
@ -1860,6 +1917,7 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: \x09 \x20
Subject length lower bound = 1 Subject length lower bound = 1
/^[[:^blank:]]/IB /^[[:^blank:]]/IB
@ -1873,6 +1931,20 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
\x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a
\x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9
: ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^
_ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80
\x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f
\x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e
\x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad
\xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc
\xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb
\xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda
\xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9
\xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8
\xf9 \xfa \xfb \xfc \xfd \xfe \xff
Subject length lower bound = 1 Subject length lower bound = 1
/[\n\x0b\x0c\x0d[:blank:]]/I /[\n\x0b\x0c\x0d[:blank:]]/I
@ -1892,6 +1964,9 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
\x1a \x1b \x1c \x1d \x1e \x1f \x7f
Subject length lower bound = 1 Subject length lower bound = 1
/^[[:digit:]]/IB /^[[:digit:]]/IB
@ -1905,6 +1980,7 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: 0 1 2 3 4 5 6 7 8 9
Subject length lower bound = 1 Subject length lower bound = 1
/^[[:graph:]]/IB /^[[:graph:]]/IB
@ -1918,6 +1994,9 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 :
; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _
` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~
Subject length lower bound = 1 Subject length lower bound = 1
/^[[:lower:]]/IB /^[[:lower:]]/IB
@ -1931,6 +2010,7 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: a b c d e f g h i j k l m n o p q r s t u v w x y z
Subject length lower bound = 1 Subject length lower bound = 1
/^[[:print:]]/IB /^[[:print:]]/IB
@ -1944,6 +2024,9 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8
9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ]
^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~
Subject length lower bound = 1 Subject length lower bound = 1
/^[[:punct:]]/IB /^[[:punct:]]/IB
@ -1957,6 +2040,8 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^
_ ` { | } ~
Subject length lower bound = 1 Subject length lower bound = 1
/^[[:space:]]/IB /^[[:space:]]/IB
@ -1970,6 +2055,7 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20
Subject length lower bound = 1 Subject length lower bound = 1
/^[[:upper:]]/IB /^[[:upper:]]/IB
@ -1983,6 +2069,7 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
Subject length lower bound = 1 Subject length lower bound = 1
/^[[:xdigit:]]/IB /^[[:xdigit:]]/IB
@ -1996,6 +2083,7 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F a b c d e f
Subject length lower bound = 1 Subject length lower bound = 1
/^[[:word:]]/IB /^[[:word:]]/IB
@ -2009,6 +2097,8 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
Subject length lower bound = 1 Subject length lower bound = 1
/^[[:^cntrl:]]/IB /^[[:^cntrl:]]/IB
@ -2022,6 +2112,18 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8
9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ]
^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x80 \x81
\x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90
\x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f
\xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae
\xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd
\xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc
\xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb
\xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea
\xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9
\xfa \xfb \xfc \xfd \xfe \xff
Subject length lower bound = 1 Subject length lower bound = 1
/^[12[:^digit:]]/IB /^[12[:^digit:]]/IB
@ -2035,6 +2137,20 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 1 2 : ; <
= > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a
b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
\x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91
\x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0
\xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf
\xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe
\xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd
\xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc
\xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb
\xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa
\xfb \xfc \xfd \xfe \xff
Subject length lower bound = 1 Subject length lower bound = 1
/^[[:^blank:]]/IB /^[[:^blank:]]/IB
@ -2048,6 +2164,20 @@ Subject length lower bound = 1
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
\x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a
\x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9
: ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^
_ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80
\x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f
\x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e
\x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad
\xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc
\xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb
\xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda
\xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9
\xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8
\xf9 \xfa \xfb \xfc \xfd \xfe \xff
Subject length lower bound = 1 Subject length lower bound = 1
/[01[:alpha:]%]/IB /[01[:alpha:]%]/IB
@ -2418,6 +2548,7 @@ Subject length lower bound = 4
Capturing subpattern count = 2 Capturing subpattern count = 2
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 1 Subject length lower bound = 1
aba aba
0: aba 0: aba
@ -2428,6 +2559,7 @@ Subject length lower bound = 1
Capturing subpattern count = 2 Capturing subpattern count = 2
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
aabbaa aabbaa
0: aabbaa 0: aabbaa
@ -2438,6 +2570,7 @@ Subject length lower bound = 2
Capturing subpattern count = 2 Capturing subpattern count = 2
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
aabbaa aabbaa
0: aabbaa 0: aabbaa
@ -2448,6 +2581,7 @@ Subject length lower bound = 2
Capturing subpattern count = 2 Capturing subpattern count = 2
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
aabbaa aabbaa
0: aabbaa 0: aabbaa
@ -2458,6 +2592,7 @@ Subject length lower bound = 2
Capturing subpattern count = 1 Capturing subpattern count = 1
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
aabbaa aabbaa
0: aabbaa 0: aabbaa
@ -2467,6 +2602,7 @@ Subject length lower bound = 2
Capturing subpattern count = 3 Capturing subpattern count = 3
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
aabbaa aabbaa
0: aabbaa 0: aabbaa
@ -2478,6 +2614,7 @@ Subject length lower bound = 2
Capturing subpattern count = 2 Capturing subpattern count = 2
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
aabbaa aabbaa
0: aabbaa 0: aabbaa
@ -2488,6 +2625,7 @@ Subject length lower bound = 2
Capturing subpattern count = 1 Capturing subpattern count = 1
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
aabbaa aabbaa
0: aabbaa 0: aabbaa
@ -2497,6 +2635,7 @@ Subject length lower bound = 2
Capturing subpattern count = 1 Capturing subpattern count = 1
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
aabbbaa aabbbaa
0: aabbbaa 0: aabbbaa
@ -2506,6 +2645,7 @@ Subject length lower bound = 2
Capturing subpattern count = 1 Capturing subpattern count = 1
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
aabbbaa aabbbaa
0: aabbbaa 0: aabbbaa
@ -2515,6 +2655,7 @@ Subject length lower bound = 2
Capturing subpattern count = 1 Capturing subpattern count = 1
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
aabbaa aabbaa
0: aabbaa 0: aabbaa
@ -2524,6 +2665,7 @@ Subject length lower bound = 2
Capturing subpattern count = 1 Capturing subpattern count = 1
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
aabbbaa aabbbaa
0: aabbbaa 0: aabbbaa
@ -2533,6 +2675,7 @@ Subject length lower bound = 2
Capturing subpattern count = 3 Capturing subpattern count = 3
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
aabbbaa aabbbaa
0: aabbbaa 0: aabbbaa
@ -2544,6 +2687,7 @@ Subject length lower bound = 2
Capturing subpattern count = 3 Capturing subpattern count = 3
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
aabbbbaa aabbbbaa
0: aabbbbaa 0: aabbbbaa
@ -3052,6 +3196,7 @@ Subject length lower bound = 3
Capturing subpattern count = 5 Capturing subpattern count = 5
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 3 Subject length lower bound = 3
/^x(?U)a+b/IB /^x(?U)a+b/IB
@ -3067,6 +3212,7 @@ Subject length lower bound = 3
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'x'
Last code unit = 'b' Last code unit = 'b'
Subject length lower bound = 3 Subject length lower bound = 3
@ -3085,6 +3231,7 @@ Subject length lower bound = 3
Capturing subpattern count = 1 Capturing subpattern count = 1
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'x'
Last code unit = 'b' Last code unit = 'b'
Subject length lower bound = 3 Subject length lower bound = 3
@ -3725,6 +3872,7 @@ Subject length lower bound = 3
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 3 Subject length lower bound = 3
/(?C)a|b/I /(?C)a|b/I
@ -3785,6 +3933,7 @@ No match
Capturing subpattern count = 1 Capturing subpattern count = 1
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = '>'
Last code unit = '<' Last code unit = '<'
Subject length lower bound = 10 Subject length lower bound = 10
>abc>123<xyz< >abc>123<xyz<
@ -3835,6 +3984,7 @@ Subject length lower bound = 2
Capturing subpattern count = 2 Capturing subpattern count = 2
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: ( - 0 1 2 3 4 5 6 7 8 9
Subject length lower bound = 1 Subject length lower bound = 1
12 12
0: 12 0: 12
@ -3854,6 +4004,7 @@ No match
Capturing subpattern count = 2 Capturing subpattern count = 2
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'x'
Subject length lower bound = 3 Subject length lower bound = 3
xyz xyz
0: xyz 0: xyz
@ -3913,6 +4064,7 @@ Failed: error 114 at offset 10: missing closing parenthesis
Capturing subpattern count = 1 Capturing subpattern count = 1
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 9 Subject length lower bound = 9
abcdefabc abcdefabc
0: abcdefabc 0: abcdefabc
@ -3922,6 +4074,7 @@ Subject length lower bound = 9
Capturing subpattern count = 1 Capturing subpattern count = 1
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: a b c
Subject length lower bound = 2 Subject length lower bound = 2
a=a a=a
0: a=a 0: a=a
@ -3937,6 +4090,7 @@ Subject length lower bound = 2
Capturing subpattern count = 2 Capturing subpattern count = 2
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: a b c
Subject length lower bound = 2 Subject length lower bound = 2
a=a a=a
0: a=a 0: a=a
@ -5173,6 +5327,7 @@ No match
Capturing subpattern count = 3 Capturing subpattern count = 3
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: 0 1 2 3 4 5 6 7 8 9
Last code unit = '/' Last code unit = '/'
Subject length lower bound = 6 Subject length lower bound = 6
13/05/04\=ps 13/05/04\=ps
@ -5270,6 +5425,7 @@ Partial match: c12
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: 0 1 2 3 4 5 6 7 8 9
Last code unit = 'X' Last code unit = 'X'
Subject length lower bound = 4 Subject length lower bound = 4
1\=ps 1\=ps
@ -5643,6 +5799,7 @@ Named capturing subpatterns:
A 3 A 3
Compile options: dupnames Compile options: dupnames
Overall options: anchored dupnames Overall options: anchored dupnames
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
a1b\=copy=A a1b\=copy=A
0: a1 0: a1
@ -5680,6 +5837,7 @@ Named capturing subpatterns:
A 2 A 2
Compile options: dupnames Compile options: dupnames
Overall options: anchored dupnames Overall options: anchored dupnames
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
ab\=copy=A ab\=copy=A
0: ab 0: ab
@ -5693,6 +5851,7 @@ Named capturing subpatterns:
A 1 A 1
A 2 A 2
Options: dupnames Options: dupnames
Starting code units: a c
Subject length lower bound = 2 Subject length lower bound = 2
ab\=copy=A ab\=copy=A
0: ab 0: ab
@ -5711,6 +5870,7 @@ Named capturing subpatterns:
A 3 A 3
A 4 A 4
Options: dupnames Options: dupnames
Starting code units: a c
Subject length lower bound = 2 Subject length lower bound = 2
cdefgh\=copy=A cdefgh\=copy=A
0: cdefgh 0: cdefgh
@ -5727,6 +5887,7 @@ Named capturing subpatterns:
A 3 A 3
Compile options: dupnames Compile options: dupnames
Overall options: anchored dupnames Overall options: anchored dupnames
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
a1b\=get=A a1b\=get=A
0: a1 0: a1
@ -5754,6 +5915,7 @@ Named capturing subpatterns:
A 2 A 2
Compile options: dupnames Compile options: dupnames
Overall options: anchored dupnames Overall options: anchored dupnames
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
ab\=get=A ab\=get=A
0: ab 0: ab
@ -5767,6 +5929,7 @@ Named capturing subpatterns:
A 1 A 1
A 2 A 2
Options: dupnames Options: dupnames
Starting code units: a c
Subject length lower bound = 2 Subject length lower bound = 2
ab\=get=A ab\=get=A
0: ab 0: ab
@ -5785,6 +5948,7 @@ Named capturing subpatterns:
A 3 A 3
A 4 A 4
Options: dupnames Options: dupnames
Starting code units: a c
Subject length lower bound = 2 Subject length lower bound = 2
cdefgh\=get=A cdefgh\=get=A
0: cdefgh 0: cdefgh
@ -5802,6 +5966,7 @@ Named capturing subpatterns:
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Duplicate name status changes Duplicate name status changes
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
a1b\=copy=A a1b\=copy=A
0: a1 0: a1
@ -5832,6 +5997,7 @@ Named capturing subpatterns:
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Duplicate name status changes Duplicate name status changes
First code unit = 'a'
Subject length lower bound = 6 Subject length lower bound = 6
a bc d\=copy=A,copy=B,copy=C a bc d\=copy=A,copy=B,copy=C
0: a bc d 0: a bc d
@ -6233,6 +6399,7 @@ Subject length lower bound = 4
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: a b
Last code unit = 'b' Last code unit = 'b'
Subject length lower bound = 2 Subject length lower bound = 2
@ -6249,6 +6416,7 @@ Subject length lower bound = 2
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: a b
Last code unit = 'b' Last code unit = 'b'
Subject length lower bound = 2 Subject length lower bound = 2
@ -6265,6 +6433,7 @@ Subject length lower bound = 2
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Starting code units: a b
Last code unit = 'b' Last code unit = 'b'
Subject length lower bound = 2 Subject length lower bound = 2
@ -6281,6 +6450,7 @@ Subject length lower bound = 2
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Last code unit = 'A' Last code unit = 'A'
Subject length lower bound = 3 Subject length lower bound = 3
aaaA5 aaaA5
@ -6302,6 +6472,7 @@ No match
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: caseless Compile options: caseless
Overall options: anchored caseless Overall options: anchored caseless
Starting code units: A a
Last code unit = 'A' (caseless) Last code unit = 'A' (caseless)
Subject length lower bound = 2 Subject length lower bound = 2
aaaA5 aaaA5
@ -9540,6 +9711,7 @@ Subject length lower bound = 2
Capturing subpattern count = 1 Capturing subpattern count = 1
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'F'
Last code unit = ':' Last code unit = ':'
Subject length lower bound = 22 Subject length lower bound = 22
@ -9691,6 +9863,7 @@ Named capturing subpatterns:
D 1 D 1
Compile options: dupnames extended Compile options: dupnames extended
Overall options: anchored dupnames extended Overall options: anchored dupnames extended
Starting code units: a e
Subject length lower bound = 2 Subject length lower bound = 2
abcdX abcdX
0: abcdX 0: abcdX
@ -10445,12 +10618,14 @@ Failed: error 125 at offset 0: lookbehind assertion is not fixed length
Capturing subpattern count = 1 Capturing subpattern count = 1
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
/(^ab)++/I /(^ab)++/I
Capturing subpattern count = 1 Capturing subpattern count = 1
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
/(^ab|^)+/I /(^ab|^)+/I
@ -10471,12 +10646,14 @@ Subject length lower bound = 0
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
/(?:^ab)++/I /(?:^ab)++/I
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
/(?:^ab|^)+/I /(?:^ab|^)+/I
@ -11586,6 +11763,7 @@ Subject length lower bound = 2
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: dotall Compile options: dotall
Overall options: anchored dotall Overall options: anchored dotall
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
/.*?a(*SKIP)b/I /.*?a(*SKIP)b/I
@ -11608,6 +11786,7 @@ Subject length lower bound = 2
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: dotall Compile options: dotall
Overall options: anchored dotall Overall options: anchored dotall
First code unit = 'a'
Subject length lower bound = 2 Subject length lower bound = 2
/(?>.*?)(?<=(abcd)|(wxyz))/I /(?>.*?)(?<=(abcd)|(wxyz))/I
@ -13375,7 +13554,6 @@ Subject length lower bound = 1
/(|ab)*?d/I,no_start_optimize /(|ab)*?d/I,no_start_optimize
Capturing subpattern count = 1 Capturing subpattern count = 1
Options: no_start_optimize Options: no_start_optimize
Last code unit = 'd'
Subject length lower bound = 0 Subject length lower bound = 0
abd abd
0: abd 0: abd
@ -13641,12 +13819,14 @@ get substring list failed (-2): partial match
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'a'
Subject length lower bound = 3 Subject length lower bound = 3
/^abc/info,no_dotstar_anchor /^abc/info,no_dotstar_anchor
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: no_dotstar_anchor Compile options: no_dotstar_anchor
Overall options: anchored no_dotstar_anchor Overall options: anchored no_dotstar_anchor
First code unit = 'a'
Subject length lower bound = 3 Subject length lower bound = 3
/.*\d/info,auto_callout /.*\d/info,auto_callout
@ -14684,6 +14864,7 @@ Capturing subpattern count = 2
Max back reference = 1 Max back reference = 1
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'o'
Last code unit = '}' Last code unit = '}'
Subject length lower bound = 65535 Subject length lower bound = 65535
@ -15607,6 +15788,7 @@ No match
Capturing subpattern count = 1 Capturing subpattern count = 1
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
First code unit = 'b'
Subject length lower bound = 2 Subject length lower bound = 2
/(a){0}.*bc/sI /(a){0}.*bc/sI
@ -15885,6 +16067,10 @@ No match
No match No match
/^(?!A(?C1)B)C/ /^(?!A(?C1)B)C/
ABC\=callout_error=1,no_jit
No match
/^(?!A(?C1)B)C/no_start_optimize
ABC\=callout_error=1 ABC\=callout_error=1
--->ABC --->ABC
1 ^^ B 1 ^^ B

34
testdata/testoutput5 vendored
View File

@ -194,6 +194,7 @@ Subject length lower bound = 3
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: utf Compile options: utf
Overall options: anchored utf Overall options: anchored utf
Starting code units: a b
Subject length lower bound = 1 Subject length lower bound = 1
bar bar
0: b 0: b
@ -205,28 +206,6 @@ No match
\x{100} \x{100}
No match No match
/^[^ab]/IB,utf
------------------------------------------------------------------
Bra
^
[\x00-`c-\xff] (neg)
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
Compile options: utf
Overall options: anchored utf
Subject length lower bound = 1
c
0: c
\x{ff}
0: \x{ff}
\x{100}
0: \x{100}
\= Expect no match
aaa
No match
/\x{100}*(\d+|"(?1)")/utf /\x{100}*(\d+|"(?1)")/utf
1234 1234
0: 1234 0: 1234
@ -479,7 +458,10 @@ Subject length lower bound = 0
\x{100}X \x{100}X
0: X 0: X
/^\ሴ/IB,utf # Use no_start_optimize because the first code unit is different in 8-bit from
# the wider modes.
/^\ሴ/IB,utf,no_start_optimize
------------------------------------------------------------------ ------------------------------------------------------------------
Bra Bra
^ ^
@ -488,9 +470,9 @@ Subject length lower bound = 0
End End
------------------------------------------------------------------ ------------------------------------------------------------------
Capturing subpattern count = 0 Capturing subpattern count = 0
Compile options: utf Compile options: no_start_optimize utf
Overall options: anchored utf Overall options: anchored no_start_optimize utf
Subject length lower bound = 1 Subject length lower bound = 0
/()()()()()()()()()() /()()()()()()()()()()
()()()()()()()()()() ()()()()()()()()()()