Allow anchored patterns to use "first code unit" optimization.
This commit is contained in:
parent
cc089cf971
commit
b7d5cee61f
|
@ -205,6 +205,11 @@ JIT.
|
|||
subjects from 1000 to 2000 for 8-bit searches, since they use memchr() and are
|
||||
much faster.
|
||||
|
||||
46. Arrange for anchored patterns to record and use "first code unit" data,
|
||||
because this can give a fast "no match" without searching for a "required code
|
||||
unit". Previously only non-anchored patterns did this.
|
||||
|
||||
|
||||
|
||||
Version 10.23 14-February-2017
|
||||
------------------------------
|
||||
|
|
|
@ -9632,14 +9632,19 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
|
|||
is_anchored(codestart, 0, &cb, 0, FALSE))
|
||||
re->overall_options |= PCRE2_ANCHORED;
|
||||
|
||||
/* If the pattern is still not anchored and we do not have a first code unit,
|
||||
see if there is one that is asserted (these are not saved during the compile
|
||||
because they can cause conflicts with actual literals that follow). This code
|
||||
need not be obeyed if PCRE2_NO_START_OPTIMIZE is set, as the data it would
|
||||
create will not be used. */
|
||||
/* Set up the first code unit or startline flag, the required code unit, and
|
||||
then study the pattern. This code need not be obeyed if PCRE2_NO_START_OPTIMIZE
|
||||
is set, as the data it would create will not be used. Note that a first code
|
||||
unit (but not the startline flag) is useful for anchored patterns because it
|
||||
can still give a quick "no match" and also avoid searching for a last code
|
||||
unit. */
|
||||
|
||||
if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0)
|
||||
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
|
||||
{
|
||||
/* If we do not have a first code unit, see if there is one that is asserted
|
||||
(these are not saved during the compile because they can cause conflicts with
|
||||
actual literals that follow). */
|
||||
|
||||
if (firstcuflags < 0)
|
||||
firstcu = find_firstassertedcu(codestart, &firstcuflags, FALSE);
|
||||
|
||||
|
@ -9672,52 +9677,50 @@ if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0)
|
|||
}
|
||||
}
|
||||
|
||||
/* When there is no first code unit, see if we can set the PCRE2_STARTLINE
|
||||
flag. This is helpful for multiline matches when all branches start with ^
|
||||
and also when all branches start with non-atomic .* for non-DOTALL matches
|
||||
when *PRUNE and SKIP are not present. (There is an option that disables this
|
||||
case.) */
|
||||
/* When there is no first code unit, for non-anchored patterns, see if we can
|
||||
set the PCRE2_STARTLINE flag. This is helpful for multiline matches when all
|
||||
branches start with ^ and also when all branches start with non-atomic .* for
|
||||
non-DOTALL matches when *PRUNE and SKIP are not present. (There is an option
|
||||
that disables this case.) */
|
||||
|
||||
else if (is_startline(codestart, 0, &cb, 0, FALSE))
|
||||
else if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
|
||||
is_startline(codestart, 0, &cb, 0, FALSE))
|
||||
re->flags |= PCRE2_STARTLINE;
|
||||
}
|
||||
|
||||
/* Handle the "required code unit", if one is set. In the case of an anchored
|
||||
pattern, do this only if it follows a variable length item in the pattern.
|
||||
Again, skip this if PCRE2_NO_START_OPTIMIZE is set. */
|
||||
/* Handle the "required code unit", if one is set. In the case of an anchored
|
||||
pattern, do this only if it follows a variable length item in the pattern. */
|
||||
|
||||
if (reqcuflags >= 0 &&
|
||||
((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0 ||
|
||||
(reqcuflags & REQ_VARY) != 0))
|
||||
{
|
||||
re->last_codeunit = reqcu;
|
||||
re->flags |= PCRE2_LASTSET;
|
||||
|
||||
/* Handle caseless required code units as for first code units (above). */
|
||||
|
||||
if ((reqcuflags & REQ_CASELESS) != 0)
|
||||
if (reqcuflags >= 0 &&
|
||||
((re->overall_options & PCRE2_ANCHORED) == 0 ||
|
||||
(reqcuflags & REQ_VARY) != 0))
|
||||
{
|
||||
if (reqcu < 128 || (!utf && reqcu < 255))
|
||||
re->last_codeunit = reqcu;
|
||||
re->flags |= PCRE2_LASTSET;
|
||||
|
||||
/* Handle caseless required code units as for first code units (above). */
|
||||
|
||||
if ((reqcuflags & REQ_CASELESS) != 0)
|
||||
{
|
||||
if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
|
||||
}
|
||||
if (reqcu < 128 || (!utf && reqcu < 255))
|
||||
{
|
||||
if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
|
||||
}
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu)
|
||||
re->flags |= PCRE2_LASTCASELESS;
|
||||
else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu)
|
||||
re->flags |= PCRE2_LASTCASELESS;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Finally, unless PCRE2_NO_START_OPTIMIZE is set, study the compiled pattern
|
||||
to set up information such as a bitmap of starting code units and a minimum
|
||||
matching length. */
|
||||
/* Finally, study the compiled pattern to set up information such as a bitmap
|
||||
of starting code units and a minimum matching length. */
|
||||
|
||||
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
|
||||
PRIV(study)(re) != 0)
|
||||
{
|
||||
errorcode = ERR31;
|
||||
goto HAD_CB_ERROR;
|
||||
}
|
||||
if (PRIV(study)(re) != 0)
|
||||
{
|
||||
errorcode = ERR31;
|
||||
goto HAD_CB_ERROR;
|
||||
}
|
||||
} /* End of start-of-match optimizations. */
|
||||
|
||||
/* Control ends up here in all cases. When running under valgrind, make a
|
||||
pattern's terminating zero defined again. If memory was obtained for the parsed
|
||||
|
|
|
@ -3341,34 +3341,27 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
|||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Set up the first code unit to match, if available. The first_codeunit value
|
||||
is never set for an anchored regular expression, but the anchoring may be
|
||||
forced at run time, so we have to test for anchoring. The first code unit may
|
||||
be unset for an unanchored pattern, of course. If there's no first code unit
|
||||
there may be a bitmap of possible first characters. */
|
||||
/* Set up the first code unit to match, if available. If there's no first code
|
||||
unit there may be a bitmap of possible first characters. */
|
||||
|
||||
if (!anchored)
|
||||
if ((re->flags & PCRE2_FIRSTSET) != 0)
|
||||
{
|
||||
if ((re->flags & PCRE2_FIRSTSET) != 0)
|
||||
has_first_cu = TRUE;
|
||||
first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
|
||||
if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
|
||||
{
|
||||
has_first_cu = TRUE;
|
||||
first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
|
||||
if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
|
||||
{
|
||||
first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
|
||||
first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (utf && first_cu > 127)
|
||||
first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
|
||||
if (utf && first_cu > 127)
|
||||
first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else
|
||||
if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
|
||||
start_bits = re->start_bitmap;
|
||||
}
|
||||
else
|
||||
if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
|
||||
start_bits = re->start_bitmap;
|
||||
|
||||
/* For anchored or unanchored matches, there may be a "last known required
|
||||
character" set. */
|
||||
/* There may be a "last known required code unit" set. */
|
||||
|
||||
if ((re->flags & PCRE2_LASTSET) != 0)
|
||||
{
|
||||
|
@ -3414,8 +3407,8 @@ for (;;)
|
|||
/* If firstline is TRUE, the start of the match is constrained to the first
|
||||
line of a multiline string. That is, the match must be before or at the
|
||||
first newline. Implement this by temporarily adjusting end_subject so that
|
||||
we stop the optimization scans at a newline. If the match fails at the
|
||||
newline, later code breaks this loop. */
|
||||
we stop the optimization scans for a first code unit at a newline. If the
|
||||
match fails at the newline, later code breaks this loop. */
|
||||
|
||||
if (firstline)
|
||||
{
|
||||
|
@ -3434,70 +3427,138 @@ for (;;)
|
|||
while (t < mb->end_subject && !IS_NEWLINE(t)) t++;
|
||||
end_subject = t;
|
||||
}
|
||||
|
||||
/* Anchored: check the first code unit if one is recorded. This may seem
|
||||
pointless but it can help in detecting a no match case without scanning for
|
||||
the required code unit. */
|
||||
|
||||
/* Advance to a unique first code unit if there is one. */
|
||||
|
||||
if (has_first_cu)
|
||||
if (anchored)
|
||||
{
|
||||
PCRE2_UCHAR smc;
|
||||
if (first_cu != first_cu2)
|
||||
while (start_match < end_subject &&
|
||||
(smc = UCHAR21TEST(start_match)) != first_cu && smc != first_cu2)
|
||||
start_match++;
|
||||
else
|
||||
while (start_match < end_subject && UCHAR21TEST(start_match) != first_cu)
|
||||
start_match++;
|
||||
}
|
||||
|
||||
/* Or to just after a linebreak for a multiline match */
|
||||
|
||||
else if (startline)
|
||||
{
|
||||
if (start_match > mb->start_subject + start_offset)
|
||||
if (has_first_cu || start_bits != NULL)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
BOOL ok = start_match < end_subject;
|
||||
if (ok)
|
||||
{
|
||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||
PCRE2_UCHAR c = UCHAR21TEST(start_match);
|
||||
ok = has_first_cu && (c == first_cu || c == first_cu2);
|
||||
if (!ok && start_bits != NULL)
|
||||
{
|
||||
start_match++;
|
||||
ACROSSCHAR(start_match < end_subject, *start_match,
|
||||
start_match++);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (c > 255) c = 255;
|
||||
#endif
|
||||
ok = (start_bits[c/8] & (1 << (c&7))) != 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||
start_match++;
|
||||
|
||||
/* If we have just passed a CR and the newline option is ANY or
|
||||
ANYCRLF, and we are now at a LF, advance the match position by one more
|
||||
code unit. */
|
||||
|
||||
if (start_match[-1] == CHAR_CR &&
|
||||
(mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
|
||||
start_match < end_subject &&
|
||||
UCHAR21TEST(start_match) == CHAR_NL)
|
||||
start_match++;
|
||||
if (!ok) break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Or to a non-unique first code unit if any have been identified. The
|
||||
bitmap contains only 256 bits. When code units are 16 or 32 bits wide, all
|
||||
code units greater than 254 set the 255 bit. */
|
||||
/* Not anchored. Advance to a unique first code unit if there is one. In
|
||||
8-bit mode, the use of memchr() gives a big speed up, even though we have
|
||||
to call it twice in caseless mode, in order to find the earliest occurrence
|
||||
of the character in either of its cases. */
|
||||
|
||||
else if (start_bits != NULL)
|
||||
else
|
||||
{
|
||||
while (start_match < end_subject)
|
||||
if (has_first_cu)
|
||||
{
|
||||
uint32_t c = UCHAR21TEST(start_match);
|
||||
if (first_cu != first_cu2) /* Caseless */
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (c > 255) c = 255;
|
||||
PCRE2_UCHAR smc;
|
||||
while (start_match < end_subject &&
|
||||
(smc = UCHAR21TEST(start_match)) != first_cu &&
|
||||
smc != first_cu2)
|
||||
start_match++;
|
||||
#else /* 8-bit code units */
|
||||
PCRE2_SPTR pp1 =
|
||||
memchr(start_match, first_cu, end_subject-start_match);
|
||||
PCRE2_SPTR pp2 =
|
||||
memchr(start_match, first_cu2, end_subject-start_match);
|
||||
if (pp1 == NULL)
|
||||
start_match = (pp2 == NULL)? end_subject : pp2;
|
||||
else
|
||||
start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
|
||||
#endif
|
||||
if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
|
||||
start_match++;
|
||||
}
|
||||
|
||||
/* The caseful case */
|
||||
|
||||
else
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
while (start_match < end_subject && UCHAR21TEST(start_match) !=
|
||||
first_cu)
|
||||
start_match++;
|
||||
#else
|
||||
start_match = memchr(start_match, first_cu, end_subject - start_match);
|
||||
if (start_match == NULL) start_match = end_subject;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* If we can't find the required code unit, break the bumpalong loop,
|
||||
to force a match failure, except when doing partial matching, when we
|
||||
let the next cycle run at the end of the subject. To see why, consider
|
||||
the pattern /(?<=abc)def/, which partially matches "abc", even though
|
||||
the string does not contain the starting character "d". */
|
||||
|
||||
if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0 &&
|
||||
start_match >= end_subject)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* If there's no first code unit, advance to just after a linebreak for a
|
||||
multiline match if required. */
|
||||
|
||||
else if (startline)
|
||||
{
|
||||
if (start_match > mb->start_subject + start_offset)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||
{
|
||||
start_match++;
|
||||
ACROSSCHAR(start_match < end_subject, *start_match,
|
||||
start_match++);
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||
start_match++;
|
||||
|
||||
/* If we have just passed a CR and the newline option is ANY or
|
||||
ANYCRLF, and we are now at a LF, advance the match position by one
|
||||
more code unit. */
|
||||
|
||||
if (start_match[-1] == CHAR_CR &&
|
||||
(mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
|
||||
start_match < end_subject &&
|
||||
UCHAR21TEST(start_match) == CHAR_NL)
|
||||
start_match++;
|
||||
}
|
||||
}
|
||||
|
||||
/* If there's no first code unit or a requirement for a multiline line
|
||||
start, advance to a non-unique first code unit if any have been
|
||||
identified. The bitmap contains only 256 bits. When code units are 16 or
|
||||
32 bits wide, all code units greater than 254 set the 255 bit. */
|
||||
|
||||
else if (start_bits != NULL)
|
||||
{
|
||||
while (start_match < end_subject)
|
||||
{
|
||||
uint32_t c = UCHAR21TEST(start_match);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (c > 255) c = 255;
|
||||
#endif
|
||||
if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
|
||||
start_match++;
|
||||
}
|
||||
}
|
||||
} /* End of first code unit handling */
|
||||
|
||||
/* Restore fudged end_subject */
|
||||
|
||||
|
|
|
@ -270,7 +270,7 @@ pcre2_callout_block cb;
|
|||
|
||||
*lengthptr = (*Fecode == OP_CALLOUT)?
|
||||
PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE);
|
||||
|
||||
|
||||
if (mb->callout == NULL) return 0; /* No callout function provided */
|
||||
|
||||
/* The original matching code (pre 10.30) worked directly with the ovector
|
||||
|
@ -279,11 +279,11 @@ ovector is in the backtracking frame, it no longer needs to reserve space for
|
|||
the overall match offsets (which would waste space in the frame). For backward
|
||||
compatibility, however, we pass capture_top and offset_vector to the callout as
|
||||
if for the extended ovector, and we ensure that the first two slots are unset
|
||||
by preserving and restoring their current contents. Picky compilers complain if
|
||||
references such as Fovector[-2] are used directly, so we set up a separate
|
||||
by preserving and restoring their current contents. Picky compilers complain if
|
||||
references such as Fovector[-2] are used directly, so we set up a separate
|
||||
pointer. */
|
||||
|
||||
callout_ovector = (PCRE2_SIZE *)(Fovector) - 2;
|
||||
callout_ovector = (PCRE2_SIZE *)(Fovector) - 2;
|
||||
|
||||
cb.version = 1;
|
||||
cb.capture_top = (uint32_t)Foffset_top/2 + 1;
|
||||
|
@ -935,8 +935,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
|
||||
/* ===================================================================== */
|
||||
/* Match a single character, caselessly. If we are at the end of the
|
||||
subject, give up immediately. We get here only when the pattern character
|
||||
has at most one other case. Characters with more than two cases are coded
|
||||
subject, give up immediately. We get here only when the pattern character
|
||||
has at most one other case. Characters with more than two cases are coded
|
||||
as OP_PROP with the pseudo-property PT_CLIST. */
|
||||
|
||||
case OP_CHARI:
|
||||
|
@ -954,7 +954,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
GETCHARLEN(fc, Fecode, Flength);
|
||||
|
||||
/* If the pattern character's value is < 128, we know that its other case
|
||||
(if any) is also < 128 (and therefore only one code unit long in all
|
||||
(if any) is also < 128 (and therefore only one code unit long in all
|
||||
code-unit widths), so we can use the fast lookup table. We checked above
|
||||
that there is at least one character left in the subject. */
|
||||
|
||||
|
@ -966,7 +966,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
Feptr++;
|
||||
}
|
||||
|
||||
/* Otherwise we must pick up the subject character and use Unicode
|
||||
/* Otherwise we must pick up the subject character and use Unicode
|
||||
property support to test its other case. Note that we cannot use the
|
||||
value of "Flength" to check for sufficient bytes left, because the other
|
||||
case of the character may have more or fewer code units. */
|
||||
|
@ -3056,7 +3056,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
}
|
||||
Feptr += Lmin;
|
||||
break;
|
||||
|
||||
|
||||
/* This OP_ANYBYTE case will never be reached because \C gets turned
|
||||
into OP_ALLANY in non-UTF mode. Cut out the code so that coverage
|
||||
reports don't complain about its never being used. */
|
||||
|
@ -5352,8 +5352,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
(char *)assert_accept_frame + offsetof(heapframe, ovector),
|
||||
assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
|
||||
Foffset_top = assert_accept_frame->offset_top;
|
||||
|
||||
/* Fall through */
|
||||
|
||||
/* Fall through */
|
||||
/* In the case of a match, the captures have already been put into
|
||||
the current frame. */
|
||||
|
||||
|
@ -5650,7 +5650,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
|
||||
if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
|
||||
|
||||
/* Fall through */
|
||||
/* Fall through */
|
||||
/* Unconditional end of subject assertion (\z) */
|
||||
|
||||
case OP_EOD:
|
||||
|
@ -6280,7 +6280,7 @@ The last of these is changed within the match() function if the frame vector
|
|||
has to be expanded. We therefore put it into the match block so that it is
|
||||
correct when calling match() more than once for non-anchored patterns. */
|
||||
|
||||
frame_size = offsetof(heapframe, ovector) +
|
||||
frame_size = offsetof(heapframe, ovector) +
|
||||
re->top_bracket * 2 * sizeof(PCRE2_SIZE);
|
||||
|
||||
/* Limits set in the pattern override the match context only if they are
|
||||
|
@ -6333,33 +6333,26 @@ mb->lcc = re->tables + lcc_offset;
|
|||
mb->fcc = re->tables + fcc_offset;
|
||||
mb->ctypes = re->tables + ctypes_offset;
|
||||
|
||||
/* Set up the first code unit to match, if available. The first_codeunit value
|
||||
is never set for an anchored regular expression, but the anchoring may be
|
||||
forced at run time, so we have to test for anchoring. The first code unit may
|
||||
be unset for an unanchored pattern, of course. If there's no first code unit
|
||||
there may be a bitmap of possible first characters. */
|
||||
/* Set up the first code unit to match, if available. If there's no first code
|
||||
unit there may be a bitmap of possible first characters. */
|
||||
|
||||
if (!anchored)
|
||||
if ((re->flags & PCRE2_FIRSTSET) != 0)
|
||||
{
|
||||
if ((re->flags & PCRE2_FIRSTSET) != 0)
|
||||
has_first_cu = TRUE;
|
||||
first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
|
||||
if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
|
||||
{
|
||||
has_first_cu = TRUE;
|
||||
first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
|
||||
if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
|
||||
{
|
||||
first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
|
||||
first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
|
||||
if (utf && first_cu > 127) first_cu2 = UCD_OTHERCASE(first_cu);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else
|
||||
if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
|
||||
start_bits = re->start_bitmap;
|
||||
}
|
||||
else
|
||||
if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
|
||||
start_bits = re->start_bitmap;
|
||||
|
||||
/* For anchored or unanchored matches, there may be a "last known required
|
||||
character" set. */
|
||||
/* There may also be a "last known required character" set. */
|
||||
|
||||
if ((re->flags & PCRE2_LASTSET) != 0)
|
||||
{
|
||||
|
@ -6398,8 +6391,8 @@ for(;;)
|
|||
/* If firstline is TRUE, the start of the match is constrained to the first
|
||||
line of a multiline string. That is, the match must be before or at the
|
||||
first newline. Implement this by temporarily adjusting end_subject so that
|
||||
we stop the optimization scans at a newline. If the match fails at the
|
||||
newline, later code breaks this loop. */
|
||||
we stop the optimization scans for a first code unit at a newline. If the
|
||||
match fails at the newline, later code breaks this loop. */
|
||||
|
||||
if (firstline)
|
||||
{
|
||||
|
@ -6419,107 +6412,143 @@ for(;;)
|
|||
end_subject = t;
|
||||
}
|
||||
|
||||
/* Advance to a unique first code unit if there is one. In 8-bit mode, the
|
||||
use of memchr() gives a big speed up, even though we have to call it twice
|
||||
in caseless mode, in order to find the first occurrence of the character in
|
||||
either of its cases. */
|
||||
/* Anchored: check the first code unit if one is recorded. This may seem
|
||||
pointless but it can help in detecting a no match case without scanning for
|
||||
the required code unit. */
|
||||
|
||||
if (has_first_cu)
|
||||
if (anchored)
|
||||
{
|
||||
if (first_cu != first_cu2) /* Caseless */
|
||||
if (has_first_cu || start_bits != NULL)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
PCRE2_UCHAR smc;
|
||||
while (start_match < end_subject &&
|
||||
(smc = UCHAR21TEST(start_match)) != first_cu && smc != first_cu2)
|
||||
start_match++;
|
||||
#else /* 8-bit code units */
|
||||
PCRE2_SPTR pp1 = memchr(start_match, first_cu, end_subject-start_match);
|
||||
PCRE2_SPTR pp2 = memchr(start_match, first_cu2, end_subject-start_match);
|
||||
if (pp1 == NULL)
|
||||
start_match = (pp2 == NULL)? end_subject : pp2;
|
||||
else
|
||||
start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* The caseful case */
|
||||
|
||||
else
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
while (start_match < end_subject && UCHAR21TEST(start_match) != first_cu)
|
||||
start_match++;
|
||||
#else
|
||||
start_match = memchr(start_match, first_cu, end_subject - start_match);
|
||||
if (start_match == NULL) start_match = end_subject;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* If we can't find the required code unit, break the bumpalong loop, to
|
||||
force a match failure, except when doing partial matching, when we let
|
||||
the next cycle run at the end of the subject. To see why, consider the
|
||||
pattern /(?<=abc)def/, which partially matches "abc", even though the
|
||||
string does not contain the starting character "d". */
|
||||
|
||||
if (!mb->partial && start_match >= end_subject)
|
||||
{
|
||||
rc = MATCH_NOMATCH;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* If there's no first code unit, advance to just after a linebreak for a
|
||||
multiline match if required. */
|
||||
|
||||
else if (startline)
|
||||
{
|
||||
if (start_match > mb->start_subject + start_offset)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
BOOL ok = start_match < end_subject;
|
||||
if (ok)
|
||||
{
|
||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||
PCRE2_UCHAR c = UCHAR21TEST(start_match);
|
||||
ok = has_first_cu && (c == first_cu || c == first_cu2);
|
||||
if (!ok && start_bits != NULL)
|
||||
{
|
||||
start_match++;
|
||||
ACROSSCHAR(start_match < end_subject, *start_match,
|
||||
start_match++);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (c > 255) c = 255;
|
||||
#endif
|
||||
ok = (start_bits[c/8] & (1 << (c&7))) != 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||
start_match++;
|
||||
|
||||
/* If we have just passed a CR and the newline option is ANY or
|
||||
ANYCRLF, and we are now at a LF, advance the match position by one more
|
||||
code unit. */
|
||||
|
||||
if (start_match[-1] == CHAR_CR &&
|
||||
(mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
|
||||
start_match < end_subject &&
|
||||
UCHAR21TEST(start_match) == CHAR_NL)
|
||||
start_match++;
|
||||
if (!ok)
|
||||
{
|
||||
rc = MATCH_NOMATCH;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If there's no first code unit or a requirement for a multiline line
|
||||
start, advance to a non-unique first code unit if any have been identified.
|
||||
The bitmap contains only 256 bits. When code units are 16 or 32 bits wide,
|
||||
all code units greater than 254 set the 255 bit. */
|
||||
/* Not anchored. Advance to a unique first code unit if there is one. In
|
||||
8-bit mode, the use of memchr() gives a big speed up, even though we have
|
||||
to call it twice in caseless mode, in order to find the earliest occurrence
|
||||
of the character in either of its cases. */
|
||||
|
||||
else if (start_bits != NULL)
|
||||
else
|
||||
{
|
||||
while (start_match < end_subject)
|
||||
if (has_first_cu)
|
||||
{
|
||||
uint32_t c = UCHAR21TEST(start_match);
|
||||
if (first_cu != first_cu2) /* Caseless */
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (c > 255) c = 255;
|
||||
PCRE2_UCHAR smc;
|
||||
while (start_match < end_subject &&
|
||||
(smc = UCHAR21TEST(start_match)) != first_cu &&
|
||||
smc != first_cu2)
|
||||
start_match++;
|
||||
#else /* 8-bit code units */
|
||||
PCRE2_SPTR pp1 =
|
||||
memchr(start_match, first_cu, end_subject-start_match);
|
||||
PCRE2_SPTR pp2 =
|
||||
memchr(start_match, first_cu2, end_subject-start_match);
|
||||
if (pp1 == NULL)
|
||||
start_match = (pp2 == NULL)? end_subject : pp2;
|
||||
else
|
||||
start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
|
||||
#endif
|
||||
if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
|
||||
start_match++;
|
||||
}
|
||||
|
||||
/* The caseful case */
|
||||
|
||||
else
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
while (start_match < end_subject && UCHAR21TEST(start_match) !=
|
||||
first_cu)
|
||||
start_match++;
|
||||
#else
|
||||
start_match = memchr(start_match, first_cu, end_subject - start_match);
|
||||
if (start_match == NULL) start_match = end_subject;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* If we can't find the required code unit, break the bumpalong loop,
|
||||
to force a match failure, except when doing partial matching, when we
|
||||
let the next cycle run at the end of the subject. To see why, consider
|
||||
the pattern /(?<=abc)def/, which partially matches "abc", even though
|
||||
the string does not contain the starting character "d". */
|
||||
|
||||
if (!mb->partial && start_match >= end_subject)
|
||||
{
|
||||
rc = MATCH_NOMATCH;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If there's no first code unit, advance to just after a linebreak for a
|
||||
multiline match if required. */
|
||||
|
||||
else if (startline)
|
||||
{
|
||||
if (start_match > mb->start_subject + start_offset)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||
{
|
||||
start_match++;
|
||||
ACROSSCHAR(start_match < end_subject, *start_match,
|
||||
start_match++);
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||
start_match++;
|
||||
|
||||
/* If we have just passed a CR and the newline option is ANY or
|
||||
ANYCRLF, and we are now at a LF, advance the match position by one
|
||||
more code unit. */
|
||||
|
||||
if (start_match[-1] == CHAR_CR &&
|
||||
(mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
|
||||
start_match < end_subject &&
|
||||
UCHAR21TEST(start_match) == CHAR_NL)
|
||||
start_match++;
|
||||
}
|
||||
}
|
||||
|
||||
/* If there's no first code unit or a requirement for a multiline line
|
||||
start, advance to a non-unique first code unit if any have been
|
||||
identified. The bitmap contains only 256 bits. When code units are 16 or
|
||||
32 bits wide, all code units greater than 254 set the 255 bit. */
|
||||
|
||||
else if (start_bits != NULL)
|
||||
{
|
||||
while (start_match < end_subject)
|
||||
{
|
||||
uint32_t c = UCHAR21TEST(start_match);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (c > 255) c = 255;
|
||||
#endif
|
||||
if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
|
||||
start_match++;
|
||||
}
|
||||
}
|
||||
} /* End first code unit handling */
|
||||
|
||||
/* Restore fudged end_subject */
|
||||
|
||||
|
|
|
@ -799,7 +799,7 @@ if (caseless)
|
|||
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Not UTF */
|
||||
|
@ -953,7 +953,6 @@ do
|
|||
case OP_ALLANY:
|
||||
case OP_ANY:
|
||||
case OP_ANYBYTE:
|
||||
case OP_CIRC:
|
||||
case OP_CIRCM:
|
||||
case OP_CLOSE:
|
||||
case OP_COMMIT:
|
||||
|
@ -1021,6 +1020,13 @@ do
|
|||
case OP_THEN_ARG:
|
||||
return SSB_FAIL;
|
||||
|
||||
/* OP_CIRC happens only at the start of an anchored branch (multiline ^
|
||||
uses OP_CIRCM). Skip over it. */
|
||||
|
||||
case OP_CIRC:
|
||||
tcode += PRIV(OP_lengths)[OP_CIRC];
|
||||
break;
|
||||
|
||||
/* A "real" property test implies no starting bits, but the fake property
|
||||
PT_CLIST identifies a list of characters. These lists are short, as they
|
||||
are used for characters with more than one "other case", so there is no
|
||||
|
@ -1450,7 +1456,7 @@ do
|
|||
#endif
|
||||
/* It seems that the fall through comment must be outside the #ifdef if
|
||||
it is to avoid the gcc compiler warning. */
|
||||
|
||||
|
||||
/* Fall through */
|
||||
|
||||
/* Enter here for a negative non-XCLASS. In the 8-bit library, if we are
|
||||
|
@ -1579,12 +1585,11 @@ BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
|
|||
code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
|
||||
re->name_entry_size * re->name_count;
|
||||
|
||||
/* For an anchored pattern, or an unanchored pattern that has a first code
|
||||
unit, or a multiline pattern that matches only at "line start", there is no
|
||||
point in seeking a list of starting code units. */
|
||||
/* For a pattern that has a first code unit, or a multiline pattern that
|
||||
matches only at "line start", there is no point in seeking a list of starting
|
||||
code units. */
|
||||
|
||||
if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
|
||||
(re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
|
||||
if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
|
||||
{
|
||||
int rc = set_start_bits(re, code, utf);
|
||||
if (rc == SSB_UNKNOWN) return 1;
|
||||
|
|
|
@ -466,5 +466,14 @@
|
|||
|
||||
/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
|
||||
\x{dfff}\x{df01}\=no_utf_check
|
||||
|
||||
# This has different starting code units in 8-bit mode.
|
||||
|
||||
/^[^ab]/IB,utf
|
||||
c
|
||||
\x{ff}
|
||||
\x{100}
|
||||
\= Expect no match
|
||||
aaa
|
||||
|
||||
# End of testinput10
|
||||
|
|
|
@ -373,4 +373,13 @@
|
|||
/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
|
||||
\x{dfff}\x{df01}\=no_utf_check
|
||||
|
||||
# This has different starting code units in 8-bit mode.
|
||||
|
||||
/^[^ab]/IB,utf
|
||||
c
|
||||
\x{ff}
|
||||
\x{100}
|
||||
\= Expect no match
|
||||
aaa
|
||||
|
||||
# End of testinput12
|
||||
|
|
|
@ -5256,6 +5256,9 @@ a)"xI
|
|||
XAB
|
||||
|
||||
/^(?!A(?C1)B)C/
|
||||
ABC\=callout_error=1,no_jit
|
||||
|
||||
/^(?!A(?C1)B)C/no_start_optimize
|
||||
ABC\=callout_error=1
|
||||
|
||||
/^(?(?!A(?C1)B)C)/
|
||||
|
|
|
@ -120,13 +120,6 @@
|
|||
\x{ff}
|
||||
\x{100}
|
||||
|
||||
/^[^ab]/IB,utf
|
||||
c
|
||||
\x{ff}
|
||||
\x{100}
|
||||
\= Expect no match
|
||||
aaa
|
||||
|
||||
/\x{100}*(\d+|"(?1)")/utf
|
||||
1234
|
||||
"1234"
|
||||
|
@ -190,7 +183,10 @@
|
|||
/\w/utf
|
||||
\x{100}X
|
||||
|
||||
/^\ሴ/IB,utf
|
||||
# Use no_start_optimize because the first code unit is different in 8-bit from
|
||||
# the wider modes.
|
||||
|
||||
/^\ሴ/IB,utf,no_start_optimize
|
||||
|
||||
/()()()()()()()()()()
|
||||
()()()()()()()()()()
|
||||
|
|
|
@ -1585,5 +1585,39 @@ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP),
|
|||
/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
|
||||
\x{dfff}\x{df01}\=no_utf_check
|
||||
0: \x{dfff}\x{df01}
|
||||
|
||||
# This has different starting code units in 8-bit mode.
|
||||
|
||||
/^[^ab]/IB,utf
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
^
|
||||
[\x00-`c-\xff] (neg)
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Compile options: utf
|
||||
Overall options: anchored utf
|
||||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
|
||||
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
|
||||
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
|
||||
5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
|
||||
Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
|
||||
\xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0
|
||||
\xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf
|
||||
\xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee
|
||||
\xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd
|
||||
\xfe \xff
|
||||
Subject length lower bound = 1
|
||||
c
|
||||
0: c
|
||||
\x{ff}
|
||||
0: \x{ff}
|
||||
\x{100}
|
||||
0: \x{100}
|
||||
\= Expect no match
|
||||
aaa
|
||||
No match
|
||||
|
||||
# End of testinput10
|
||||
|
|
|
@ -1433,4 +1433,42 @@ Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowe
|
|||
Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
|
||||
\x{dfff}\x{df01}\=no_utf_check
|
||||
|
||||
# This has different starting code units in 8-bit mode.
|
||||
|
||||
/^[^ab]/IB,utf
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
^
|
||||
[\x00-`c-\xff] (neg)
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Compile options: utf
|
||||
Overall options: anchored utf
|
||||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
|
||||
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
|
||||
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
|
||||
5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
|
||||
Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
|
||||
\x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e
|
||||
\x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d
|
||||
\x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac
|
||||
\xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb
|
||||
\xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca
|
||||
\xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9
|
||||
\xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8
|
||||
\xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7
|
||||
\xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
|
||||
Subject length lower bound = 1
|
||||
c
|
||||
0: c
|
||||
\x{ff}
|
||||
0: \x{ff}
|
||||
\x{100}
|
||||
0: \x{100}
|
||||
\= Expect no match
|
||||
aaa
|
||||
No match
|
||||
|
||||
# End of testinput12
|
||||
|
|
|
@ -1425,4 +1425,42 @@ No match
|
|||
\x{dfff}\x{df01}\=no_utf_check
|
||||
0: \x{dfff}\x{df01}
|
||||
|
||||
# This has different starting code units in 8-bit mode.
|
||||
|
||||
/^[^ab]/IB,utf
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
^
|
||||
[\x00-`c-\xff] (neg)
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Compile options: utf
|
||||
Overall options: anchored utf
|
||||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
|
||||
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
|
||||
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
|
||||
5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
|
||||
Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
|
||||
\x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e
|
||||
\x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d
|
||||
\x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac
|
||||
\xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb
|
||||
\xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca
|
||||
\xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9
|
||||
\xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8
|
||||
\xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7
|
||||
\xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
|
||||
Subject length lower bound = 1
|
||||
c
|
||||
0: c
|
||||
\x{ff}
|
||||
0: \x{ff}
|
||||
\x{100}
|
||||
0: \x{100}
|
||||
\= Expect no match
|
||||
aaa
|
||||
No match
|
||||
|
||||
# End of testinput12
|
||||
|
|
|
@ -368,6 +368,7 @@ No match
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 6
|
||||
JIT compilation was successful
|
||||
#pop jitverify
|
||||
|
@ -379,6 +380,7 @@ JIT compilation was successful
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 6
|
||||
JIT compilation was successful
|
||||
#save testsaved1
|
||||
|
|
|
@ -72,6 +72,7 @@ No match
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 3
|
||||
abc
|
||||
0: abc
|
||||
|
@ -110,6 +111,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 3
|
||||
abc
|
||||
0: abc
|
||||
|
@ -339,6 +341,7 @@ Subject length lower bound = 19
|
|||
/the quick brown fox/I,anchored
|
||||
Capturing subpattern count = 0
|
||||
Options: anchored
|
||||
First code unit = 't'
|
||||
Subject length lower bound = 19
|
||||
the quick brown fox
|
||||
0: the quick brown fox
|
||||
|
@ -351,6 +354,7 @@ Failed: error 111 at offset 4: unrecognized character after (? or (?-
|
|||
|
||||
/^abc|def/I
|
||||
Capturing subpattern count = 0
|
||||
Starting code units: a d
|
||||
Subject length lower bound = 3
|
||||
abcdef
|
||||
0: abc
|
||||
|
@ -495,12 +499,14 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = '1'
|
||||
Subject length lower bound = 4
|
||||
|
||||
/(^b|(?i)^d)/I
|
||||
Capturing subpattern count = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: D b d
|
||||
Subject length lower bound = 1
|
||||
|
||||
/(?s).*/I
|
||||
|
@ -624,6 +630,7 @@ Capturing subpattern count = 0
|
|||
Max lookbehind = 1
|
||||
Compile options: multiline
|
||||
Overall options: anchored multiline
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 3
|
||||
|
||||
/^abc/Im
|
||||
|
@ -637,6 +644,7 @@ Subject length lower bound = 3
|
|||
Capturing subpattern count = 5
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 3
|
||||
aaaaabbbbbcccccdef
|
||||
0: aaaaabbbbbcccccdef
|
||||
|
@ -808,6 +816,7 @@ Capturing subpattern count = 1
|
|||
Max back reference = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: a
|
||||
Subject length lower bound = 4
|
||||
\= Expect no match
|
||||
aaaa
|
||||
|
@ -1004,6 +1013,7 @@ Subject length lower bound = 16
|
|||
Capturing subpattern count = 3
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: a b
|
||||
Subject length lower bound = 4
|
||||
adef\=get=1,get=2,get=3,get=4,getall
|
||||
0: adef
|
||||
|
@ -1042,6 +1052,7 @@ Get substring 4 failed (-49): unknown substring
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 7
|
||||
abc\00def\=copy=0,getall
|
||||
0: abc\x00def
|
||||
|
@ -1227,6 +1238,7 @@ Subject length lower bound = 3
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'i'
|
||||
Subject length lower bound = 3
|
||||
ississippi
|
||||
0: iss
|
||||
|
@ -1286,6 +1298,7 @@ Capturing subpattern count = 0
|
|||
Contains explicit CR or LF match
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 3
|
||||
ab\nab\ncd
|
||||
0: ab\x0a
|
||||
|
@ -1776,6 +1789,8 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
|
||||
Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s t u v w x y z
|
||||
Subject length lower bound = 1
|
||||
|
||||
/^[[:^alnum:]]/IB
|
||||
|
@ -1789,6 +1804,18 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
|
||||
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
|
||||
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
|
||||
? @ [ \ ] ^ _ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88
|
||||
\x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97
|
||||
\x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6
|
||||
\xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5
|
||||
\xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4
|
||||
\xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3
|
||||
\xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2
|
||||
\xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1
|
||||
\xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
|
||||
Subject length lower bound = 1
|
||||
|
||||
/^[[:alpha:]]/IB
|
||||
|
@ -1802,6 +1829,8 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
|
||||
a b c d e f g h i j k l m n o p q r s t u v w x y z
|
||||
Subject length lower bound = 1
|
||||
|
||||
/^[[:^alpha:]]/IB
|
||||
|
@ -1815,6 +1844,19 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
|
||||
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
|
||||
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
|
||||
5 6 7 8 9 : ; < = > ? @ [ \ ] ^ _ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
|
||||
\x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93
|
||||
\x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2
|
||||
\xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1
|
||||
\xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0
|
||||
\xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf
|
||||
\xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde
|
||||
\xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed
|
||||
\xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc
|
||||
\xfd \xfe \xff
|
||||
Subject length lower bound = 1
|
||||
|
||||
/[_[:alpha:]]/I
|
||||
|
@ -1834,6 +1876,12 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
|
||||
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
|
||||
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
|
||||
5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
|
||||
Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~
|
||||
\x7f
|
||||
Subject length lower bound = 1
|
||||
|
||||
/^[[:^ascii:]]/IB
|
||||
|
@ -1847,6 +1895,15 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a
|
||||
\x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99
|
||||
\x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8
|
||||
\xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7
|
||||
\xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6
|
||||
\xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5
|
||||
\xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4
|
||||
\xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3
|
||||
\xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
|
||||
Subject length lower bound = 1
|
||||
|
||||
/^[[:blank:]]/IB
|
||||
|
@ -1860,6 +1917,7 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: \x09 \x20
|
||||
Subject length lower bound = 1
|
||||
|
||||
/^[[:^blank:]]/IB
|
||||
|
@ -1873,6 +1931,20 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
|
||||
\x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a
|
||||
\x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9
|
||||
: ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^
|
||||
_ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80
|
||||
\x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f
|
||||
\x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e
|
||||
\x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad
|
||||
\xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc
|
||||
\xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb
|
||||
\xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda
|
||||
\xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9
|
||||
\xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8
|
||||
\xf9 \xfa \xfb \xfc \xfd \xfe \xff
|
||||
Subject length lower bound = 1
|
||||
|
||||
/[\n\x0b\x0c\x0d[:blank:]]/I
|
||||
|
@ -1892,6 +1964,9 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
|
||||
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
|
||||
\x1a \x1b \x1c \x1d \x1e \x1f \x7f
|
||||
Subject length lower bound = 1
|
||||
|
||||
/^[[:digit:]]/IB
|
||||
|
@ -1905,6 +1980,7 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: 0 1 2 3 4 5 6 7 8 9
|
||||
Subject length lower bound = 1
|
||||
|
||||
/^[[:graph:]]/IB
|
||||
|
@ -1918,6 +1994,9 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 :
|
||||
; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _
|
||||
` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~
|
||||
Subject length lower bound = 1
|
||||
|
||||
/^[[:lower:]]/IB
|
||||
|
@ -1931,6 +2010,7 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: a b c d e f g h i j k l m n o p q r s t u v w x y z
|
||||
Subject length lower bound = 1
|
||||
|
||||
/^[[:print:]]/IB
|
||||
|
@ -1944,6 +2024,9 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8
|
||||
9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ]
|
||||
^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~
|
||||
Subject length lower bound = 1
|
||||
|
||||
/^[[:punct:]]/IB
|
||||
|
@ -1957,6 +2040,8 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^
|
||||
_ ` { | } ~
|
||||
Subject length lower bound = 1
|
||||
|
||||
/^[[:space:]]/IB
|
||||
|
@ -1970,6 +2055,7 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20
|
||||
Subject length lower bound = 1
|
||||
|
||||
/^[[:upper:]]/IB
|
||||
|
@ -1983,6 +2069,7 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
|
||||
Subject length lower bound = 1
|
||||
|
||||
/^[[:xdigit:]]/IB
|
||||
|
@ -1996,6 +2083,7 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F a b c d e f
|
||||
Subject length lower bound = 1
|
||||
|
||||
/^[[:word:]]/IB
|
||||
|
@ -2009,6 +2097,8 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
|
||||
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
|
||||
Subject length lower bound = 1
|
||||
|
||||
/^[[:^cntrl:]]/IB
|
||||
|
@ -2022,6 +2112,18 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8
|
||||
9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ]
|
||||
^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x80 \x81
|
||||
\x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90
|
||||
\x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f
|
||||
\xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae
|
||||
\xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd
|
||||
\xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc
|
||||
\xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb
|
||||
\xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea
|
||||
\xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9
|
||||
\xfa \xfb \xfc \xfd \xfe \xff
|
||||
Subject length lower bound = 1
|
||||
|
||||
/^[12[:^digit:]]/IB
|
||||
|
@ -2035,6 +2137,20 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
|
||||
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
|
||||
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 1 2 : ; <
|
||||
= > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a
|
||||
b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
|
||||
\x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91
|
||||
\x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0
|
||||
\xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf
|
||||
\xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe
|
||||
\xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd
|
||||
\xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc
|
||||
\xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb
|
||||
\xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa
|
||||
\xfb \xfc \xfd \xfe \xff
|
||||
Subject length lower bound = 1
|
||||
|
||||
/^[[:^blank:]]/IB
|
||||
|
@ -2048,6 +2164,20 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0a \x0b
|
||||
\x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a
|
||||
\x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9
|
||||
: ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^
|
||||
_ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80
|
||||
\x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f
|
||||
\x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e
|
||||
\x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad
|
||||
\xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc
|
||||
\xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb
|
||||
\xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda
|
||||
\xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9
|
||||
\xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8
|
||||
\xf9 \xfa \xfb \xfc \xfd \xfe \xff
|
||||
Subject length lower bound = 1
|
||||
|
||||
/[01[:alpha:]%]/IB
|
||||
|
@ -2418,6 +2548,7 @@ Subject length lower bound = 4
|
|||
Capturing subpattern count = 2
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 1
|
||||
aba
|
||||
0: aba
|
||||
|
@ -2428,6 +2559,7 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 2
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
aabbaa
|
||||
0: aabbaa
|
||||
|
@ -2438,6 +2570,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 2
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
aabbaa
|
||||
0: aabbaa
|
||||
|
@ -2448,6 +2581,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 2
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
aabbaa
|
||||
0: aabbaa
|
||||
|
@ -2458,6 +2592,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
aabbaa
|
||||
0: aabbaa
|
||||
|
@ -2467,6 +2602,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 3
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
aabbaa
|
||||
0: aabbaa
|
||||
|
@ -2478,6 +2614,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 2
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
aabbaa
|
||||
0: aabbaa
|
||||
|
@ -2488,6 +2625,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
aabbaa
|
||||
0: aabbaa
|
||||
|
@ -2497,6 +2635,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
aabbbaa
|
||||
0: aabbbaa
|
||||
|
@ -2506,6 +2645,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
aabbbaa
|
||||
0: aabbbaa
|
||||
|
@ -2515,6 +2655,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
aabbaa
|
||||
0: aabbaa
|
||||
|
@ -2524,6 +2665,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
aabbbaa
|
||||
0: aabbbaa
|
||||
|
@ -2533,6 +2675,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 3
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
aabbbaa
|
||||
0: aabbbaa
|
||||
|
@ -2544,6 +2687,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 3
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
aabbbbaa
|
||||
0: aabbbbaa
|
||||
|
@ -3052,6 +3196,7 @@ Subject length lower bound = 3
|
|||
Capturing subpattern count = 5
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 3
|
||||
|
||||
/^x(?U)a+b/IB
|
||||
|
@ -3067,6 +3212,7 @@ Subject length lower bound = 3
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'x'
|
||||
Last code unit = 'b'
|
||||
Subject length lower bound = 3
|
||||
|
||||
|
@ -3085,6 +3231,7 @@ Subject length lower bound = 3
|
|||
Capturing subpattern count = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'x'
|
||||
Last code unit = 'b'
|
||||
Subject length lower bound = 3
|
||||
|
||||
|
@ -3725,6 +3872,7 @@ Subject length lower bound = 3
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 3
|
||||
|
||||
/(?C)a|b/I
|
||||
|
@ -3785,6 +3933,7 @@ No match
|
|||
Capturing subpattern count = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = '>'
|
||||
Last code unit = '<'
|
||||
Subject length lower bound = 10
|
||||
>abc>123<xyz<
|
||||
|
@ -3835,6 +3984,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 2
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: ( - 0 1 2 3 4 5 6 7 8 9
|
||||
Subject length lower bound = 1
|
||||
12
|
||||
0: 12
|
||||
|
@ -3854,6 +4004,7 @@ No match
|
|||
Capturing subpattern count = 2
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'x'
|
||||
Subject length lower bound = 3
|
||||
xyz
|
||||
0: xyz
|
||||
|
@ -3913,6 +4064,7 @@ Failed: error 114 at offset 10: missing closing parenthesis
|
|||
Capturing subpattern count = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 9
|
||||
abcdefabc
|
||||
0: abcdefabc
|
||||
|
@ -3922,6 +4074,7 @@ Subject length lower bound = 9
|
|||
Capturing subpattern count = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: a b c
|
||||
Subject length lower bound = 2
|
||||
a=a
|
||||
0: a=a
|
||||
|
@ -3937,6 +4090,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 2
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: a b c
|
||||
Subject length lower bound = 2
|
||||
a=a
|
||||
0: a=a
|
||||
|
@ -5173,6 +5327,7 @@ No match
|
|||
Capturing subpattern count = 3
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: 0 1 2 3 4 5 6 7 8 9
|
||||
Last code unit = '/'
|
||||
Subject length lower bound = 6
|
||||
13/05/04\=ps
|
||||
|
@ -5270,6 +5425,7 @@ Partial match: c12
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: 0 1 2 3 4 5 6 7 8 9
|
||||
Last code unit = 'X'
|
||||
Subject length lower bound = 4
|
||||
1\=ps
|
||||
|
@ -5643,6 +5799,7 @@ Named capturing subpatterns:
|
|||
A 3
|
||||
Compile options: dupnames
|
||||
Overall options: anchored dupnames
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
a1b\=copy=A
|
||||
0: a1
|
||||
|
@ -5680,6 +5837,7 @@ Named capturing subpatterns:
|
|||
A 2
|
||||
Compile options: dupnames
|
||||
Overall options: anchored dupnames
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
ab\=copy=A
|
||||
0: ab
|
||||
|
@ -5693,6 +5851,7 @@ Named capturing subpatterns:
|
|||
A 1
|
||||
A 2
|
||||
Options: dupnames
|
||||
Starting code units: a c
|
||||
Subject length lower bound = 2
|
||||
ab\=copy=A
|
||||
0: ab
|
||||
|
@ -5711,6 +5870,7 @@ Named capturing subpatterns:
|
|||
A 3
|
||||
A 4
|
||||
Options: dupnames
|
||||
Starting code units: a c
|
||||
Subject length lower bound = 2
|
||||
cdefgh\=copy=A
|
||||
0: cdefgh
|
||||
|
@ -5727,6 +5887,7 @@ Named capturing subpatterns:
|
|||
A 3
|
||||
Compile options: dupnames
|
||||
Overall options: anchored dupnames
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
a1b\=get=A
|
||||
0: a1
|
||||
|
@ -5754,6 +5915,7 @@ Named capturing subpatterns:
|
|||
A 2
|
||||
Compile options: dupnames
|
||||
Overall options: anchored dupnames
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
ab\=get=A
|
||||
0: ab
|
||||
|
@ -5767,6 +5929,7 @@ Named capturing subpatterns:
|
|||
A 1
|
||||
A 2
|
||||
Options: dupnames
|
||||
Starting code units: a c
|
||||
Subject length lower bound = 2
|
||||
ab\=get=A
|
||||
0: ab
|
||||
|
@ -5785,6 +5948,7 @@ Named capturing subpatterns:
|
|||
A 3
|
||||
A 4
|
||||
Options: dupnames
|
||||
Starting code units: a c
|
||||
Subject length lower bound = 2
|
||||
cdefgh\=get=A
|
||||
0: cdefgh
|
||||
|
@ -5802,6 +5966,7 @@ Named capturing subpatterns:
|
|||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Duplicate name status changes
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
a1b\=copy=A
|
||||
0: a1
|
||||
|
@ -5832,6 +5997,7 @@ Named capturing subpatterns:
|
|||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Duplicate name status changes
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 6
|
||||
a bc d\=copy=A,copy=B,copy=C
|
||||
0: a bc d
|
||||
|
@ -6233,6 +6399,7 @@ Subject length lower bound = 4
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: a b
|
||||
Last code unit = 'b'
|
||||
Subject length lower bound = 2
|
||||
|
||||
|
@ -6249,6 +6416,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: a b
|
||||
Last code unit = 'b'
|
||||
Subject length lower bound = 2
|
||||
|
||||
|
@ -6265,6 +6433,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Starting code units: a b
|
||||
Last code unit = 'b'
|
||||
Subject length lower bound = 2
|
||||
|
||||
|
@ -6281,6 +6450,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Last code unit = 'A'
|
||||
Subject length lower bound = 3
|
||||
aaaA5
|
||||
|
@ -6302,6 +6472,7 @@ No match
|
|||
Capturing subpattern count = 0
|
||||
Compile options: caseless
|
||||
Overall options: anchored caseless
|
||||
Starting code units: A a
|
||||
Last code unit = 'A' (caseless)
|
||||
Subject length lower bound = 2
|
||||
aaaA5
|
||||
|
@ -9540,6 +9711,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'F'
|
||||
Last code unit = ':'
|
||||
Subject length lower bound = 22
|
||||
|
||||
|
@ -9691,6 +9863,7 @@ Named capturing subpatterns:
|
|||
D 1
|
||||
Compile options: dupnames extended
|
||||
Overall options: anchored dupnames extended
|
||||
Starting code units: a e
|
||||
Subject length lower bound = 2
|
||||
abcdX
|
||||
0: abcdX
|
||||
|
@ -10445,12 +10618,14 @@ Failed: error 125 at offset 0: lookbehind assertion is not fixed length
|
|||
Capturing subpattern count = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
|
||||
/(^ab)++/I
|
||||
Capturing subpattern count = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
|
||||
/(^ab|^)+/I
|
||||
|
@ -10471,12 +10646,14 @@ Subject length lower bound = 0
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
|
||||
/(?:^ab)++/I
|
||||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
|
||||
/(?:^ab|^)+/I
|
||||
|
@ -11586,6 +11763,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 0
|
||||
Compile options: dotall
|
||||
Overall options: anchored dotall
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
|
||||
/.*?a(*SKIP)b/I
|
||||
|
@ -11608,6 +11786,7 @@ Subject length lower bound = 2
|
|||
Capturing subpattern count = 0
|
||||
Compile options: dotall
|
||||
Overall options: anchored dotall
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 2
|
||||
|
||||
/(?>.*?)(?<=(abcd)|(wxyz))/I
|
||||
|
@ -13375,7 +13554,6 @@ Subject length lower bound = 1
|
|||
/(|ab)*?d/I,no_start_optimize
|
||||
Capturing subpattern count = 1
|
||||
Options: no_start_optimize
|
||||
Last code unit = 'd'
|
||||
Subject length lower bound = 0
|
||||
abd
|
||||
0: abd
|
||||
|
@ -13641,12 +13819,14 @@ get substring list failed (-2): partial match
|
|||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 3
|
||||
|
||||
/^abc/info,no_dotstar_anchor
|
||||
Capturing subpattern count = 0
|
||||
Compile options: no_dotstar_anchor
|
||||
Overall options: anchored no_dotstar_anchor
|
||||
First code unit = 'a'
|
||||
Subject length lower bound = 3
|
||||
|
||||
/.*\d/info,auto_callout
|
||||
|
@ -14684,6 +14864,7 @@ Capturing subpattern count = 2
|
|||
Max back reference = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'o'
|
||||
Last code unit = '}'
|
||||
Subject length lower bound = 65535
|
||||
|
||||
|
@ -15607,6 +15788,7 @@ No match
|
|||
Capturing subpattern count = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
First code unit = 'b'
|
||||
Subject length lower bound = 2
|
||||
|
||||
/(a){0}.*bc/sI
|
||||
|
@ -15885,6 +16067,10 @@ No match
|
|||
No match
|
||||
|
||||
/^(?!A(?C1)B)C/
|
||||
ABC\=callout_error=1,no_jit
|
||||
No match
|
||||
|
||||
/^(?!A(?C1)B)C/no_start_optimize
|
||||
ABC\=callout_error=1
|
||||
--->ABC
|
||||
1 ^^ B
|
||||
|
|
|
@ -194,6 +194,7 @@ Subject length lower bound = 3
|
|||
Capturing subpattern count = 0
|
||||
Compile options: utf
|
||||
Overall options: anchored utf
|
||||
Starting code units: a b
|
||||
Subject length lower bound = 1
|
||||
bar
|
||||
0: b
|
||||
|
@ -205,28 +206,6 @@ No match
|
|||
\x{100}
|
||||
No match
|
||||
|
||||
/^[^ab]/IB,utf
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
^
|
||||
[\x00-`c-\xff] (neg)
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Compile options: utf
|
||||
Overall options: anchored utf
|
||||
Subject length lower bound = 1
|
||||
c
|
||||
0: c
|
||||
\x{ff}
|
||||
0: \x{ff}
|
||||
\x{100}
|
||||
0: \x{100}
|
||||
\= Expect no match
|
||||
aaa
|
||||
No match
|
||||
|
||||
/\x{100}*(\d+|"(?1)")/utf
|
||||
1234
|
||||
0: 1234
|
||||
|
@ -479,7 +458,10 @@ Subject length lower bound = 0
|
|||
\x{100}X
|
||||
0: X
|
||||
|
||||
/^\ሴ/IB,utf
|
||||
# Use no_start_optimize because the first code unit is different in 8-bit from
|
||||
# the wider modes.
|
||||
|
||||
/^\ሴ/IB,utf,no_start_optimize
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
^
|
||||
|
@ -488,9 +470,9 @@ Subject length lower bound = 0
|
|||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Compile options: utf
|
||||
Overall options: anchored utf
|
||||
Subject length lower bound = 1
|
||||
Compile options: no_start_optimize utf
|
||||
Overall options: anchored no_start_optimize utf
|
||||
Subject length lower bound = 0
|
||||
|
||||
/()()()()()()()()()()
|
||||
()()()()()()()()()()
|
||||
|
|
Loading…
Reference in New Issue