Update pcre2demo to deal with various \K inside assertion anomalies.
This commit is contained in:
parent
94d5f4a050
commit
a9f7c80fa3
|
@ -95,6 +95,11 @@ not by JIT or pcre2_dfa_match(). Their settings are shown in pcre2test callouts
|
||||||
if the callout_extra subject modifier is set. These bits are provided to help
|
if the callout_extra subject modifier is set. These bits are provided to help
|
||||||
with tracking how a backtracking match is proceeding.
|
with tracking how a backtracking match is proceeding.
|
||||||
|
|
||||||
|
23. Updated the pcre2demo.c demonstration program, which was missing the extra
|
||||||
|
code for -g that handles the case when \K in an assertion causes the match to
|
||||||
|
end at the original start point. Also arranged for it to detect when \K causes
|
||||||
|
the end of a match to be before its start.
|
||||||
|
|
||||||
|
|
||||||
Version 10.30 14-August-2017
|
Version 10.30 14-August-2017
|
||||||
----------------------------
|
----------------------------
|
||||||
|
|
|
@ -211,6 +211,21 @@ pcre2_match_data_create_from_pattern() above. */
|
||||||
if (rc == 0)
|
if (rc == 0)
|
||||||
printf("ovector was not big enough for all the captured substrings\n");
|
printf("ovector was not big enough for all the captured substrings\n");
|
||||||
|
|
||||||
|
/* We must guard against patterns such as /(?=.\K)/ that use \K in an assertion
|
||||||
|
to set the start of a match later than its end. In this demonstration program,
|
||||||
|
we just detect this case and give up. */
|
||||||
|
|
||||||
|
if (ovector[0] > ovector[1])
|
||||||
|
{
|
||||||
|
printf("\\K was used in an assertion to set the match start after its end.\n"
|
||||||
|
"From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]),
|
||||||
|
(char *)(subject + ovector[1]));
|
||||||
|
printf("Run abandoned\n");
|
||||||
|
pcre2_match_data_free(match_data);
|
||||||
|
pcre2_code_free(re);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
/* Show substrings stored in the output vector by number. Obviously, in a real
|
/* Show substrings stored in the output vector by number. Obviously, in a real
|
||||||
application you might want to do things other than print them. */
|
application you might want to do things other than print them. */
|
||||||
|
|
||||||
|
@ -338,6 +353,29 @@ for (;;)
|
||||||
options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If the previous match was not an empty string, there is one tricky case to
|
||||||
|
consider. If a pattern contains \K within a lookbehind assertion at the
|
||||||
|
start, the end of the matched string can be at the offset where the match
|
||||||
|
started. Without special action, this leads to a loop that keeps on matching
|
||||||
|
the same substring. We must detect this case and arrange to move the start on
|
||||||
|
by one character. The pcre2_get_startchar() function returns the offset of
|
||||||
|
the character at which the match actually started, which \K cannot alter. */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
|
||||||
|
if (start_offset <= startchar)
|
||||||
|
{
|
||||||
|
if (startchar >= subject_length) break; /* Reached end of subject. */
|
||||||
|
start_offset = startchar + 1; /* Advance by one character. */
|
||||||
|
if (utf8) /* If UTF-8, it may be more */
|
||||||
|
{ /* than one code unit. */
|
||||||
|
for (; start_offset < subject_length; start_offset++)
|
||||||
|
if ((subject[start_offset] & 0xc0) != 0x80) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Run the next matching operation */
|
/* Run the next matching operation */
|
||||||
|
|
||||||
rc = pcre2_match(
|
rc = pcre2_match(
|
||||||
|
@ -402,6 +440,21 @@ for (;;)
|
||||||
if (rc == 0)
|
if (rc == 0)
|
||||||
printf("ovector was not big enough for all the captured substrings\n");
|
printf("ovector was not big enough for all the captured substrings\n");
|
||||||
|
|
||||||
|
/* We must guard against patterns such as /(?=.\K)/ that use \K in an
|
||||||
|
assertion to set the start of a match later than its end. In this
|
||||||
|
demonstration program, we just detect this case and give up. */
|
||||||
|
|
||||||
|
if (ovector[0] > ovector[1])
|
||||||
|
{
|
||||||
|
printf("\\K was used in an assertion to set the match start after its end.\n"
|
||||||
|
"From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]),
|
||||||
|
(char *)(subject + ovector[1]));
|
||||||
|
printf("Run abandoned\n");
|
||||||
|
pcre2_match_data_free(match_data);
|
||||||
|
pcre2_code_free(re);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
/* As before, show substrings stored in the output vector by number, and then
|
/* As before, show substrings stored in the output vector by number, and then
|
||||||
also any named substrings. */
|
also any named substrings. */
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue