Fix issues with NULL characters in patterns.
This commit is contained in:
parent
7b1d9549c6
commit
d71b70cdf7
|
@ -358,6 +358,14 @@ other verb "name" ended with whitespace immediately before the closing
|
||||||
parenthesis, pcre2_compile() misbehaved. Example: /(*:abc )/, but only when
|
parenthesis, pcre2_compile() misbehaved. Example: /(*:abc )/, but only when
|
||||||
both those options were set.
|
both those options were set.
|
||||||
|
|
||||||
|
107. In a number of places pcre2_compile() was not handling NUL (binary zero)
|
||||||
|
characters correctly, and pcre2test with the "bincode" modifier was not always
|
||||||
|
correctly displaying fields containing NULs:
|
||||||
|
|
||||||
|
(a) Within /x extended #-comments
|
||||||
|
(b) Within the "name" part of (*MARK) and other *verbs
|
||||||
|
(c) Within the text argument of a callout
|
||||||
|
|
||||||
|
|
||||||
Version 10.20 30-June-2015
|
Version 10.20 30-June-2015
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
|
@ -3022,7 +3022,7 @@ for (; ptr < cb->end_pattern; ptr++)
|
||||||
if (x == CHAR_NUMBER_SIGN)
|
if (x == CHAR_NUMBER_SIGN)
|
||||||
{
|
{
|
||||||
ptr++;
|
ptr++;
|
||||||
while (*ptr != CHAR_NULL)
|
while (*ptr != CHAR_NULL || ptr < cb->end_pattern)
|
||||||
{
|
{
|
||||||
if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */
|
if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */
|
||||||
{ /* IS_NEWLINE sets cb->nllen. */
|
{ /* IS_NEWLINE sets cb->nllen. */
|
||||||
|
@ -4008,7 +4008,7 @@ for (;; ptr++)
|
||||||
if (c == CHAR_NUMBER_SIGN)
|
if (c == CHAR_NUMBER_SIGN)
|
||||||
{
|
{
|
||||||
ptr++;
|
ptr++;
|
||||||
while (*ptr != CHAR_NULL)
|
while (ptr < cb->end_pattern)
|
||||||
{
|
{
|
||||||
if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */
|
if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */
|
||||||
{ /* IS_NEWLINE sets cb->nllen. */
|
{ /* IS_NEWLINE sets cb->nllen. */
|
||||||
|
@ -5044,7 +5044,7 @@ for (;; ptr++)
|
||||||
while (MAX_255(*p) && (cb->ctypes[*p] & ctype_space) != 0) p++;
|
while (MAX_255(*p) && (cb->ctypes[*p] & ctype_space) != 0) p++;
|
||||||
if (*p != CHAR_NUMBER_SIGN) break;
|
if (*p != CHAR_NUMBER_SIGN) break;
|
||||||
p++;
|
p++;
|
||||||
while (*p != CHAR_NULL)
|
while (ptr < cb->end_pattern)
|
||||||
{
|
{
|
||||||
if (IS_NEWLINE(p)) /* For non-fixed-length newline cases, */
|
if (IS_NEWLINE(p)) /* For non-fixed-length newline cases, */
|
||||||
{ /* IS_NEWLINE sets cb->nllen. */
|
{ /* IS_NEWLINE sets cb->nllen. */
|
||||||
|
@ -5832,7 +5832,7 @@ for (;; ptr++)
|
||||||
if ((options & PCRE2_ALT_VERBNAMES) == 0)
|
if ((options & PCRE2_ALT_VERBNAMES) == 0)
|
||||||
{
|
{
|
||||||
arglen = 0;
|
arglen = 0;
|
||||||
while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS)
|
while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS)
|
||||||
{
|
{
|
||||||
ptr++; /* Check length as we go */
|
ptr++; /* Check length as we go */
|
||||||
arglen++; /* along, to avoid the */
|
arglen++; /* along, to avoid the */
|
||||||
|
|
|
@ -63,6 +63,7 @@ static const char *OP_names[] = { OP_NAME_LIST };
|
||||||
#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_)
|
#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_)
|
||||||
#define print_char PCRE2_SUFFIX(print_char_)
|
#define print_char PCRE2_SUFFIX(print_char_)
|
||||||
#define print_custring PCRE2_SUFFIX(print_custring_)
|
#define print_custring PCRE2_SUFFIX(print_custring_)
|
||||||
|
#define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_)
|
||||||
#define print_prop PCRE2_SUFFIX(print_prop_)
|
#define print_prop PCRE2_SUFFIX(print_prop_)
|
||||||
|
|
||||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||||
|
@ -188,12 +189,14 @@ return 0;
|
||||||
* Print string as a list of code units *
|
* Print string as a list of code units *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This takes no account of UTF as it always prints each individual code unit.
|
/* These take no account of UTF as they always print each individual code unit.
|
||||||
The string is zero-terminated.
|
The string is zero-terminated for print_custring(); the length is given for
|
||||||
|
print_custring_bylen().
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
f file to write to
|
f file to write to
|
||||||
ptr point to the string
|
ptr point to the string
|
||||||
|
len length for print_custring_bylen()
|
||||||
|
|
||||||
Returns: nothing
|
Returns: nothing
|
||||||
*/
|
*/
|
||||||
|
@ -208,6 +211,16 @@ while (*ptr != '\0')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_UCHAR len)
|
||||||
|
{
|
||||||
|
while (len-- > 0)
|
||||||
|
{
|
||||||
|
register uint32_t c = *ptr++;
|
||||||
|
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
|
@ -603,7 +616,7 @@ for(;;)
|
||||||
c = code[1 + 4*LINK_SIZE];
|
c = code[1 + 4*LINK_SIZE];
|
||||||
fprintf(f, " %s %c", OP_names[*code], c);
|
fprintf(f, " %s %c", OP_names[*code], c);
|
||||||
extra = GET(code, 1 + 2*LINK_SIZE);
|
extra = GET(code, 1 + 2*LINK_SIZE);
|
||||||
print_custring(f, code + 2 + 4*LINK_SIZE);
|
print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE);
|
||||||
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
|
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
|
||||||
if (c == PRIV(callout_start_delims)[i])
|
if (c == PRIV(callout_start_delims)[i])
|
||||||
{
|
{
|
||||||
|
@ -791,7 +804,7 @@ for(;;)
|
||||||
case OP_SKIP_ARG:
|
case OP_SKIP_ARG:
|
||||||
case OP_THEN_ARG:
|
case OP_THEN_ARG:
|
||||||
fprintf(f, " %s ", OP_names[*code]);
|
fprintf(f, " %s ", OP_names[*code]);
|
||||||
print_custring(f, code + 2);
|
print_custring_bylen(f, code + 2, code[1]);
|
||||||
extra += code[1];
|
extra += code[1];
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
|
@ -4704,4 +4704,24 @@ a)"xI
|
||||||
|
|
||||||
/\x8a+f|;T?(*:;.'?`(\xeap ){![^()!y*''C*(?';]{1;(\x08)/B,alt_verbnames,dupnames,extended
|
/\x8a+f|;T?(*:;.'?`(\xeap ){![^()!y*''C*(?';]{1;(\x08)/B,alt_verbnames,dupnames,extended
|
||||||
|
|
||||||
|
# Tests for NULL characters in comments and verb "names" and callouts
|
||||||
|
|
||||||
|
# /A#B\x00C\x0aZ/
|
||||||
|
/41 23 42 00 43 0a 5a/Bx,hex
|
||||||
|
|
||||||
|
# /A+#B\x00C\x0a+/
|
||||||
|
/41 2b 23 42 00 43 0a 2b/Bx,hex
|
||||||
|
|
||||||
|
# /A(*:B\x00W#X\00Y\x0aC)Z/
|
||||||
|
/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex,alt_verbnames
|
||||||
|
|
||||||
|
# /A(*:B\x00W#X\00Y\x0aC)Z/
|
||||||
|
/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex
|
||||||
|
|
||||||
|
# /A(?C{X\x00Y})B/
|
||||||
|
/41 28 3f 43 7b 58 00 59 7d 29 42/B,hex
|
||||||
|
|
||||||
|
# /A(?#X\x00Y)B/
|
||||||
|
/41 28 3f 23 7b 00 7d 29 42/B,hex
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -14998,4 +14998,66 @@ Subject length lower bound = 0
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Tests for NULL characters in comments and verb "names" and callouts
|
||||||
|
|
||||||
|
# /A#B\x00C\x0aZ/
|
||||||
|
/41 23 42 00 43 0a 5a/Bx,hex
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
AZ
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
# /A+#B\x00C\x0a+/
|
||||||
|
/41 2b 23 42 00 43 0a 2b/Bx,hex
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
A++
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
# /A(*:B\x00W#X\00Y\x0aC)Z/
|
||||||
|
/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex,alt_verbnames
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
A
|
||||||
|
*MARK B\x{0}WC
|
||||||
|
Z
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
# /A(*:B\x00W#X\00Y\x0aC)Z/
|
||||||
|
/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
A
|
||||||
|
*MARK B\x{0}W#X\x{0}Y\x{a}C
|
||||||
|
Z
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
# /A(?C{X\x00Y})B/
|
||||||
|
/41 28 3f 43 7b 58 00 59 7d 29 42/B,hex
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
A
|
||||||
|
CalloutStr {X\x{0}Y} 5 10 1
|
||||||
|
B
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
# /A(?#X\x00Y)B/
|
||||||
|
/41 28 3f 23 7b 00 7d 29 42/B,hex
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
AB
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
Loading…
Reference in New Issue