From d71b70cdf76bde926d648b76ffb8d47c8f5cf28f Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Wed, 2 Dec 2015 17:39:26 +0000 Subject: [PATCH] Fix issues with NULL characters in patterns. --- ChangeLog | 8 ++++++ src/pcre2_compile.c | 18 ++++++------- src/pcre2_printint.c | 33 ++++++++++++++++------- testdata/testinput2 | 20 ++++++++++++++ testdata/testoutput2 | 62 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 122 insertions(+), 19 deletions(-) diff --git a/ChangeLog b/ChangeLog index 332b3c2..3bc48ef 100644 --- a/ChangeLog +++ b/ChangeLog @@ -358,6 +358,14 @@ other verb "name" ended with whitespace immediately before the closing parenthesis, pcre2_compile() misbehaved. Example: /(*:abc )/, but only when both those options were set. +107. In a number of places pcre2_compile() was not handling NULL characters +correctly, and pcre2test with the "bincode" modifier was not always correctly +displaying fields containing NULLS: + + (a) Within /x extended #-comments + (b) Within the "name" part of (*MARK) and other *verbs + (c) Within the text argument of a callout + Version 10.20 30-June-2015 -------------------------- diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 3e92436..373d3fd 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -3017,12 +3017,12 @@ for (; ptr < cb->end_pattern; ptr++) if ((options & PCRE2_EXTENDED) != 0) { - PCRE2_SPTR wscptr = ptr; + PCRE2_SPTR wscptr = ptr; while (MAX_255(x) && (cb->ctypes[x] & ctype_space) != 0) x = *(++ptr); if (x == CHAR_NUMBER_SIGN) - { + { ptr++; - while (*ptr != CHAR_NULL) + while (ptr < cb->end_pattern) { if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */ { /* IS_NEWLINE sets cb->nllen. */ @@ -3034,10 +3034,10 @@ for (; ptr < cb->end_pattern; ptr++) if (utf) FORWARDCHAR(ptr); #endif } - } - + } + /* If we have skipped any characters, restart the loop. 
*/ - + if (ptr > wscptr) { ptr--; @@ -4008,7 +4008,7 @@ for (;; ptr++) if (c == CHAR_NUMBER_SIGN) { ptr++; - while (*ptr != CHAR_NULL) + while (ptr < cb->end_pattern) { if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */ { /* IS_NEWLINE sets cb->nllen. */ @@ -5044,7 +5044,7 @@ for (;; ptr++) while (MAX_255(*p) && (cb->ctypes[*p] & ctype_space) != 0) p++; if (*p != CHAR_NUMBER_SIGN) break; p++; - while (*p != CHAR_NULL) + while (p < cb->end_pattern) { if (IS_NEWLINE(p)) /* For non-fixed-length newline cases, */ { /* IS_NEWLINE sets cb->nllen. */ @@ -5832,7 +5832,7 @@ for (;; ptr++) if ((options & PCRE2_ALT_VERBNAMES) == 0) { arglen = 0; - while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) + while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) { ptr++; /* Check length as we go */ arglen++; /* along, to avoid the */ diff --git a/src/pcre2_printint.c b/src/pcre2_printint.c index 2cd01ab..6d4fe60 100644 --- a/src/pcre2_printint.c +++ b/src/pcre2_printint.c @@ -58,12 +58,13 @@ static const char *OP_names[] = { OP_NAME_LIST }; /* The functions and tables herein must all have mode-dependent names. */ -#define OP_lengths PCRE2_SUFFIX(OP_lengths_) -#define get_ucpname PCRE2_SUFFIX(get_ucpname_) -#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_) -#define print_char PCRE2_SUFFIX(print_char_) -#define print_custring PCRE2_SUFFIX(print_custring_) -#define print_prop PCRE2_SUFFIX(print_prop_) +#define OP_lengths PCRE2_SUFFIX(OP_lengths_) +#define get_ucpname PCRE2_SUFFIX(get_ucpname_) +#define pcre2_printint PCRE2_SUFFIX(pcre2_printint_) +#define print_char PCRE2_SUFFIX(print_char_) +#define print_custring PCRE2_SUFFIX(print_custring_) +#define print_custring_bylen PCRE2_SUFFIX(print_custring_bylen_) +#define print_prop PCRE2_SUFFIX(print_prop_) /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that the definition is next to the definition of the opcodes in pcre2_internal.h. 
@@ -188,12 +189,14 @@ return 0; * Print string as a list of code units * *************************************************/ -/* This takes no account of UTF as it always prints each individual code unit. -The string is zero-terminated. +/* These take no account of UTF as they always print each individual code unit. +The string is zero-terminated for print_custring(); the length is given for +print_custring_bylen(). Arguments: f file to write to ptr point to the string + len length in code units for print_custring_bylen() Returns: nothing */ @@ -208,6 +211,16 @@ while (*ptr != '\0') } } +static void +print_custring_bylen(FILE *f, PCRE2_SPTR ptr, PCRE2_SIZE len) +{ +while (len-- > 0) + { + register uint32_t c = *ptr++; + if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c); + } +} + /************************************************* @@ -603,7 +616,7 @@ for(;;) c = code[1 + 4*LINK_SIZE]; fprintf(f, " %s %c", OP_names[*code], c); extra = GET(code, 1 + 2*LINK_SIZE); - print_custring(f, code + 2 + 4*LINK_SIZE); + print_custring_bylen(f, code + 2 + 4*LINK_SIZE, extra - 3 - 4*LINK_SIZE); for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) if (c == PRIV(callout_start_delims)[i]) { @@ -791,7 +804,7 @@ for(;;) case OP_SKIP_ARG: case OP_THEN_ARG: fprintf(f, " %s ", OP_names[*code]); - print_custring(f, code + 2); + print_custring_bylen(f, code + 2, code[1]); extra += code[1]; break; diff --git a/testdata/testinput2 b/testdata/testinput2 index eb6b9e4..8b85d53 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4704,4 +4704,24 @@ a)"xI /\x8a+f|;T?(*:;.'?`(\xeap ){![^()!y*''C*(?';]{1;(\x08)/B,alt_verbnames,dupnames,extended +# Tests for NULL characters in comments and verb "names" and callouts + +# /A#B\x00C\x0aZ/ +/41 23 42 00 43 0a 5a/Bx,hex + +# /A+#B\x00C\x0a+/ +/41 2b 23 42 00 43 0a 2b/Bx,hex + +# /A(*:B\x00W#X\00Y\x0aC)Z/ +/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex,alt_verbnames + +# /A(*:B\x00W#X\00Y\x0aC)Z/ +/41 28 2a 3a 42 00 57 23 58 00 59 0a 
43 29 5a/Bx,hex + +# /A(?C{X\x00Y})B/ +/41 28 3f 43 7b 58 00 59 7d 29 42/B,hex + +# /A(?#{\x00})B/ +/41 28 3f 23 7b 00 7d 29 42/B,hex + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 698c44d..50993c8 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14998,4 +14998,66 @@ Subject length lower bound = 0 End ------------------------------------------------------------------ +# Tests for NULL characters in comments and verb "names" and callouts + +# /A#B\x00C\x0aZ/ +/41 23 42 00 43 0a 5a/Bx,hex +------------------------------------------------------------------ + Bra + AZ + Ket + End +------------------------------------------------------------------ + +# /A+#B\x00C\x0a+/ +/41 2b 23 42 00 43 0a 2b/Bx,hex +------------------------------------------------------------------ + Bra + A++ + Ket + End +------------------------------------------------------------------ + +# /A(*:B\x00W#X\00Y\x0aC)Z/ +/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex,alt_verbnames +------------------------------------------------------------------ + Bra + A + *MARK B\x{0}WC + Z + Ket + End +------------------------------------------------------------------ + +# /A(*:B\x00W#X\00Y\x0aC)Z/ +/41 28 2a 3a 42 00 57 23 58 00 59 0a 43 29 5a/Bx,hex +------------------------------------------------------------------ + Bra + A + *MARK B\x{0}W#X\x{0}Y\x{a}C + Z + Ket + End +------------------------------------------------------------------ + +# /A(?C{X\x00Y})B/ +/41 28 3f 43 7b 58 00 59 7d 29 42/B,hex +------------------------------------------------------------------ + Bra + A + CalloutStr {X\x{0}Y} 5 10 1 + B + Ket + End +------------------------------------------------------------------ + +# /A(?#{\x00})B/ +/41 28 3f 23 7b 00 7d 29 42/B,hex +------------------------------------------------------------------ + Bra + AB + Ket + End +------------------------------------------------------------------ + # End of testinput2