Detect unmatched closing parentheses in the pre-scan to avoid giving incorrect error messages.
This commit is contained in:
Philip.Hazel 2016-02-02 17:22:55 +00:00
parent 4e67c0c9e9
commit 9f75a0f92a
4 changed files with 31 additions and 22 deletions

View File

@ -37,6 +37,11 @@ use.
8. Minor tidies to the pcre2demo.c sample program, including more comments 8. Minor tidies to the pcre2demo.c sample program, including more comments
about its 8-bit-ness. about its 8-bit-ness.
9. Detect unmatched closing parentheses and give the error in the pre-scan
instead of later. Previously the pre-scan carried on and could give a
misleading, incorrect error message. For example, /(?J)(?'a'))(?'a')/ gave a
message about invalid duplicate group names.
Version 10.21 12-January-2016 Version 10.21 12-January-2016
----------------------------- -----------------------------

View File

@ -3377,27 +3377,24 @@ for (; ptr < cb->end_pattern; ptr++)
if ((options & PCRE2_NO_AUTO_CAPTURE) == 0) cb->bracount++; if ((options & PCRE2_NO_AUTO_CAPTURE) == 0) cb->bracount++;
} }
/* (*something) - just skip to closing ket unless PCRE2_ALT_VERBNAMES is /* (*something) - skip over a name, and then just skip to closing ket
set, in which case we have to process escapes in the string after the unless PCRE2_ALT_VERBNAMES is set, in which case we have to process
name. */ escapes in the string after a verb name terminated by a colon. */
else else
{ {
ptr += 2; ptr += 2;
while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0) ptr++; while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0) ptr++;
if (*ptr == CHAR_COLON) if (*ptr == CHAR_COLON && (options & PCRE2_ALT_VERBNAMES) != 0)
{ {
ptr++; ptr++;
if ((options & PCRE2_ALT_VERBNAMES) != 0) if (process_verb_name(&ptr, NULL, &errorcode, options, utf, cb) < 0)
{ goto FAILED;
if (process_verb_name(&ptr, NULL, &errorcode, options, utf, cb) < 0) }
goto FAILED; else
} {
else while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS)
{ ptr++;
while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS)
ptr++;
}
} }
nest_depth--; nest_depth--;
} }
@ -3748,7 +3745,12 @@ for (; ptr < cb->end_pattern; ptr++)
if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL; if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL;
else top_nest--; else top_nest--;
} }
if (nest_depth > 0) nest_depth--; /* Can be 0 for unmatched ) */ if (nest_depth == 0) /* Unmatched closing parenthesis */
{
errorcode = ERR22;
goto FAILED;
}
nest_depth--;
break; break;
} }
} }
@ -8704,14 +8706,11 @@ if (cb.had_accept)
reqcuflags = REQ_NONE; reqcuflags = REQ_NONE;
} }
/* If we have not reached end of pattern after a successful compile, there's an /* Fill in the final opcode and check for disastrous overflow. If no overflow,
excess bracket. Fill in the final opcode and check for disastrous overflow. but the estimated length exceeds the really used length, adjust the value of
If no overflow, but the estimated length exceeds the really used length, adjust re->blocksize, and if valgrind support is configured, mark the extra allocated
the value of re->blocksize, and if valgrind support is configured, mark the memory as unaddressable, so that any out-of-bound reads can be detected. */
extra allocated memory as unaddressable, so that any out-of-bound reads can be
detected. */
if (errorcode == 0 && ptr < cb.end_pattern) errorcode = ERR22;
*code++ = OP_END; *code++ = OP_END;
usedlength = code - codestart; usedlength = code - codestart;
if (usedlength > length) errorcode = ERR23; else if (usedlength > length) errorcode = ERR23; else

2
testdata/testinput2 vendored
View File

@ -4804,4 +4804,6 @@ a)"xI
/effg/hex /effg/hex
/(?J)(?'a'))(?'a')/
# End of testinput2 # End of testinput2

View File

@ -15165,4 +15165,7 @@ MK: A\x00b
/effg/hex /effg/hex
** Unexpected non-hex-digit 'g' in hex pattern: quote missing? ** Unexpected non-hex-digit 'g' in hex pattern: quote missing?
/(?J)(?'a'))(?'a')/
Failed: error 122 at offset 10: unmatched closing parenthesis
# End of testinput2 # End of testinput2