Complete escape processing for PCRE2_ALT_VERBNAMES
This commit is contained in:
parent
d2e87a75af
commit
cdf07ab585
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "30 August 2015" "PCRE2 10.21"
|
||||
.TH PCRE2API 3 "01 September 2015" "PCRE2 10.21"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -1060,7 +1060,10 @@ By default, for compatibility with Perl, the name in any verb sequence such as
|
|||
parenthesis. The name is not processed in any way, and it is not possible to
|
||||
include a closing parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES
|
||||
option is set, normal backslash processing is applied to verb names and only an
|
||||
unescaped closing parenthesis terminates the name.
|
||||
unescaped closing parenthesis terminates the name. A closing parenthesis can be
|
||||
included in a name either as \e) or between \eQ and \eE. If the PCRE2_EXTENDED
|
||||
option is set, unescaped whitespace in verb names is skipped and #-comments are
|
||||
recognized, exactly as in the rest of the pattern.
|
||||
.sp
|
||||
PCRE2_AUTO_CALLOUT
|
||||
.sp
|
||||
|
@ -2962,6 +2965,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 30 August 2015
|
||||
Last updated: 01 September 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2PATTERN 3 "30 August 2015" "PCRE2 10.21"
|
||||
.TH PCRE2PATTERN 3 "01 September 2015" "PCRE2 10.21"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
|
||||
|
@ -2953,7 +2953,10 @@ that does not include a closing parenthesis. The name is not processed in
|
|||
any way, and it is not possible to include a closing parenthesis in the name.
|
||||
However, if the PCRE2_ALT_VERBNAMES option is set, normal backslash processing
|
||||
is applied to verb names and only an unescaped closing parenthesis terminates
|
||||
the name.
|
||||
the name. A closing parenthesis can be included in a name either as \e) or
|
||||
between \eQ and \eE. If the PCRE2_EXTENDED option is set, unescaped whitespace
|
||||
in verb names is skipped and #-comments are recognized, exactly as in the rest
|
||||
of the pattern.
|
||||
.P
|
||||
The maximum length of a name is 255 in the 8-bit library and 65535 in the
|
||||
16-bit and 32-bit libraries. If the name is empty, that is, if the closing
|
||||
|
@ -3383,6 +3386,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 30 August 2015
|
||||
Last updated: 01 September 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -2792,6 +2792,148 @@ return n8;
|
|||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Process (*VERB) name for escapes *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called when the PCRE2_ALT_VERBNAMES option is set, to
|
||||
process the characters in a verb's name argument. It is called twice, once with
|
||||
codeptr == NULL, to find out the length of the processed name, and again to put
|
||||
the name into memory.
|
||||
|
||||
Arguments:
|
||||
ptrptr pointer to the input pointer
|
||||
codeptr pointer to the compiled code pointer
|
||||
errorcodeptr pointer to the error code
|
||||
utf TRUE if processing UTF
|
||||
cb compile data block
|
||||
|
||||
Returns: length of the processed name, or < 0 on error
|
||||
*/
|
||||
|
||||
static int
|
||||
process_verb_name(PCRE2_SPTR *ptrptr, PCRE2_UCHAR **codeptr, int *errorcodeptr,
|
||||
uint32_t options, BOOL utf, compile_block *cb)
|
||||
{
|
||||
int arglen = 0;
|
||||
BOOL inescq = FALSE;
|
||||
PCRE2_SPTR ptr = *ptrptr;
|
||||
PCRE2_UCHAR *code = (codeptr == NULL)? NULL : *codeptr;
|
||||
|
||||
for (; ptr < cb->end_pattern; ptr++)
|
||||
{
|
||||
uint32_t x = *ptr;
|
||||
|
||||
/* Skip over literals */
|
||||
|
||||
if (inescq)
|
||||
{
|
||||
if (x == CHAR_BACKSLASH && ptr[1] == CHAR_E)
|
||||
{
|
||||
inescq = FALSE;
|
||||
ptr++;;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
else /* Not a literal character */
|
||||
{
|
||||
if (x == CHAR_RIGHT_PARENTHESIS) break;
|
||||
|
||||
/* Skip over comments and whitespace in extended mode. Need a loop to handle
|
||||
whitespace after a comment. */
|
||||
|
||||
if ((options & PCRE2_EXTENDED) != 0)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
while (MAX_255(x) && (cb->ctypes[x] & ctype_space) != 0) x = *(++ptr);
|
||||
if (x != CHAR_NUMBER_SIGN) break;
|
||||
ptr++;
|
||||
while (*ptr != CHAR_NULL)
|
||||
{
|
||||
if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */
|
||||
{ /* IS_NEWLINE sets cb->nllen. */
|
||||
ptr += cb->nllen;
|
||||
break;
|
||||
}
|
||||
ptr++;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf) FORWARDCHAR(ptr);
|
||||
#endif
|
||||
}
|
||||
x = *ptr; /* Either NULL or the char after a newline */
|
||||
}
|
||||
if (ptr >= cb->end_pattern) break;
|
||||
}
|
||||
|
||||
/* Process escapes */
|
||||
|
||||
if (x == '\\')
|
||||
{
|
||||
int rc;
|
||||
*errorcodeptr = 0;
|
||||
rc = check_escape(&ptr, &x, errorcodeptr, options, FALSE, cb);
|
||||
*ptrptr = ptr; /* For possible error */
|
||||
if (*errorcodeptr != 0) return -1;
|
||||
if (rc != 0)
|
||||
{
|
||||
if (rc == ESC_Q)
|
||||
{
|
||||
inescq = TRUE;
|
||||
continue;
|
||||
}
|
||||
if (rc == ESC_E) continue;
|
||||
*errorcodeptr = ERR40;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* We have the next character in the name. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
if (code == NULL) /* Just want the length */
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
int i;
|
||||
for (i = 0; i < PRIV(utf8_table1_size); i++)
|
||||
if ((int)x <= PRIV(utf8_table1)[i]) break;
|
||||
arglen += i;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if (x > 0xffff) arglen++;
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
PCRE2_UCHAR cbuff[8];
|
||||
x = PRIV(ord2utf)(x, cbuff);
|
||||
memcpy(code, cbuff, CU2BYTES(x));
|
||||
code += x;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Not UTF */
|
||||
{
|
||||
if (code != NULL) *code++ = x;
|
||||
}
|
||||
|
||||
arglen++;
|
||||
}
|
||||
|
||||
/* Update the pointers before returning. */
|
||||
|
||||
*ptrptr = ptr;
|
||||
if (codeptr != NULL) *codeptr = code;
|
||||
return arglen;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Scan regex to identify named groups *
|
||||
*************************************************/
|
||||
|
@ -5399,33 +5541,9 @@ for (;; ptr++)
|
|||
}
|
||||
else
|
||||
{
|
||||
arglen = 0;
|
||||
while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS)
|
||||
{
|
||||
if (*ptr == '\\')
|
||||
{
|
||||
uint32_t x;
|
||||
*errorcodeptr = 0;
|
||||
i = check_escape(&ptr, &x, errorcodeptr, options, FALSE, cb);
|
||||
if (*errorcodeptr != 0) goto FAILED;
|
||||
if (i != 0)
|
||||
{
|
||||
*errorcodeptr = ERR40;
|
||||
goto FAILED;
|
||||
}
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
for (i = 0; i < PRIV(utf8_table1_size); i++)
|
||||
if ((int)x <= PRIV(utf8_table1)[i]) break;
|
||||
arglen += i;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if (x > 0xffff) arglen++;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
arglen++;
|
||||
ptr++;
|
||||
}
|
||||
arglen = process_verb_name(&ptr, NULL, errorcodeptr, options,
|
||||
utf, cb);
|
||||
if (arglen < 0) goto FAILED;
|
||||
}
|
||||
|
||||
if ((unsigned int)arglen > MAX_MARK)
|
||||
|
@ -5495,35 +5613,12 @@ for (;; ptr++)
|
|||
}
|
||||
setverb = *code++ = verbs[i].op_arg;
|
||||
*code++ = arglen;
|
||||
|
||||
/* If we are processing the argument for escapes, we don't need
|
||||
to apply checks here because it was all checked above when
|
||||
computing the length. */
|
||||
|
||||
if ((options & PCRE2_ALT_VERBNAMES) != 0)
|
||||
{
|
||||
for (; arg != ptr; arg++)
|
||||
{
|
||||
if (*arg == '\\')
|
||||
{
|
||||
uint32_t x;
|
||||
*errorcodeptr = 0;
|
||||
(void)check_escape(&arg, &x, errorcodeptr, options, FALSE,
|
||||
cb);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
PCRE2_UCHAR cbuff[8];
|
||||
x = PRIV(ord2utf)(x, cbuff);
|
||||
memcpy(code, cbuff, CU2BYTES(x));
|
||||
code += x;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
*code++ = x;
|
||||
}
|
||||
else *code++ = *arg;
|
||||
}
|
||||
PCRE2_UCHAR *memcode = code; /* code is "register" */
|
||||
(void)process_verb_name(&arg, &memcode, errorcodeptr, options,
|
||||
utf, cb);
|
||||
code = memcode;
|
||||
}
|
||||
else /* No argument processing */
|
||||
{
|
||||
|
|
|
@ -4449,4 +4449,20 @@ a random value. /Ix
|
|||
/(*:ab\t(d\)c)xxx/alt_verbnames,mark
|
||||
cxxxz
|
||||
|
||||
/(*:A\Qxx)x\EB)x/alt_verbnames,mark
|
||||
x
|
||||
|
||||
/(*:A\ExxxB)x/alt_verbnames,mark
|
||||
x
|
||||
|
||||
/(*: A \ and #comment
|
||||
\ B)x/x,alt_verbnames,mark
|
||||
x
|
||||
|
||||
/(*:A
|
||||
B)x/alt_verbnames,mark
|
||||
x
|
||||
|
||||
/(*:abc\Qpqr)/alt_verbnames
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -14724,4 +14724,29 @@ Failed: error 122 at offset 12: unmatched closing parenthesis
|
|||
0: xxx
|
||||
MK: ab\x09(d)c
|
||||
|
||||
/(*:A\Qxx)x\EB)x/alt_verbnames,mark
|
||||
x
|
||||
0: x
|
||||
MK: Axx)xB
|
||||
|
||||
/(*:A\ExxxB)x/alt_verbnames,mark
|
||||
x
|
||||
0: x
|
||||
MK: AxxxB
|
||||
|
||||
/(*: A \ and #comment
|
||||
\ B)x/x,alt_verbnames,mark
|
||||
x
|
||||
0: x
|
||||
MK: A and B
|
||||
|
||||
/(*:A
|
||||
B)x/alt_verbnames,mark
|
||||
x
|
||||
0: x
|
||||
MK: A\x0aB
|
||||
|
||||
/(*:abc\Qpqr)/alt_verbnames
|
||||
Failed: error 160 at offset 12: (*VERB) not recognized or malformed
|
||||
|
||||
# End of testinput2
|
||||
|
|
Loading…
Reference in New Issue