Complete escape processing for PCRE2_ALT_VERBNAMES
This commit is contained in:
parent
d2e87a75af
commit
cdf07ab585
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2API 3 "30 August 2015" "PCRE2 10.21"
|
.TH PCRE2API 3 "01 September 2015" "PCRE2 10.21"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
|
@ -1060,7 +1060,10 @@ By default, for compatibility with Perl, the name in any verb sequence such as
|
||||||
parenthesis. The name is not processed in any way, and it is not possible to
|
parenthesis. The name is not processed in any way, and it is not possible to
|
||||||
include a closing parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES
|
include a closing parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES
|
||||||
option is set, normal backslash processing is applied to verb names and only an
|
option is set, normal backslash processing is applied to verb names and only an
|
||||||
unescaped closing parenthesis terminates the name.
|
unescaped closing parenthesis terminates the name. A closing parenthesis can be
|
||||||
|
included in a name either as \e) or between \eQ and \eE. If the PCRE2_EXTENDED
|
||||||
|
option is set, unescaped whitespace in verb names is skipped and #-comments are
|
||||||
|
recognized, exactly as in the rest of the pattern.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_AUTO_CALLOUT
|
PCRE2_AUTO_CALLOUT
|
||||||
.sp
|
.sp
|
||||||
|
@ -2962,6 +2965,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 30 August 2015
|
Last updated: 01 September 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2PATTERN 3 "30 August 2015" "PCRE2 10.21"
|
.TH PCRE2PATTERN 3 "01 September 2015" "PCRE2 10.21"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
|
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
|
||||||
|
@ -2953,7 +2953,10 @@ that does not include a closing parenthesis. The name is not processed in
|
||||||
any way, and it is not possible to include a closing parenthesis in the name.
|
any way, and it is not possible to include a closing parenthesis in the name.
|
||||||
However, if the PCRE2_ALT_VERBNAMES option is set, normal backslash processing
|
However, if the PCRE2_ALT_VERBNAMES option is set, normal backslash processing
|
||||||
is applied to verb names and only an unescaped closing parenthesis terminates
|
is applied to verb names and only an unescaped closing parenthesis terminates
|
||||||
the name.
|
the name. A closing parenthesis can be included in a name either as \e) or
|
||||||
|
between \eQ and \eE. If the PCRE2_EXTENDED option is set, unescaped whitespace
|
||||||
|
in verb names is skipped and #-comments are recognized, exactly as in the rest
|
||||||
|
of the pattern.
|
||||||
.P
|
.P
|
||||||
The maximum length of a name is 255 in the 8-bit library and 65535 in the
|
The maximum length of a name is 255 in the 8-bit library and 65535 in the
|
||||||
16-bit and 32-bit libraries. If the name is empty, that is, if the closing
|
16-bit and 32-bit libraries. If the name is empty, that is, if the closing
|
||||||
|
@ -3383,6 +3386,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 30 August 2015
|
Last updated: 01 September 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -2792,6 +2792,148 @@ return n8;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Process (*VERB) name for escapes *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* This function is called when the PCRE2_ALT_VERBNAMES option is set, to
|
||||||
|
process the characters in a verb's name argument. It is called twice, once with
|
||||||
|
codeptr == NULL, to find out the length of the processed name, and again to put
|
||||||
|
the name into memory.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
ptrptr pointer to the input pointer
|
||||||
|
codeptr pointer to the compiled code pointer
|
||||||
|
errorcodeptr pointer to the error code
|
||||||
|
utf TRUE if processing UTF
|
||||||
|
cb compile data block
|
||||||
|
|
||||||
|
Returns: length of the processed name, or < 0 on error
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int
|
||||||
|
process_verb_name(PCRE2_SPTR *ptrptr, PCRE2_UCHAR **codeptr, int *errorcodeptr,
|
||||||
|
uint32_t options, BOOL utf, compile_block *cb)
|
||||||
|
{
|
||||||
|
int arglen = 0;
|
||||||
|
BOOL inescq = FALSE;
|
||||||
|
PCRE2_SPTR ptr = *ptrptr;
|
||||||
|
PCRE2_UCHAR *code = (codeptr == NULL)? NULL : *codeptr;
|
||||||
|
|
||||||
|
for (; ptr < cb->end_pattern; ptr++)
|
||||||
|
{
|
||||||
|
uint32_t x = *ptr;
|
||||||
|
|
||||||
|
/* Skip over literals */
|
||||||
|
|
||||||
|
if (inescq)
|
||||||
|
{
|
||||||
|
if (x == CHAR_BACKSLASH && ptr[1] == CHAR_E)
|
||||||
|
{
|
||||||
|
inescq = FALSE;
|
||||||
|
ptr++;;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
else /* Not a literal character */
|
||||||
|
{
|
||||||
|
if (x == CHAR_RIGHT_PARENTHESIS) break;
|
||||||
|
|
||||||
|
/* Skip over comments and whitespace in extended mode. Need a loop to handle
|
||||||
|
whitespace after a comment. */
|
||||||
|
|
||||||
|
if ((options & PCRE2_EXTENDED) != 0)
|
||||||
|
{
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
while (MAX_255(x) && (cb->ctypes[x] & ctype_space) != 0) x = *(++ptr);
|
||||||
|
if (x != CHAR_NUMBER_SIGN) break;
|
||||||
|
ptr++;
|
||||||
|
while (*ptr != CHAR_NULL)
|
||||||
|
{
|
||||||
|
if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */
|
||||||
|
{ /* IS_NEWLINE sets cb->nllen. */
|
||||||
|
ptr += cb->nllen;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
ptr++;
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf) FORWARDCHAR(ptr);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
x = *ptr; /* Either NULL or the char after a newline */
|
||||||
|
}
|
||||||
|
if (ptr >= cb->end_pattern) break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Process escapes */
|
||||||
|
|
||||||
|
if (x == '\\')
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
*errorcodeptr = 0;
|
||||||
|
rc = check_escape(&ptr, &x, errorcodeptr, options, FALSE, cb);
|
||||||
|
*ptrptr = ptr; /* For possible error */
|
||||||
|
if (*errorcodeptr != 0) return -1;
|
||||||
|
if (rc != 0)
|
||||||
|
{
|
||||||
|
if (rc == ESC_Q)
|
||||||
|
{
|
||||||
|
inescq = TRUE;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (rc == ESC_E) continue;
|
||||||
|
*errorcodeptr = ERR40;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We have the next character in the name. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
if (code == NULL) /* Just want the length */
|
||||||
|
{
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < PRIV(utf8_table1_size); i++)
|
||||||
|
if ((int)x <= PRIV(utf8_table1)[i]) break;
|
||||||
|
arglen += i;
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
if (x > 0xffff) arglen++;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PCRE2_UCHAR cbuff[8];
|
||||||
|
x = PRIV(ord2utf)(x, cbuff);
|
||||||
|
memcpy(code, cbuff, CU2BYTES(x));
|
||||||
|
code += x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
|
/* Not UTF */
|
||||||
|
{
|
||||||
|
if (code != NULL) *code++ = x;
|
||||||
|
}
|
||||||
|
|
||||||
|
arglen++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Update the pointers before returning. */
|
||||||
|
|
||||||
|
*ptrptr = ptr;
|
||||||
|
if (codeptr != NULL) *codeptr = code;
|
||||||
|
return arglen;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Scan regex to identify named groups *
|
* Scan regex to identify named groups *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
@ -5399,33 +5541,9 @@ for (;; ptr++)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
arglen = 0;
|
arglen = process_verb_name(&ptr, NULL, errorcodeptr, options,
|
||||||
while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS)
|
utf, cb);
|
||||||
{
|
if (arglen < 0) goto FAILED;
|
||||||
if (*ptr == '\\')
|
|
||||||
{
|
|
||||||
uint32_t x;
|
|
||||||
*errorcodeptr = 0;
|
|
||||||
i = check_escape(&ptr, &x, errorcodeptr, options, FALSE, cb);
|
|
||||||
if (*errorcodeptr != 0) goto FAILED;
|
|
||||||
if (i != 0)
|
|
||||||
{
|
|
||||||
*errorcodeptr = ERR40;
|
|
||||||
goto FAILED;
|
|
||||||
}
|
|
||||||
#ifdef SUPPORT_UNICODE
|
|
||||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
|
||||||
for (i = 0; i < PRIV(utf8_table1_size); i++)
|
|
||||||
if ((int)x <= PRIV(utf8_table1)[i]) break;
|
|
||||||
arglen += i;
|
|
||||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
|
||||||
if (x > 0xffff) arglen++;
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
arglen++;
|
|
||||||
ptr++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((unsigned int)arglen > MAX_MARK)
|
if ((unsigned int)arglen > MAX_MARK)
|
||||||
|
@ -5495,35 +5613,12 @@ for (;; ptr++)
|
||||||
}
|
}
|
||||||
setverb = *code++ = verbs[i].op_arg;
|
setverb = *code++ = verbs[i].op_arg;
|
||||||
*code++ = arglen;
|
*code++ = arglen;
|
||||||
|
|
||||||
/* If we are processing the argument for escapes, we don't need
|
|
||||||
to apply checks here because it was all checked above when
|
|
||||||
computing the length. */
|
|
||||||
|
|
||||||
if ((options & PCRE2_ALT_VERBNAMES) != 0)
|
if ((options & PCRE2_ALT_VERBNAMES) != 0)
|
||||||
{
|
{
|
||||||
for (; arg != ptr; arg++)
|
PCRE2_UCHAR *memcode = code; /* code is "register" */
|
||||||
{
|
(void)process_verb_name(&arg, &memcode, errorcodeptr, options,
|
||||||
if (*arg == '\\')
|
utf, cb);
|
||||||
{
|
code = memcode;
|
||||||
uint32_t x;
|
|
||||||
*errorcodeptr = 0;
|
|
||||||
(void)check_escape(&arg, &x, errorcodeptr, options, FALSE,
|
|
||||||
cb);
|
|
||||||
#ifdef SUPPORT_UNICODE
|
|
||||||
if (utf)
|
|
||||||
{
|
|
||||||
PCRE2_UCHAR cbuff[8];
|
|
||||||
x = PRIV(ord2utf)(x, cbuff);
|
|
||||||
memcpy(code, cbuff, CU2BYTES(x));
|
|
||||||
code += x;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
*code++ = x;
|
|
||||||
}
|
|
||||||
else *code++ = *arg;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else /* No argument processing */
|
else /* No argument processing */
|
||||||
{
|
{
|
||||||
|
|
|
@ -4449,4 +4449,20 @@ a random value. /Ix
|
||||||
/(*:ab\t(d\)c)xxx/alt_verbnames,mark
|
/(*:ab\t(d\)c)xxx/alt_verbnames,mark
|
||||||
cxxxz
|
cxxxz
|
||||||
|
|
||||||
|
/(*:A\Qxx)x\EB)x/alt_verbnames,mark
|
||||||
|
x
|
||||||
|
|
||||||
|
/(*:A\ExxxB)x/alt_verbnames,mark
|
||||||
|
x
|
||||||
|
|
||||||
|
/(*: A \ and #comment
|
||||||
|
\ B)x/x,alt_verbnames,mark
|
||||||
|
x
|
||||||
|
|
||||||
|
/(*:A
|
||||||
|
B)x/alt_verbnames,mark
|
||||||
|
x
|
||||||
|
|
||||||
|
/(*:abc\Qpqr)/alt_verbnames
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -14724,4 +14724,29 @@ Failed: error 122 at offset 12: unmatched closing parenthesis
|
||||||
0: xxx
|
0: xxx
|
||||||
MK: ab\x09(d)c
|
MK: ab\x09(d)c
|
||||||
|
|
||||||
|
/(*:A\Qxx)x\EB)x/alt_verbnames,mark
|
||||||
|
x
|
||||||
|
0: x
|
||||||
|
MK: Axx)xB
|
||||||
|
|
||||||
|
/(*:A\ExxxB)x/alt_verbnames,mark
|
||||||
|
x
|
||||||
|
0: x
|
||||||
|
MK: AxxxB
|
||||||
|
|
||||||
|
/(*: A \ and #comment
|
||||||
|
\ B)x/x,alt_verbnames,mark
|
||||||
|
x
|
||||||
|
0: x
|
||||||
|
MK: A and B
|
||||||
|
|
||||||
|
/(*:A
|
||||||
|
B)x/alt_verbnames,mark
|
||||||
|
x
|
||||||
|
0: x
|
||||||
|
MK: A\x0aB
|
||||||
|
|
||||||
|
/(*:abc\Qpqr)/alt_verbnames
|
||||||
|
Failed: error 160 at offset 12: (*VERB) not recognized or malformed
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
Loading…
Reference in New Issue