Check subpattern name lengths dynamically.

This commit is contained in:
Philip.Hazel 2015-11-03 20:04:23 +00:00
parent 4ad83f7103
commit b02bb996af
4 changed files with 47 additions and 26 deletions

View File

@ -260,8 +260,8 @@ as /(?<=(a)(?-1))x/ which have a recursion within a backreference.
75. Give an error in pcre2_substitute() if a match ends before it starts (as a 75. Give an error in pcre2_substitute() if a match ends before it starts (as a
result of the use of \K). result of the use of \K).
76. Check the length of the name in (*MARK:xx) etc. dynamically to avoid the 76. Check the length of subpattern names and the names in (*MARK:xx) etc.
possibility of integer overflow. dynamically to avoid the possibility of integer overflow.
Version 10.20 30-June-2015 Version 10.20 30-June-2015

View File

@ -3001,6 +3001,31 @@ return arglen;
/*************************************************
* Macro for the next two functions *
*************************************************/
/* Both scan_for_captures() and compile_branch() use this macro to read the
characters of a name and set its length (checking for not being too long). The
characters are counted dynamically, one at a time, to avoid the possibility of
integer overflow. The same macro is used for reading *VERB names.

Arguments:
  ctype     bit(s) tested against cb->ctypes[] to decide whether a character
              belongs to the name
  errcode   value stored in errset when the name is too long
  errset    lvalue that receives errcode on failure

The expansion site must provide the variables ptr, namelen and cb, and a label
called FAILED. On normal completion ptr points to the first character that is
not part of the name and namelen holds the number of characters that were
skipped. As soon as namelen exceeds MAX_NAME_SIZE, errset is assigned and
control is transferred to FAILED.

The body is wrapped in do { ... } while (0) so that an invocation followed by a
semicolon is exactly one statement; without that, an unbraced "if" would guard
only the initial assignment. The arguments are parenthesized in the expansion
so that compound expressions can be passed safely. */

#define READ_NAME(ctype, errcode, errset) \
  do \
    { \
    namelen = 0; \
    while (MAX_255(*ptr) && (cb->ctypes[*ptr] & (ctype)) != 0) \
      { \
      ptr++; \
      namelen++; \
      if (namelen > MAX_NAME_SIZE) \
        { \
        (errset) = (errcode); \
        goto FAILED; \
        } \
      } \
    } \
  while (0)
/************************************************* /*************************************************
* Scan regex to identify named groups * * Scan regex to identify named groups *
*************************************************/ *************************************************/
@ -3459,8 +3484,8 @@ for (; ptr < cb->end_pattern; ptr++)
goto FAILED; goto FAILED;
} }
while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0) ptr++; /* Advance ptr, set namelen and check its length. */
namelen = (int)(ptr - name); READ_NAME(ctype_word, ERR48, errorcode);
if (*ptr != c) if (*ptr != c)
{ {
@ -3475,14 +3500,7 @@ for (; ptr < cb->end_pattern; ptr++)
} }
if (namelen + IMM2_SIZE + 1 > cb->name_entry_size) if (namelen + IMM2_SIZE + 1 > cb->name_entry_size)
{
cb->name_entry_size = namelen + IMM2_SIZE + 1; cb->name_entry_size = namelen + IMM2_SIZE + 1;
if (namelen > MAX_NAME_SIZE)
{
errorcode = ERR48;
goto FAILED;
}
}
/* We have a valid name for this capturing group. */ /* We have a valid name for this capturing group. */
@ -5602,8 +5620,10 @@ for (;; ptr++)
PCRE2_SPTR arg = NULL; PCRE2_SPTR arg = NULL;
previous = NULL; previous = NULL;
ptr++; ptr++;
while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_letter) != 0) ptr++;
namelen = (int)(ptr - name); /* Increment ptr, set namelen, check length */
READ_NAME(ctype_letter, ERR60, *errorcodeptr);
/* It appears that Perl allows any characters whatsoever, other than /* It appears that Perl allows any characters whatsoever, other than
a closing parenthesis, to appear in arguments, so we no longer insist on a closing parenthesis, to appear in arguments, so we no longer insist on
@ -6008,12 +6028,9 @@ for (;; ptr++)
*errorcodeptr = ERR28; /* Assertion expected */ *errorcodeptr = ERR28; /* Assertion expected */
goto FAILED; goto FAILED;
} }
name = ptr++; name = ptr;
while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0) /* Increment ptr, set namelen, check length */
{ READ_NAME(ctype_word, ERR48, *errorcodeptr);
ptr++;
}
namelen = (int)(ptr - name);
if (lengthptr != NULL) skipunits += IMM2_SIZE; if (lengthptr != NULL) skipunits += IMM2_SIZE;
} }
@ -6396,8 +6413,8 @@ for (;; ptr++)
*errorcodeptr = ERR44; /* Group name must start with non-digit */ *errorcodeptr = ERR44; /* Group name must start with non-digit */
goto FAILED; goto FAILED;
} }
while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0) ptr++; /* Increment ptr, set namelen, check length */
namelen = (int)(ptr - name); READ_NAME(ctype_word, ERR48, *errorcodeptr);
/* In the pre-compile phase, do a syntax check. */ /* In the pre-compile phase, do a syntax check. */
@ -6413,11 +6430,6 @@ for (;; ptr++)
*errorcodeptr = ERR42; *errorcodeptr = ERR42;
goto FAILED; goto FAILED;
} }
if (namelen > MAX_NAME_SIZE)
{
*errorcodeptr = ERR48;
goto FAILED;
}
} }
/* Scan the list of names generated in the pre-pass in order to get /* Scan the list of names generated in the pre-pass in order to get

4
testdata/testinput2 vendored
View File

@ -4598,5 +4598,9 @@ B)x/alt_verbnames,mark
/(?=a\K)/replace=z /(?=a\K)/replace=z
BaCaD BaCaD
/(?'abcdefghijklmnopqrstuvwxyzABCDEFG'toolong)/
/(?'abcdefghijklmnopqrstuvwxyzABCDEF'justright)/
# End of testinput2 # End of testinput2

View File

@ -14693,5 +14693,10 @@ Subject length lower bound = 1
/(?=a\K)/replace=z /(?=a\K)/replace=z
BaCaD BaCaD
Failed: error -60: match with end before start is not supported Failed: error -60: match with end before start is not supported
/(?'abcdefghijklmnopqrstuvwxyzABCDEFG'toolong)/
Failed: error 148 at offset 36: subpattern name is too long (maximum 32 characters)
/(?'abcdefghijklmnopqrstuvwxyzABCDEF'justright)/
# End of testinput2 # End of testinput2