Check name lengths dynamically while scanning (ChangeLog item 76).

READ_NAME is statement-safe (wrapped in do { } while (0)), parenthesizes its
arguments, and names its error-number parameter errnum rather than errno,
which <errno.h> defines as a macro. The post-image blob id on the
src/pcre2_compile.c index line has not been recomputed for this form of the
macro.

diff --git a/ChangeLog b/ChangeLog
index 96d2e4c..de48b0f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -260,8 +260,8 @@ as /(?<=(a)(?-1))x/ which have a recursion within a backreference.
 75. Give an error in pcre2_substitute() if a match ends before it starts (as a
 result of the use of \K).
 
-76. Check the length of the name in (*MARK:xx) etc. dynamically to avoid the
-possibility of integer overflow.
+76. Check the length of subpattern names and the names in (*MARK:xx) etc.
+dynamically to avoid the possibility of integer overflow.
 
 
 Version 10.20 30-June-2015
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index c161f5f..eaa33fc 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -3001,6 +3001,45 @@ return arglen;
 
 
 
+/*************************************************
+*        Macro for the next two functions        *
+*************************************************/
+
+/* Both scan_for_captures() and compile_branch() use this macro to generate a
+fragment of code that reads the characters of a name and sets its length
+(checking for not being too long). Count the characters dynamically, to avoid
+the possibility of integer overflow. The same macro is used for reading *VERB
+names.
+
+Arguments:
+  ctype    a ctype_xxx bit; name characters must have it set in cb->ctypes
+  errnum   the error number to store if the name is too long
+  errset   an lvalue that receives errnum in that case
+
+The expansion reads and updates the caller's ptr and namelen, reads
+cb->ctypes, and jumps to the caller's FAILED label if the name is longer
+than MAX_NAME_SIZE. It is wrapped in do { } while (0) so that an invocation
+followed by a semicolon forms a single statement. */
+
+#define READ_NAME(ctype, errnum, errset)                       \
+  do                                                           \
+    {                                                          \
+    namelen = 0;                                               \
+    while (MAX_255(*ptr) && (cb->ctypes[*ptr] & (ctype)) != 0) \
+      {                                                        \
+      ptr++;                                                   \
+      namelen++;                                               \
+      if (namelen > MAX_NAME_SIZE)                             \
+        {                                                      \
+        (errset) = (errnum);                                   \
+        goto FAILED;                                           \
+        }                                                      \
+      }                                                        \
+    }                                                          \
+  while (0)
+
+
+
 /*************************************************
 *      Scan regex to identify named groups       *
 *************************************************/
@@ -3459,8 +3498,8 @@ for (; ptr < cb->end_pattern; ptr++)
             goto FAILED;
             }
 
-          while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0) ptr++;
-          namelen = (int)(ptr - name);
+          /* Advance ptr, set namelen and check its length. */
+          READ_NAME(ctype_word, ERR48, errorcode);
 
           if (*ptr != c)
             {
@@ -3475,14 +3514,7 @@ for (; ptr < cb->end_pattern; ptr++)
           }
 
         if (namelen + IMM2_SIZE + 1 > cb->name_entry_size)
-          {
           cb->name_entry_size = namelen + IMM2_SIZE + 1;
-          if (namelen > MAX_NAME_SIZE)
-            {
-            errorcode = ERR48;
-            goto FAILED;
-            }
-          }
 
         /* We have a valid name for this capturing group. */
 
@@ -5602,8 +5634,10 @@ for (;; ptr++)
         PCRE2_SPTR arg = NULL;
         previous = NULL;
         ptr++;
-        while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_letter) != 0) ptr++;
-        namelen = (int)(ptr - name);
+
+        /* Increment ptr, set namelen, check length */
+
+        READ_NAME(ctype_letter, ERR60, *errorcodeptr);
 
         /* It appears that Perl allows any characters whatsoever, other than
         a closing parenthesis, to appear in arguments, so we no longer insist on
@@ -6008,12 +6042,9 @@ for (;; ptr++)
             *errorcodeptr = ERR28;   /* Assertion expected */
             goto FAILED;
             }
-          name = ptr++;
-          while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0)
-            {
-            ptr++;
-            }
-          namelen = (int)(ptr - name);
+          name = ptr;
+          /* Increment ptr, set namelen, check length */
+          READ_NAME(ctype_word, ERR48, *errorcodeptr);
           if (lengthptr != NULL) skipunits += IMM2_SIZE;
           }
 
@@ -6396,8 +6427,8 @@ for (;; ptr++)
           *errorcodeptr = ERR44; /* Group name must start with non-digit */
           goto FAILED;
           }
-        while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0) ptr++;
-        namelen = (int)(ptr - name);
+        /* Increment ptr, set namelen, check length */
+        READ_NAME(ctype_word, ERR48, *errorcodeptr);
 
         /* In the pre-compile phase, do a syntax check. */
 
@@ -6413,11 +6444,6 @@ for (;; ptr++)
             *errorcodeptr = ERR42;
             goto FAILED;
             }
-          if (namelen > MAX_NAME_SIZE)
-            {
-            *errorcodeptr = ERR48;
-            goto FAILED;
-            }
           }
 
         /* Scan the list of names generated in the pre-pass in order to get
diff --git a/testdata/testinput2 b/testdata/testinput2
index 5142943..ff2af24 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4598,5 +4598,9 @@ B)x/alt_verbnames,mark
 
 /(?=a\K)/replace=z
     BaCaD
+
+/(?'abcdefghijklmnopqrstuvwxyzABCDEFG'toolong)/
+
+/(?'abcdefghijklmnopqrstuvwxyzABCDEF'justright)/
 
 # End of testinput2 
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 911ebc8..b07549a 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14693,5 +14693,10 @@ Subject length lower bound = 1
 /(?=a\K)/replace=z
     BaCaD
 Failed: error -60: match with end before start is not supported
+
+/(?'abcdefghijklmnopqrstuvwxyzABCDEFG'toolong)/
+Failed: error 148 at offset 36: subpattern name is too long (maximum 32 characters)
+
+/(?'abcdefghijklmnopqrstuvwxyzABCDEF'justright)/
 
 # End of testinput2 