Bump simplecpp (#3255)

This commit is contained in:
keinflue 2021-05-10 20:46:31 +00:00 committed by GitHub
parent 75311fba0f
commit c77caf637c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 50 additions and 5 deletions

View File

@ -2334,10 +2334,11 @@ static unsigned long long stringToULLbounded(
/* Converts character literal (including prefix, but not ud-suffix) /* Converts character literal (including prefix, but not ud-suffix)
* to long long value. * to long long value.
* *
* Assumes ASCII-compatible single-byte encoded str. * Assumes ASCII-compatible single-byte encoded str for narrow literals
* and UTF-8 otherwise.
* *
* For target assumes * For target assumes
* - UTF-8 execution character set encoding or encoding matching str * - execution character set encoding matching str
* - UTF-32 execution wide-character set encoding * - UTF-32 execution wide-character set encoding
* - requirements for __STDC_UTF_16__, __STDC_UTF_32__ and __STDC_ISO_10646__ satisfied * - requirements for __STDC_UTF_16__, __STDC_UTF_32__ and __STDC_ISO_10646__ satisfied
* - char16_t is 16bit wide * - char16_t is 16bit wide
@ -2402,6 +2403,12 @@ long long simplecpp::characterLiteralToLL(const std::string& str)
throw std::runtime_error("unexpected end of character literal"); throw std::runtime_error("unexpected end of character literal");
switch (escape) { switch (escape) {
// obscure GCC extensions
case '%':
case '(':
case '[':
case '{':
// standard escape sequences
case '\'': case '\'':
case '"': case '"':
case '?': case '?':
@ -2431,8 +2438,9 @@ long long simplecpp::characterLiteralToLL(const std::string& str)
value = static_cast<unsigned char>('\v'); value = static_cast<unsigned char>('\v');
break; break;
// ESC extension // GCC extension for ESC character
case 'e': case 'e':
case 'E':
value = static_cast<unsigned char>('\x1b'); value = static_cast<unsigned char>('\x1b');
break; break;
@ -2476,8 +2484,45 @@ long long simplecpp::characterLiteralToLL(const std::string& str)
} else { } else {
value = static_cast<unsigned char>(str[pos++]); value = static_cast<unsigned char>(str[pos++]);
if (!narrow && value > 0x7f) if (!narrow && value >= 0x80) {
throw std::runtime_error("non-ASCII source characters supported only in narrow character literals"); // Assuming this is a UTF-8 encoded code point.
// This decoder may not completely validate the input.
// Noncharacters are neither rejected nor replaced.
int additional_bytes;
if (value >= 0xf5) // higher values would result in code points above 0x10ffff
throw std::runtime_error("assumed UTF-8 encoded source, but sequence is invalid");
else if (value >= 0xf0)
additional_bytes = 3;
else if (value >= 0xe0)
additional_bytes = 2;
else if (value >= 0xc2) // 0xc0 and 0xc1 are always overlong 2-byte encodings
additional_bytes = 1;
else
throw std::runtime_error("assumed UTF-8 encoded source, but sequence is invalid");
value &= (1 << (6 - additional_bytes)) - 1;
while (additional_bytes--) {
if(pos + 1 >= str.size())
throw std::runtime_error("assumed UTF-8 encoded source, but character literal ends unexpectedly");
unsigned char c = str[pos++];
if (((c >> 6) != 2) // ensure c has the binary form 0b10xxxxxx
|| (!value && additional_bytes == 1 && c < 0xa0) // overlong 3-byte encoding
|| (!value && additional_bytes == 2 && c < 0x90)) // overlong 4-byte encoding
throw std::runtime_error("assumed UTF-8 encoded source, but sequence is invalid");
value = (value << 6) | (c & ((1 << 7) - 1));
}
if (value >= 0xd800 && value <= 0xdfff)
throw std::runtime_error("assumed UTF-8 encoded source, but sequence is invalid");
if ((utf8 && value > 0x7f) || (utf16 && value > 0xffff) || value > 0x10ffff)
throw std::runtime_error("code point too large");
}
} }
if (((narrow || utf8) && value > std::numeric_limits<unsigned char>::max()) || (utf16 && value >> 16) || value >> 32) if (((narrow || utf8) && value > std::numeric_limits<unsigned char>::max()) || (utf16 && value >> 16) || value >> 32)