bump simplecpp

2021-04-30 17:41:59 +02:00 · 2021-04-30 17:41:59 +02:00 · f4e69b99d2
parent d2d2124238
commit f4e69b99d2
2 changed files with 209 additions and 3 deletions
--- a/externals/simplecpp/simplecpp.cpp
+++ b/externals/simplecpp/simplecpp.cpp
@ -23,6 +23,7 @@
 #include "simplecpp.h"
 #include <algorithm>
 #include <climits>
 #include <cstdlib>
 #include <cstring>
 #include <exception>
@ -2304,6 +2305,208 @@ static void simplifyName(simplecpp::TokenList &expr)
    }
 }
 /*
 * Reads at least minlen and at most maxlen digits (inc. prefix) in base base
 * from s starting at position pos and converts them to an
 * unsigned long long value, updating pos to point to the first
 * unused element of s.
 *
 * Unlike a bare std::strtoull call, leading whitespace and a leading sign
 * are not consumed: they count as "zero digits read", so input such as
 * " 41", "+41" or "-1" fails the minlen requirement instead of being
 * silently accepted.
 *
 * Returns ULLONG_MAX if the result is not representable and
 * throws std::runtime_error("expected digit") if the above requirements
 * were not possible to satisfy.
 */
 static unsigned long long stringToULLbounded(
    const std::string& s,
    std::size_t& pos,
    int base = 0,
    std::ptrdiff_t minlen = 1,
    std::size_t maxlen = std::string::npos
 )
 {
    // Work on a bounded copy so that strtoull can never read past maxlen.
    const std::string sub = s.substr(pos, maxlen);
    const char* const start = sub.c_str();
    const char* stop = start;
    unsigned long long value = 0;

    // strtoull would skip whitespace and accept a sign; neither is a digit,
    // so refuse to start the conversion on such a character.
    bool digitsMayFollow = !sub.empty();
    if (digitsMayFollow) {
        switch (sub[0]) {
        case ' ':
        case '\t':
        case '\n':
        case '\v':
        case '\f':
        case '\r':
        case '+':
        case '-':
            digitsMayFollow = false;
            break;
        default:
            break;
        }
    }

    if (digitsMayFollow) {
        char* end;
        value = std::strtoull(start, &end, base);
        stop = end;
    }

    const std::ptrdiff_t consumed = stop - start;
    pos += consumed;
    if (consumed < minlen)
        throw std::runtime_error("expected digit");
    return value;
 }
 /* Computes the long long value of a character literal. The literal is
 * given including its encoding prefix, but without any ud-suffix.
 *
 * The input str is assumed to be ASCII-compatible and single-byte encoded.
 *
 * Assumptions about the target:
 * - execution character set encoding is UTF-8 or matches the encoding of str
 * - execution wide-character set encoding is UTF-32
 * - requirements for __STDC_UTF_16__, __STDC_UTF_32__ and __STDC_ISO_10646__ satisfied
 * - char16_t is 16bit wide
 * - char32_t is 32bit wide
 * - wchar_t is 32bit wide and unsigned
 * - char signedness matches the host
 * - sizeof(int) matches the host
 *
 * Assumptions about the host:
 * - ASCII-compatible execution character set
 *
 * Assumptions about both host and target:
 * - CHAR_BIT == 8
 * - two's complement
 *
 * Multi-character narrow literals follow GCC's behavior, except that
 * multi code unit universal character names are not supported.
 * Multi-character wide literals are not supported.
 * Universal character names have only limited support for non-UTF-8
 * execution character set encodings.
 *
 * Throws std::runtime_error for every malformed or unsupported literal.
 */
 long long simplecpp::characterLiteralToLL(const std::string& str)
 {
    // Classify by prefix. When none of the flags is set the literal is
    // treated as wide/utf32 (prefix L or U).
    bool isNarrow = false;
    bool isUtf8 = false;
    bool isUtf16 = false;
    std::size_t pos = 0;
    if (str.compare(0, 1, "'") == 0) {
        isNarrow = true;
        pos = 1;
    } else if (str.compare(0, 2, "u'") == 0) {
        isUtf16 = true;
        pos = 2;
    } else if (str.compare(0, 3, "u8'") == 0) {
        isUtf8 = true;
        pos = 3;
    } else if (str.compare(0, 2, "L'") == 0 || str.compare(0, 2, "U'") == 0) {
        pos = 2;
    } else {
        throw std::runtime_error("expected a character literal");
    }

    const bool singleByteUnits = isNarrow || isUtf8;

    // Escape letters that map to exactly one fixed character
    // ('e' is the ESC extension).
    static const char escapeNames[] = { '\'', '"', '?', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v', 'e' };
    static const char escapeValues[] = { '\'', '"', '?', '\\', '\a', '\b', '\f', '\n', '\r', '\t', '\v', '\x1b' };

    unsigned long long accumulated = 0;
    std::size_t count = 0;

    // The last element of str is reserved for the closing quote.
    while (pos + 1 < str.size()) {
        const char current = str[pos];
        if (current == '\'' || current == '\n')
            throw std::runtime_error("raw single quotes and newlines not allowed in character literals");

        if (!isNarrow && count >= 1)
            throw std::runtime_error("multiple characters only supported in narrow character literals");

        unsigned long long unit = 0;

        if (current != '\\') {
            unit = static_cast<unsigned char>(current);
            ++pos;
            if (!isNarrow && unit > 0x7f)
                throw std::runtime_error("non-ASCII source characters supported only in narrow character literals");
        } else {
            ++pos;
            const char esc = str[pos];
            ++pos;
            if (pos >= str.size())
                throw std::runtime_error("unexpected end of character literal");

            bool isSimple = false;
            for (std::size_t i = 0; i < sizeof(escapeNames); ++i) {
                if (escapeNames[i] == esc) {
                    unit = static_cast<unsigned char>(escapeValues[i]);
                    isSimple = true;
                    break;
                }
            }

            if (!isSimple) {
                if (esc >= '0' && esc <= '7') {
                    // octal escape sequences consist of 1 to 3 digits;
                    // step back so the first digit is parsed as well
                    --pos;
                    unit = stringToULLbounded(str, pos, 8, 1, 3);
                } else if (esc == 'x') {
                    // hexadecimal escape sequences consist of at least 1 digit
                    unit = stringToULLbounded(str, pos, 16);
                } else if (esc == 'u' || esc == 'U') {
                    // universal character names have exactly 4 or 8 digits
                    const std::size_t digits = (esc == 'u') ? 4 : 8;
                    unit = stringToULLbounded(str, pos, 16, digits, digits);

                    // UTF-8 encodes code points above 0x7f in multiple code units,
                    // UTF-16 those above 0xffff; code points above 0x10ffff
                    // are not allowed at all
                    unsigned long long maxCodePoint = 0x10ffff;
                    if (singleByteUnits)
                        maxCodePoint = 0x7f;
                    else if (isUtf16)
                        maxCodePoint = 0xffff;
                    if (unit > maxCodePoint)
                        throw std::runtime_error("code point too large");

                    if (!(unit < 0xd800 || unit > 0xdfff))
                        throw std::runtime_error("surrogate code points not allowed in universal character names");
                } else {
                    throw std::runtime_error("invalid escape sequence");
                }
            }
        }

        // The decoded unit has to fit into a single code unit of the literal's type.
        bool tooLarge = (unit >> 32) != 0;
        if (singleByteUnits)
            tooLarge = tooLarge || unit > std::numeric_limits<unsigned char>::max();
        if (isUtf16)
            tooLarge = tooLarge || (unit >> 16) != 0;
        if (tooLarge)
            throw std::runtime_error("numeric escape sequence too large");

        accumulated <<= CHAR_BIT;
        accumulated |= unit;
        ++count;
    }

    const bool properlyClosed = (pos + 1 == str.size()) && (str[pos] == '\'');
    if (!properlyClosed)
        throw std::runtime_error("missing closing quote in character literal");

    if (count == 0)
        throw std::runtime_error("empty character literal");

    if (isNarrow) {
        // ordinary narrow character literal's value is determined by (possibly signed) char
        if (count == 1)
            return static_cast<char>(accumulated);
        // while multi-character literal's value is determined by (signed) int
        return static_cast<int>(accumulated);
    }

    // All other cases are unsigned. Since long long is at least 64bit wide,
    // while the literals are at most 32bit wide, the conversion preserves all values.
    return static_cast<long long>(accumulated);
 }
 static void simplifyNumbers(simplecpp::TokenList &expr)
 {
    for (simplecpp::Token *tok = expr.front(); tok; tok = tok->next) {
@ -2311,8 +2514,8 @@ static void simplifyNumbers(simplecpp::TokenList &expr)
            continue;
        if (tok->str().compare(0,2,"0x") == 0)
            tok->setstr(toString(stringToULL(tok->str())));
-        else if (tok->str()[0] == '\'')
+        else if (!tok->number && tok->str().find('\'') != tok->str().npos)
-            tok->setstr(toString(tok->str()[1] & 0xffU));
+            tok->setstr(toString(simplecpp::characterLiteralToLL(tok->str())));
    }
 }
--- a/externals/simplecpp/simplecpp.h
+++ b/externals/simplecpp/simplecpp.h
@ -107,7 +107,8 @@ namespace simplecpp {
        }
        void flags() {
-            name = (std::isalpha((unsigned char)string[0]) || string[0] == '_' || string[0] == '$');
+            name = (std::isalpha((unsigned char)string[0]) || string[0] == '_' || string[0] == '$')
                   && (string.find('\'') == string.npos);
            comment = string.size() > 1U && string[0] == '/' && (string[1] == '/' || string[1] == '*');
            number = std::isdigit((unsigned char)string[0]) || (string.size() > 1U && string[0] == '-' && std::isdigit((unsigned char)string[1]));
            op = (string.size() == 1U) ? string[0] : '\0';
@ -299,6 +300,8 @@ namespace simplecpp {
        std::string std;
    };
    SIMPLECPP_LIB long long characterLiteralToLL(const std::string& str);
    SIMPLECPP_LIB std::map<std::string, TokenList*> load(const TokenList &rawtokens, std::vector<std::string> &filenames, const DUI &dui, OutputList *outputList = NULL);
    /**