From 78489f429355e2f852ab35b65099f7f53eea6a5c Mon Sep 17 00:00:00 2001 From: Alexander Mai Date: Sat, 28 Nov 2015 16:37:26 +0100 Subject: [PATCH] MathLib::characterLiteralToLongNumber can encode unicode char literals, though it's incompatible to gcc/clang --- lib/mathlib.cpp | 25 +++++++++++++++---------- test/testmathlib.cpp | 12 +++++++++--- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/lib/mathlib.cpp b/lib/mathlib.cpp index 6fa4f9656..c75c14786 100644 --- a/lib/mathlib.cpp +++ b/lib/mathlib.cpp @@ -322,33 +322,38 @@ static bool isOctalDigitString(const std::string& str) return true; } +static unsigned int encodeMultiChar(const std::string& str) +{ + unsigned int retval(str.front()); + for (std::string::const_iterator it=str.begin()+1; it!=str.end(); ++it) { + retval = retval<<8 | *it; + } + return retval; +} + MathLib::bigint MathLib::characterLiteralToLongNumber(const std::string& str) { if (str.empty()) return 0; // for unit-testing... if (str.size()==1) return str[0] & 0xff; - const std::string& str1 = str.substr(1); if (str[0] != '\\') { // C99 6.4.4.4 // The value of an integer character constant containing more than one character (e.g., 'ab'), // or containing a character or escape sequence that does not map to a single-byte execution character, // is implementation-defined. // clang and gcc seem to use the following encoding: 'AB' as (('A' << 8) | 'B') - unsigned int retval(str.front()); - for (std::string::const_iterator it=str1.begin(); it!=str1.end(); ++it) { - retval = retval<<8 | *it; - } - return (MathLib::bigint)retval; + return encodeMultiChar(str); } + const std::string& str1 = str.substr(1); switch (str1[0]) { case 'x': return toLongNumber("0x" + str.substr(2)); - case 'u': // 16bit unicode character - throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled 16-bit unicode char constant \\" + str); - case 'U': // 16bit unicode character - throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled 32-bit unicode char constant \\" + str); + case 'u': // 16-bit unicode character + return encodeMultiChar(str1); + case 'U': // 32-bit unicode character + return encodeMultiChar(str1); default: { char c; switch (str.size()-1) { diff --git a/test/testmathlib.cpp b/test/testmathlib.cpp index aaa8c99b8..3dde41fd2 100644 --- a/test/testmathlib.cpp +++ b/test/testmathlib.cpp @@ -290,9 +290,15 @@ private: ASSERT_EQUALS((int)('\134'), MathLib::toLongNumber("'\\134'")); ASSERT_THROW(MathLib::toLongNumber("'\\9'"), InternalError); ASSERT_THROW(MathLib::toLongNumber("'\\934'"), InternalError); - ASSERT_THROW(MathLib::toLongNumber("'\\u9343'"), InternalError); - ASSERT_THROW(MathLib::toLongNumber("'\\U0001f34c'"), InternalError); - + // that is not gcc/clang encoding + ASSERT_EQUALS(959657011, MathLib::toLongNumber("'\\u9343'")); + ASSERT_EQUALS(1714631779, MathLib::toLongNumber("'\\U0001f34c'")); +#ifdef __GNUC__ + // BEGIN Implementation-specific results + TODO_ASSERT_EQUALS((int)'\u9343', 959657011, MathLib::toLongNumber("'\\u9343'")); + TODO_ASSERT_EQUALS((int)'\U0001f34c', 1714631779, MathLib::toLongNumber("'\\U0001f34c'")); + // END Implementation-specific results +#endif { // some unit-testing for a utility function ASSERT_EQUALS(0, MathLib::characterLiteralToLongNumber(std::string("")));