MathLib::characterLiteralToLongNumber can now encode Unicode character literals, although the resulting encoding is not compatible with gcc/clang
This commit is contained in:
parent
5fabe66ff7
commit
78489f4293
|
@ -322,33 +322,38 @@ static bool isOctalDigitString(const std::string& str)
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
 * Pack the bytes of a multi-character literal into a single integer.
 *
 * The first character ends up in the most significant position, i.e.
 * "AB" is encoded as (('A' << 8) | 'B'). If the string holds more bytes
 * than fit into an unsigned int, the leading bytes are shifted out.
 *
 * @param str raw characters of the literal (without the enclosing quotes)
 * @return the packed value; 0 for an empty string
 */
static unsigned int encodeMultiChar(const std::string& str)
{
    unsigned int retval = 0;
    for (const char c : str) {
        // Convert through unsigned char: where plain char is signed, a byte
        // >= 0x80 would otherwise be sign-extended and overwrite every bit
        // already accumulated above it.
        retval = (retval << 8) | static_cast<unsigned char>(c);
    }
    return retval;
}
|
||||
|
||||
MathLib::bigint MathLib::characterLiteralToLongNumber(const std::string& str)
|
||||
{
|
||||
if (str.empty())
|
||||
return 0; // for unit-testing...
|
||||
if (str.size()==1)
|
||||
return str[0] & 0xff;
|
||||
const std::string& str1 = str.substr(1);
|
||||
if (str[0] != '\\') {
|
||||
// C99 6.4.4.4
|
||||
// The value of an integer character constant containing more than one character (e.g., 'ab'),
|
||||
// or containing a character or escape sequence that does not map to a single-byte execution character,
|
||||
// is implementation-defined.
|
||||
// clang and gcc seem to use the following encoding: 'AB' as (('A' << 8) | 'B')
|
||||
unsigned int retval(str.front());
|
||||
for (std::string::const_iterator it=str1.begin(); it!=str1.end(); ++it) {
|
||||
retval = retval<<8 | *it;
|
||||
}
|
||||
return (MathLib::bigint)retval;
|
||||
return encodeMultiChar(str);
|
||||
}
|
||||
const std::string& str1 = str.substr(1);
|
||||
|
||||
switch (str1[0]) {
|
||||
case 'x':
|
||||
return toLongNumber("0x" + str.substr(2));
|
||||
case 'u': // 16bit unicode character
|
||||
throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled 16-bit unicode char constant \\" + str);
|
||||
case 'U': // 16bit unicode character
|
||||
throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled 32-bit unicode char constant \\" + str);
|
||||
case 'u': // 16-bit unicode character
|
||||
return encodeMultiChar(str1);
|
||||
case 'U': // 32-bit unicode character
|
||||
return encodeMultiChar(str1);
|
||||
default: {
|
||||
char c;
|
||||
switch (str.size()-1) {
|
||||
|
|
|
@ -290,9 +290,15 @@ private:
|
|||
ASSERT_EQUALS((int)('\134'), MathLib::toLongNumber("'\\134'"));
|
||||
ASSERT_THROW(MathLib::toLongNumber("'\\9'"), InternalError);
|
||||
ASSERT_THROW(MathLib::toLongNumber("'\\934'"), InternalError);
|
||||
ASSERT_THROW(MathLib::toLongNumber("'\\u9343'"), InternalError);
|
||||
ASSERT_THROW(MathLib::toLongNumber("'\\U0001f34c'"), InternalError);
|
||||
|
||||
// note: the expected values below do not match the gcc/clang encoding
|
||||
ASSERT_EQUALS(959657011, MathLib::toLongNumber("'\\u9343'"));
|
||||
ASSERT_EQUALS(1714631779, MathLib::toLongNumber("'\\U0001f34c'"));
|
||||
#ifdef __GNUC__
|
||||
// BEGIN Implementation-specific results
|
||||
TODO_ASSERT_EQUALS((int)'\u9343', 959657011, MathLib::toLongNumber("'\\u9343'"));
|
||||
TODO_ASSERT_EQUALS((int)'\U0001f34c', 1714631779, MathLib::toLongNumber("'\\U0001f34c'"));
|
||||
// END Implementation-specific results
|
||||
#endif
|
||||
{
|
||||
// some unit-testing for a utility function
|
||||
ASSERT_EQUALS(0, MathLib::characterLiteralToLongNumber(std::string("")));
|
||||
|
|
Loading…
Reference in New Issue