Ticket #7452: Properly interpret escape sequences in character literals.
This commit is contained in:
parent
9d1302d523
commit
3af30e728c
124
lib/mathlib.cpp
124
lib/mathlib.cpp
|
@ -335,15 +335,6 @@ MathLib::biguint MathLib::toULongNumber(const std::string & str)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static bool isOctalDigitString(const std::string& str)
|
||||
{
|
||||
for (std::string::const_iterator it=str.begin(); it!=str.end(); ++it) {
|
||||
if (!MathLib::isOctalDigit(*it))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static unsigned int encodeMultiChar(const std::string& str)
|
||||
{
|
||||
unsigned int retval(str.front());
|
||||
|
@ -357,30 +348,50 @@ MathLib::bigint MathLib::characterLiteralToLongNumber(const std::string& str)
|
|||
{
|
||||
if (str.empty())
|
||||
return 0; // for unit-testing...
|
||||
if (str.size()==1)
|
||||
return str[0] & 0xff;
|
||||
if (str[0] != '\\') {
|
||||
// C99 6.4.4.4
|
||||
// The value of an integer character constant containing more than one character (e.g., 'ab'),
|
||||
// or containing a character or escape sequence that does not map to a single-byte execution character,
|
||||
// is implementation-defined.
|
||||
// clang and gcc seem to use the following encoding: 'AB' as (('A' << 8) | 'B')
|
||||
return encodeMultiChar(str);
|
||||
}
|
||||
const std::string& str1 = str.substr(1);
|
||||
|
||||
switch (str1[0]) {
|
||||
case 'x':
|
||||
return toLongNumber("0x" + str.substr(2));
|
||||
case 'u': // 16-bit unicode character
|
||||
return encodeMultiChar(str1);
|
||||
case 'U': // 32-bit unicode character
|
||||
return encodeMultiChar(str1);
|
||||
default: {
|
||||
char c;
|
||||
switch (str.size()-1) {
|
||||
case 1:
|
||||
switch (str[1]) {
|
||||
// C99 6.4.4.4
|
||||
// The value of an integer character constant containing more than one character (e.g., 'ab'),
|
||||
// or containing a character or escape sequence that does not map to a single-byte execution character,
|
||||
// is implementation-defined.
|
||||
// clang and gcc seem to use the following encoding: 'AB' as (('A' << 8) | 'B')
|
||||
const std::string& normStr = normalizeCharacterLiteral(str);
|
||||
return encodeMultiChar(normStr);
|
||||
}
|
||||
|
||||
std::string MathLib::normalizeCharacterLiteral(const std::string& iLiteral)
|
||||
{
|
||||
std::string normalizedLiteral;
|
||||
const std::string::size_type iLiteralLen = iLiteral.size();
|
||||
for (std::string::size_type idx = 0; idx < iLiteralLen ; ++idx) {
|
||||
if (iLiteral[idx] != '\\') {
|
||||
normalizedLiteral.push_back(iLiteral[idx]);
|
||||
continue;
|
||||
}
|
||||
++idx;
|
||||
if (idx == iLiteralLen) {
|
||||
throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
|
||||
}
|
||||
switch (iLiteral[idx]) {
|
||||
case 'x':
|
||||
// Hexa-decimal number: skip \x and interpret the next two characters
|
||||
{
|
||||
if (++idx == iLiteralLen)
|
||||
throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
|
||||
std::string tempBuf;
|
||||
tempBuf.push_back(iLiteral[idx]);
|
||||
if (++idx != iLiteralLen)
|
||||
tempBuf.push_back(iLiteral[idx]);
|
||||
normalizedLiteral.push_back(static_cast<char>(MathLib::toULongNumber("0x" + tempBuf)));
|
||||
continue;
|
||||
}
|
||||
case 'u':
|
||||
case 'U':
|
||||
// Unicode string; just skip the \u or \U
|
||||
continue;
|
||||
}
|
||||
// Single digit octal number
|
||||
if (1 == std::min<unsigned>(3, iLiteralLen - idx)) {
|
||||
switch (iLiteral[idx]) {
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
|
@ -389,52 +400,59 @@ MathLib::bigint MathLib::characterLiteralToLongNumber(const std::string& str)
|
|||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
return str[1]-'0';
|
||||
normalizedLiteral.push_back(iLiteral[idx]-'0');
|
||||
break;
|
||||
case 'a':
|
||||
c = '\a';
|
||||
normalizedLiteral.push_back('\a');
|
||||
break;
|
||||
case 'b':
|
||||
c = '\b';
|
||||
normalizedLiteral.push_back('\b');
|
||||
break;
|
||||
case 'e':
|
||||
c = 0x1B; // clang, gcc, tcc interpret this as 0x1B - escape character
|
||||
normalizedLiteral.push_back(0x1B); // clang, gcc, tcc interpnormalizedLiteral this as 0x1B - escape character
|
||||
break;
|
||||
case 'f':
|
||||
c = '\f';
|
||||
normalizedLiteral.push_back('\f');
|
||||
break;
|
||||
case 'n':
|
||||
c = '\n';
|
||||
normalizedLiteral.push_back('\n');
|
||||
break;
|
||||
case 'r':
|
||||
c = '\r';
|
||||
normalizedLiteral.push_back('\r');
|
||||
break;
|
||||
case 't':
|
||||
c = '\t';
|
||||
normalizedLiteral.push_back('\t');
|
||||
break;
|
||||
case 'v':
|
||||
c = '\v';
|
||||
normalizedLiteral.push_back('\v');
|
||||
break;
|
||||
case '\\':
|
||||
case '\?':
|
||||
case '\'':
|
||||
case '\"':
|
||||
c = str[1];
|
||||
normalizedLiteral.push_back(iLiteral[idx]);
|
||||
break;
|
||||
default:
|
||||
throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + str + "'.");
|
||||
throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
|
||||
}
|
||||
return c & 0xff;
|
||||
case 2:
|
||||
case 3:
|
||||
if (isOctalDigitString(str1))
|
||||
return toLongNumber("0" + str1);
|
||||
break;
|
||||
|
||||
continue;
|
||||
}
|
||||
// 2-3 digit octal number
|
||||
if (!MathLib::isOctalDigit(iLiteral[idx]))
|
||||
throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + iLiteral + "'.");
|
||||
std::string tempBuf;
|
||||
tempBuf.push_back(iLiteral[idx]);
|
||||
++idx;
|
||||
if (MathLib::isOctalDigit(iLiteral[idx])) {
|
||||
tempBuf.push_back(iLiteral[idx]);
|
||||
++idx;
|
||||
if (MathLib::isOctalDigit(iLiteral[idx])) {
|
||||
tempBuf.push_back(iLiteral[idx]);
|
||||
}
|
||||
}
|
||||
normalizedLiteral.push_back(static_cast<char>(MathLib::toLongNumber("0" + tempBuf)));
|
||||
}
|
||||
}
|
||||
|
||||
throw InternalError(0, "Internal Error. MathLib::toLongNumber: Unhandled char constant '" + str + "'.");
|
||||
return normalizedLiteral;
|
||||
}
|
||||
|
||||
MathLib::bigint MathLib::toLongNumber(const std::string & str)
|
||||
|
|
|
@ -31,6 +31,8 @@
|
|||
/** @brief simple math functions that uses operands stored in std::string. useful when performing math on tokens. */
|
||||
|
||||
class CPPCHECKLIB MathLib {
|
||||
friend class TestMathLib;
|
||||
|
||||
public:
|
||||
/** @brief value class */
|
||||
class value {
|
||||
|
@ -120,6 +122,12 @@ public:
|
|||
* */
|
||||
static MathLib::bigint characterLiteralToLongNumber(const std::string& str);
|
||||
|
||||
private:
|
||||
/*
|
||||
* \param iLiteral A character literal
|
||||
* \return The equivalent character literal with all escapes interpreted
|
||||
*/
|
||||
static std::string normalizeCharacterLiteral(const std::string& iLiteral);
|
||||
};
|
||||
|
||||
MathLib::value operator+(const MathLib::value &v1, const MathLib::value &v2);
|
||||
|
|
|
@ -58,6 +58,7 @@ private:
|
|||
TEST_CASE(tan);
|
||||
TEST_CASE(abs);
|
||||
TEST_CASE(toString);
|
||||
TEST_CASE(characterLiteralsNormalization);
|
||||
}
|
||||
|
||||
void isGreater() const {
|
||||
|
@ -289,6 +290,7 @@ private:
|
|||
ASSERT_EQUALS((int)('\34'), MathLib::toLongNumber("'\\34'"));
|
||||
ASSERT_EQUALS((int)('\034'), MathLib::toLongNumber("'\\034'"));
|
||||
ASSERT_EQUALS((int)('\134'), MathLib::toLongNumber("'\\134'"));
|
||||
ASSERT_EQUALS((int)('\134t'), MathLib::toLongNumber("'\\134t'")); // Ticket #7452
|
||||
ASSERT_THROW(MathLib::toLongNumber("'\\9'"), InternalError);
|
||||
ASSERT_THROW(MathLib::toLongNumber("'\\934'"), InternalError);
|
||||
// that is not gcc/clang encoding
|
||||
|
@ -1119,6 +1121,27 @@ private:
|
|||
ASSERT_EQUALS("0" , MathLib::toString(+0.0l));
|
||||
ASSERT_EQUALS("-0" , MathLib::toString(-0.0L));
|
||||
}
|
||||
|
||||
void characterLiteralsNormalization() {
|
||||
// `A` is 0x41 and 0101
|
||||
ASSERT_EQUALS("A" , MathLib::normalizeCharacterLiteral("\\x41"));
|
||||
ASSERT_EQUALS("A" , MathLib::normalizeCharacterLiteral("\\101"));
|
||||
// Hexa and octal numbers should not only be intepreted in byte 1
|
||||
ASSERT_EQUALS("TESTATEST" , MathLib::normalizeCharacterLiteral("TEST\\x41TEST"));
|
||||
ASSERT_EQUALS("TESTATEST" , MathLib::normalizeCharacterLiteral("TEST\\101TEST"));
|
||||
ASSERT_EQUALS("TESTTESTA" , MathLib::normalizeCharacterLiteral("TESTTEST\\x41"));
|
||||
ASSERT_EQUALS("TESTTESTA" , MathLib::normalizeCharacterLiteral("TESTTEST\\101"));
|
||||
// Single escape sequences
|
||||
ASSERT_EQUALS("\?" , MathLib::normalizeCharacterLiteral("\\?"));
|
||||
ASSERT_EQUALS("\'" , MathLib::normalizeCharacterLiteral("\\'"));
|
||||
// Incomplete hexa and octal sequences
|
||||
ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\"), InternalError);
|
||||
ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\x"), InternalError);
|
||||
// No octal digit in an octal sequence
|
||||
ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\9"), InternalError);
|
||||
// Unsupported single escape sequence
|
||||
ASSERT_THROW(MathLib::normalizeCharacterLiteral("\\c"), InternalError);
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_TEST(TestMathLib)
|
||||
|
|
Loading…
Reference in New Issue