diff --git a/lib/tokenize.cpp b/lib/tokenize.cpp index 3ab273575..9b3451ea1 100644 --- a/lib/tokenize.cpp +++ b/lib/tokenize.cpp @@ -8025,47 +8025,44 @@ std::string Tokenizer::simplifyString(const std::string &source) { std::string str = source; - // true when previous char is a \ . - bool escaped = false; - for (std::string::size_type i = 0; i + 2 < str.size(); ++i) { - if (!escaped) { - if (str[i] == '\\') - escaped = true; - + for (std::string::size_type i = 0; i + 1U < str.size(); ++i) { + if (str[i] != '\\') continue; - } - if (str[i] == 'x') { - // Hex value - if (str[i+1] == '0' && str[i+2] == '0') - str.replace(i, 3, "0"); - else if (i > 0) { - // We will replace all other character as 'a' - // If that causes problems in the future, this can - // be improved. But for now, this should be OK. - unsigned int n = 1; - while (n < 2 && std::isxdigit(str[i+1+n])) - ++n; - --i; - n += 2; - str.replace(i, n, "a"); + int c = 'a'; // char + unsigned int sz = 0; // size of stringdata + if (str[i+1] == 'x') { + sz = 2; + while (std::isxdigit(str[i+sz]) && sz < 4) + sz++; + if (sz > 2) { + std::istringstream istr(str.substr(i+2, sz-2)); + istr >> std::hex >> c; } - } else if (MathLib::isOctalDigit(str[i])) { - if (MathLib::isOctalDigit(str[i+1]) && - MathLib::isOctalDigit(str[i+2])) { - if (str[i+1] == '0' && str[i+2] == '0') - str.replace(i, 3, "0"); - else { - // We will replace all other character as 'a' - // If that causes problems in the future, this can - // be improved. But for now, this should be OK. - --i; - str.replace(i, 4, "a"); - } + } else if (MathLib::isOctalDigit(str[i+1])) { + sz = 2; + while (MathLib::isOctalDigit(str[i+sz]) && sz < 4) + sz++; + std::istringstream istr(str.substr(i+1, sz-1)); + istr >> std::oct >> c; + if (sz == 2) { + if (c == 0) { + str = str.substr(0,i) + "\""; + continue; + } else + str[i+1] = (char)c; } } - escaped = false; + if (sz <= 2) + i++; + else if (i+sz < str.size()) { + if (c == 0) + str = str.substr(0,i) + "\""; + else + str.replace(i, sz, std::string(1U, (char)c)); + } else + str.replace(i, str.size() - i - 1U, "a"); } return str; diff --git a/test/testsimplifytokens.cpp b/test/testsimplifytokens.cpp index 63bc18d22..a87fb7e8b 100644 --- a/test/testsimplifytokens.cpp +++ b/test/testsimplifytokens.cpp @@ -183,8 +183,6 @@ private: TEST_CASE(simplifyAtol) - TEST_CASE(simplifyHexInString) - TEST_CASE(simplifyTypedef1) TEST_CASE(simplifyTypedef2) TEST_CASE(simplifyTypedef3) @@ -3603,28 +3601,6 @@ private: ASSERT_EQUALS("a = 10 ;", tok("a = atol(\"0xa\");")); } - void simplifyHexInString() { - ASSERT_EQUALS("\"a\"", tok("\"\\x61\"")); - ASSERT_EQUALS("\"a\"", tok("\"\\141\"")); - - ASSERT_EQUALS("\"\\0\"", tok("\"\\x00\"")); - ASSERT_EQUALS("\"\\0\"", tok("\"\\000\"")); - - ASSERT_EQUALS("\"\\nhello\"", tok("\"\\nhello\"")); - - ASSERT_EQUALS("\"aaa\"", tok("\"\\x61\\x61\\x61\"")); - ASSERT_EQUALS("\"aaa\"", tok("\"\\141\\141\\141\"")); - - ASSERT_EQUALS("\"\\\\x61\"", tok("\"\\\\x61\"")); - - // These tests can fail, if other characters are handled - // more correctly. But for now all non null characters should - // become 'a' - ASSERT_EQUALS("\"a\"", tok("\"\\x62\"")); - ASSERT_EQUALS("\"a\"", tok("\"\\177\"")); - } - - std::string simplifyTypedef(const char code[]) { errout.str(""); diff --git a/test/testtokenize.cpp b/test/testtokenize.cpp index 4f93a0cba..cbce44428 100644 --- a/test/testtokenize.cpp +++ b/test/testtokenize.cpp @@ -5630,9 +5630,27 @@ private: Settings settings; Tokenizer tokenizer(&settings, this); ASSERT_EQUALS("\"abc\"", tokenizer.simplifyString("\"abc\"")); - ASSERT_EQUALS("\"a\"", tokenizer.simplifyString("\"\\x3\"")); - ASSERT_EQUALS("\"a\"", tokenizer.simplifyString("\"\\x33\"")); - ASSERT_EQUALS("\"a3\"", tokenizer.simplifyString("\"\\x333\"")); + ASSERT_EQUALS("\"\n\"", tokenizer.simplifyString("\"\\xa\"")); + ASSERT_EQUALS("\"3\"", tokenizer.simplifyString("\"\\x33\"")); + ASSERT_EQUALS("\"33\"", tokenizer.simplifyString("\"\\x333\"")); + + ASSERT_EQUALS("\"a\"", tokenizer.simplifyString("\"\\x61\"")); + ASSERT_EQUALS("\"\n1\"", tokenizer.simplifyString("\"\\0121\"")); + + ASSERT_EQUALS("\"3\"", tokenizer.simplifyString("\"\\x33\"")); + ASSERT_EQUALS("\" 0\"", tokenizer.simplifyString("\"\\0400\"")); + + ASSERT_EQUALS("\"\\nhello\"", tokenizer.simplifyString("\"\\nhello\"")); + + ASSERT_EQUALS("\"aaa\"", tokenizer.simplifyString("\"\\x61\\x61\\x61\"")); + ASSERT_EQUALS("\"\n1\n1\n1\"", tokenizer.simplifyString("\"\\0121\\0121\\0121\"")); + + ASSERT_EQUALS("\"\\\\x61\"", tokenizer.simplifyString("\"\\\\x61\"")); + ASSERT_EQUALS("\"b\"", tokenizer.simplifyString("\"\\x62\"")); + ASSERT_EQUALS("\" 7\"", tokenizer.simplifyString("\"\\0407\"")); + + // terminate a string at null character. + ASSERT_EQUALS("\"a\"", tokenizer.simplifyString("\"a\\0\"")); } void simplifyConst() {