From 92f4113b5952a4bfaea6ffef6f24f20541e3aca8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Marjam=C3=A4ki?= Date: Sun, 17 Mar 2019 13:09:15 +0100 Subject: [PATCH] Array index: Checking array index out of bounds for dynamic buffers --- lib/checkbufferoverrun.cpp | 91 ++++++++++++++++++++++---------------- lib/checkbufferoverrun.h | 10 ++--- lib/symboldatabase.cpp | 27 +++++++++++ lib/symboldatabase.h | 3 ++ lib/token.cpp | 4 ++ lib/valueflow.cpp | 52 ++++++++++++++++++++++ lib/valueflow.h | 9 +++- test/testbufferoverrun.cpp | 8 ++-- 8 files changed, 155 insertions(+), 49 deletions(-) diff --git a/lib/checkbufferoverrun.cpp b/lib/checkbufferoverrun.cpp index df0bfb327..af1b25ba7 100644 --- a/lib/checkbufferoverrun.cpp +++ b/lib/checkbufferoverrun.cpp @@ -60,6 +60,27 @@ static const CWE CWE788(788U); // Access of Memory Location After End of Buffer //--------------------------------------------------------------------------- +static std::vector getDynamicDimensions(const Token *tok, MathLib::bigint typeSize) +{ + if (typeSize == 0) { + const std::vector dimensions; + return dimensions; + } + for (const ValueFlow::Value &value : tok->values()) { + if (!value.isBufferSizeValue()) + continue; + Dimension dim; + dim.tok = nullptr; + dim.num = value.intvalue / typeSize; + dim.known = value.isKnown(); + const std::vector dimensions{dim}; + return dimensions; + } + + const std::vector dimensions; + return dimensions; +} + static size_t getMinFormatStringOutputLength(const std::vector ¶meters, unsigned int formatStringArgNr) { if (formatStringArgNr == 0 || formatStringArgNr > parameters.size()) @@ -196,18 +217,32 @@ void CheckBufferOverrun::arrayIndex() if (!indexToken) continue; - const Token *stringLiteral = nullptr; + std::vector dimensions; - if (!array->variable()->isArray() && array->variable()->dimensions().empty()) { - stringLiteral = array->getValueTokenMinStrSize(); - if (!stringLiteral) - continue; + bool mightBeLarger; + + if (array->variable()->isArray() && !array->variable()->dimensions().empty()) { + dimensions = array->variable()->dimensions(); + mightBeLarger = (dimensions.size() >= 1 && (dimensions[0].num <= 1 || !dimensions[0].tok)); + } else if (const Token *stringLiteral = array->getValueTokenMinStrSize()) { + Dimension dim; + dim.tok = nullptr; + dim.num = Token::getStrSize(stringLiteral); + dim.known = array->hasKnownValue(); + dimensions.emplace_back(dim); + mightBeLarger = false; + } else if (array->valueType() && array->valueType()->pointer >= 1 && array->valueType()->isIntegral()) { + dimensions = getDynamicDimensions(array, array->valueType()->typeSize(*mSettings)); + mightBeLarger = false; } - const MathLib::bigint dim = stringLiteral ? Token::getStrSize(stringLiteral) : array->variable()->dimensions()[0].num; + if (dimensions.empty()) + continue; + + const MathLib::bigint dim = dimensions[0].num; // Positive index - if (stringLiteral || dim > 1) { // TODO check arrays with dim 1 also + if (!mightBeLarger) { // TODO check arrays with dim 1 also for (int cond = 0; cond < 2; cond++) { const ValueFlow::Value *value = indexToken->getMaxValue(cond == 1); if (!value) @@ -222,22 +257,22 @@ void CheckBufferOverrun::arrayIndex() if (parent->isUnaryOp("&")) continue; } - arrayIndexError(tok, array->variable(), value); + arrayIndexError(tok, dimensions, value); } } // Negative index const ValueFlow::Value *negativeValue = indexToken->getValueLE(-1, mSettings); if (negativeValue) { - negativeIndexError(tok, array->variable(), negativeValue); + negativeIndexError(tok, dimensions, negativeValue); } } } -static std::string arrayIndexMessage(const Token *tok, const Variable *var, const ValueFlow::Value *index) +static std::string arrayIndexMessage(const Token *tok, const std::vector &dimensions, const ValueFlow::Value *index) { std::string array = tok->astOperand1()->expressionString(); - for (const Dimension &dim : var->dimensions()) + for (const Dimension &dim : dimensions) array += "[" + MathLib::toString(dim.num) + "]"; std::ostringstream errmsg; @@ -250,7 +285,7 @@ static std::string arrayIndexMessage(const Token *tok, const Variable *var, cons return errmsg.str(); } -void CheckBufferOverrun::arrayIndexError(const Token *tok, const Variable *var, const ValueFlow::Value *index) +void CheckBufferOverrun::arrayIndexError(const Token *tok, const std::vector &dimensions, const ValueFlow::Value *index) { if (!tok) { reportError(tok, Severity::error, "arrayIndexOutOfBounds", "Array 'arr[16]' accessed at index 16, which is out of bounds.", CWE788, false); @@ -261,12 +296,12 @@ void CheckBufferOverrun::arrayIndexError(const Token *tok, const Variable *var, reportError(getErrorPath(tok, index, "Array index out of bounds"), index->errorSeverity() ? Severity::error : Severity::warning, index->condition ? "arrayIndexOutOfBoundsCond" : "arrayIndexOutOfBounds", - arrayIndexMessage(tok, var, index), + arrayIndexMessage(tok, dimensions, index), CWE788, index->isInconclusive()); } -void CheckBufferOverrun::negativeIndexError(const Token *tok, const Variable *var, const ValueFlow::Value *negativeValue) +void CheckBufferOverrun::negativeIndexError(const Token *tok, const std::vector &dimensions, const ValueFlow::Value *negativeValue) { if (!negativeValue) { reportError(tok, Severity::error, "negativeIndex", "Negative array index", CWE786, false); @@ -279,7 +314,7 @@ void CheckBufferOverrun::negativeIndexError(const Token *tok, const Variable *va reportError(getErrorPath(tok, negativeValue, "Negative array index"), negativeValue->errorSeverity() ? Severity::error : Severity::warning, "negativeIndex", - arrayIndexMessage(tok, var, negativeValue), + arrayIndexMessage(tok, dimensions, negativeValue), CWE786, negativeValue->isInconclusive()); } @@ -299,30 +334,8 @@ size_t CheckBufferOverrun::getBufferSize(const Token *bufTok) const dim *= d.num; if (var->isPointerArray()) return dim * mSettings->sizeof_pointer; - switch (bufTok->valueType()->type) { - case ValueType::Type::BOOL: - return dim * mSettings->sizeof_bool; - case ValueType::Type::CHAR: - return dim; - case ValueType::Type::SHORT: - return dim * mSettings->sizeof_short; - case ValueType::Type::INT: - return dim * mSettings->sizeof_int; - case ValueType::Type::LONG: - return dim * mSettings->sizeof_long; - case ValueType::Type::LONGLONG: - return dim * mSettings->sizeof_long_long; - case ValueType::Type::FLOAT: - return dim * mSettings->sizeof_float; - case ValueType::Type::DOUBLE: - return dim * mSettings->sizeof_double; - case ValueType::Type::LONGDOUBLE: - return dim * mSettings->sizeof_long_double; - default: - // TODO: Get size of other types - break; - }; - return 0; + const MathLib::bigint typeSize = bufTok->valueType()->typeSize(*mSettings); + return dim * typeSize; } // TODO: For pointers get pointer value.. return 0; diff --git a/lib/checkbufferoverrun.h b/lib/checkbufferoverrun.h index ebf774b71..5769cf84d 100644 --- a/lib/checkbufferoverrun.h +++ b/lib/checkbufferoverrun.h @@ -27,6 +27,7 @@ #include "errorlogger.h" #include "mathlib.h" #include "tokenize.h" +#include "symboldatabase.h" #include #include @@ -69,16 +70,16 @@ public: void getErrorMessages(ErrorLogger *errorLogger, const Settings *settings) const OVERRIDE { CheckBufferOverrun c(nullptr, settings, errorLogger); - c.arrayIndexError(nullptr, nullptr, nullptr); - c.negativeIndexError(nullptr, nullptr, nullptr); + c.arrayIndexError(nullptr, std::vector(), nullptr); + c.negativeIndexError(nullptr, std::vector(), nullptr); c.arrayIndexThenCheckError(nullptr, "i"); } private: void arrayIndex(); - void arrayIndexError(const Token *tok, const Variable *var, const ValueFlow::Value *index); - void negativeIndexError(const Token *tok, const Variable *var, const ValueFlow::Value *negativeValue); + void arrayIndexError(const Token *tok, const std::vector &dimensions, const ValueFlow::Value *index); + void negativeIndexError(const Token *tok, const std::vector &dimensions, const ValueFlow::Value *negativeValue); void bufferOverflow(); void bufferOverflowError(const Token *tok); @@ -90,7 +91,6 @@ private: void terminateStrncpyError(const Token *tok, const std::string &varname); void bufferNotZeroTerminatedError(const Token *tok, const std::string &varname, const std::string &function); - size_t getBufferSize(const Token *bufTok) const; static std::string myName() { diff --git a/lib/symboldatabase.cpp b/lib/symboldatabase.cpp index 1f56adb4c..d8f17d515 100644 --- a/lib/symboldatabase.cpp +++ b/lib/symboldatabase.cpp @@ -5636,6 +5636,33 @@ std::string ValueType::dump() const return ret.str(); } +MathLib::bigint ValueType::typeSize(const cppcheck::Platform &platform) const +{ + switch (type) { + case ValueType::Type::BOOL: + return platform.sizeof_bool; + case ValueType::Type::CHAR: + return 1; + case ValueType::Type::SHORT: + return platform.sizeof_short; + case ValueType::Type::INT: + return platform.sizeof_int; + case ValueType::Type::LONG: + return platform.sizeof_long; + case ValueType::Type::LONGLONG: + return platform.sizeof_long_long; + case ValueType::Type::FLOAT: + return platform.sizeof_float; + case ValueType::Type::DOUBLE: + return platform.sizeof_double; + case ValueType::Type::LONGDOUBLE: + return platform.sizeof_long_double; + default: + return 0; + }; + +} + std::string ValueType::str() const { std::string ret; diff --git a/lib/symboldatabase.h b/lib/symboldatabase.h index b30cf926c..862d715dd 100644 --- a/lib/symboldatabase.h +++ b/lib/symboldatabase.h @@ -24,6 +24,7 @@ #include "config.h" #include "library.h" #include "mathlib.h" +#include "platform.h" #include "token.h" #include @@ -1127,6 +1128,8 @@ public: return typeScope && typeScope->type == Scope::eEnum; } + MathLib::bigint typeSize(const cppcheck::Platform &platform) const; + std::string str() const; std::string dump() const; }; diff --git a/lib/token.cpp b/lib/token.cpp index 143434044..bb9698912 100644 --- a/lib/token.cpp +++ b/lib/token.cpp @@ -1432,6 +1432,9 @@ void Token::printValueFlow(bool xml, std::ostream &out) const case ValueFlow::Value::UNINIT: out << "uninit=\"1\""; break; + case ValueFlow::Value::BUFFER_SIZE: + out << "buffer-size=\"" << value.intvalue << "\""; + break; case ValueFlow::Value::CONTAINER_SIZE: out << "container-size=\"" << value.intvalue << '\"'; break; @@ -1472,6 +1475,7 @@ void Token::printValueFlow(bool xml, std::ostream &out) const case ValueFlow::Value::UNINIT: out << "Uninit"; break; + case ValueFlow::Value::BUFFER_SIZE: case ValueFlow::Value::CONTAINER_SIZE: out << "size=" << value.intvalue; break; diff --git a/lib/valueflow.cpp b/lib/valueflow.cpp index c1c2bdf8b..1c1bcd67b 100644 --- a/lib/valueflow.cpp +++ b/lib/valueflow.cpp @@ -5006,6 +5006,55 @@ static void valueFlowFwdAnalysis(const TokenList *tokenlist, const Settings *set } } +static void valueFlowDynamicBufferSize(TokenList *tokenlist, SymbolDatabase *symboldatabase, ErrorLogger *errorLogger, const Settings *settings) +{ + for (const Scope *functionScope : symboldatabase->functionScopes) { + for (const Token *tok = functionScope->bodyStart; tok != functionScope->bodyEnd; tok = tok->next()) { + if (!Token::Match(tok, "[;{}] %var% =")) + continue; + + if (!tok->next()->variable()) + continue; + + const Token *rhs = tok->tokAt(2)->astOperand2(); + while (rhs && rhs->isCast()) + rhs = rhs->astOperand2() ? rhs->astOperand2() : rhs->astOperand1(); + if (!rhs) + continue; + + if (!Token::Match(rhs->previous(), "%name% (")) + continue; + + const Library::AllocFunc *allocFunc = settings->library.alloc(rhs->previous()); + if (!allocFunc || allocFunc->bufferSizeArgValue < 1) + continue; + + const std::vector args = getArguments(rhs->previous()); + if (args.size() < allocFunc->bufferSizeArgValue) + continue; + + const Token *sizeArg = args[allocFunc->bufferSizeArgValue - 1]; + if (!sizeArg || !sizeArg->hasKnownIntValue()) + continue; + + ValueFlow::Value value(sizeArg->getKnownIntValue()); + value.valueType = ValueFlow::Value::ValueType::BUFFER_SIZE; + value.setKnown(); + const std::list values{value}; + valueFlowForward(const_cast(rhs), + functionScope->bodyEnd, + tok->next()->variable(), + tok->next()->varId(), + values, + true, + false, + tokenlist, + errorLogger, + settings); + } + } +} + ValueFlow::Value::Value(const Token *c, long long val) : valueType(INT), intvalue(val), @@ -5037,6 +5086,7 @@ std::string ValueFlow::Value::infoString() const return ""; case UNINIT: return ""; + case BUFFER_SIZE: case CONTAINER_SIZE: return "size=" + MathLib::toString(intvalue); case LIFETIME: @@ -5102,6 +5152,8 @@ void ValueFlow::setValues(TokenList *tokenlist, SymbolDatabase* symboldatabase, valueFlowContainerAfterCondition(tokenlist, symboldatabase, errorLogger, settings); } } + + valueFlowDynamicBufferSize(tokenlist, symboldatabase, errorLogger, settings); } diff --git a/lib/valueflow.h b/lib/valueflow.h index 9b8caf0fa..51d1b8713 100644 --- a/lib/valueflow.h +++ b/lib/valueflow.h @@ -80,6 +80,10 @@ namespace ValueFlow { break; case UNINIT: break; + case BUFFER_SIZE: + if (intvalue != rhs.intvalue) + return false; + break; case CONTAINER_SIZE: if (intvalue != rhs.intvalue) return false; @@ -99,7 +103,7 @@ namespace ValueFlow { std::string infoString() const; - enum ValueType { INT, TOK, FLOAT, MOVED, UNINIT, CONTAINER_SIZE, LIFETIME } valueType; + enum ValueType { INT, TOK, FLOAT, MOVED, UNINIT, CONTAINER_SIZE, LIFETIME, BUFFER_SIZE } valueType; bool isIntValue() const { return valueType == INT; } @@ -121,6 +125,9 @@ namespace ValueFlow { bool isLifetimeValue() const { return valueType == LIFETIME; } + bool isBufferSizeValue() const { + return valueType == BUFFER_SIZE; + } bool isLocalLifetimeValue() const { return valueType == LIFETIME && lifetimeScope == Local; diff --git a/test/testbufferoverrun.cpp b/test/testbufferoverrun.cpp index 30908385f..5623b13b8 100644 --- a/test/testbufferoverrun.cpp +++ b/test/testbufferoverrun.cpp @@ -121,7 +121,7 @@ private: TEST_CASE(array_index_39); TEST_CASE(array_index_40); // loop variable calculation, taking address TEST_CASE(array_index_41); // structs with the same name - // TODO malloc TEST_CASE(array_index_42); + TEST_CASE(array_index_42); TEST_CASE(array_index_43); // struct with array TEST_CASE(array_index_44); // #3979 TEST_CASE(array_index_45); // #4207 - calling function with variable number of parameters (...) @@ -202,9 +202,9 @@ private: TEST_CASE(assign1); - // TODO TEST_CASE(alloc_new); // Buffer allocated with new - // TODO TEST_CASE(alloc_malloc); // Buffer allocated with malloc - // TODO TEST_CASE(alloc_string); // statically allocated buffer + // TODO new TEST_CASE(alloc_new); // Buffer allocated with new + TEST_CASE(alloc_malloc); // Buffer allocated with malloc + TEST_CASE(alloc_string); // statically allocated buffer // TODO TEST_CASE(alloc_alloca); // Buffer allocated with alloca // TODO TEST_CASE(countSprintfLength);