From 97dd8e3c4f38ef345b01fbbf0a2052c7875ff7e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= Date: Tue, 7 May 2024 13:06:11 +0200 Subject: [PATCH] [analyzer] Clean up apiModeling.llvm.ReturnValue (#91231) This commit heavily refactors and simplifies the small and trivial checker `apiModeling.llvm.ReturnValue`, which is responsible for modeling the peculiar coding convention that in the LLVM/Clang codebase certain Error() methods always return true. Changes included in this commit: - The call description mode is now specified explicitly (this is not the most significant change, but it was the original reason for touching this checker). - Previously the code provided support for modeling functions that always return `false`; but there was no need for that, so this commit hardcodes that the return value is `true`. - The overcomplicated constraint/state handling logic was simplified. - The separate `checkEndFunction` callback was removed to simplify the code. Admittedly this means that the note tag for the " returns false, breaking the convention" case is placed on the method call instead of the `return` statement; but that case will _never_ appear in practice, so this difference is mostly academical. - The text of the note tags was clarified. - The descriptions in the header comment and Checkers.td were clarified. - Some minor cleanup was applied in the associated test file. This change is very close to NFC because it only affects a hidden `apiModeling.llvm` checker that's only relevant during the analysis of the LLVM/Clang codebase, and even there it doesn't affect the normal behavior of the checker. --- .../clang/StaticAnalyzer/Checkers/Checkers.td | 2 +- .../Checkers/ReturnValueChecker.cpp | 156 +++++------------- .../test/Analysis/return-value-guaranteed.cpp | 66 ++++---- 3 files changed, 79 insertions(+), 145 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index 520286b57c9fdc..64414e3d37f751 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -1397,7 +1397,7 @@ def CastValueChecker : Checker<"CastValue">, Documentation; def ReturnValueChecker : Checker<"ReturnValue">, - HelpText<"Model the guaranteed boolean return value of function calls">, + HelpText<"Model certain Error() methods that always return true by convention">, Documentation; } // end "apiModeling.llvm" diff --git a/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp index c3112ebe4e7941..3da571adfa44cc 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp @@ -1,4 +1,4 @@ -//===- ReturnValueChecker - Applies guaranteed return values ----*- C++ -*-===// +//===- ReturnValueChecker - Check methods always returning true -*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,8 +6,13 @@ // //===----------------------------------------------------------------------===// // -// This defines ReturnValueChecker, which checks for calls with guaranteed -// boolean return value. It ensures the return value of each function call. +// This defines ReturnValueChecker, which models a very specific coding +// convention within the LLVM/Clang codebase: there several classes that have +// Error() methods which always return true. +// This checker was introduced to eliminate false positives caused by this +// peculiar "always returns true" invariant. (Normally, the analyzer assumes +// that a function returning `bool` can return both `true` and `false`, because +// otherwise it could've been a `void` function.) // //===----------------------------------------------------------------------===// @@ -18,43 +23,40 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/FormatVariadic.h" #include using namespace clang; using namespace ento; +using llvm::formatv; namespace { -class ReturnValueChecker : public Checker { +class ReturnValueChecker : public Checker { public: - // It sets the predefined invariant ('CDM') if the current call not break it. void checkPostCall(const CallEvent &Call, CheckerContext &C) const; - // It reports whether a predefined invariant ('CDM') is broken. - void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const; - private: - // The pairs are in the following form: {{{class, call}}, return value} - const CallDescriptionMap CDM = { + const CallDescriptionSet Methods = { // These are known in the LLVM project: 'Error()' - {{{"ARMAsmParser", "Error"}}, true}, - {{{"HexagonAsmParser", "Error"}}, true}, - {{{"LLLexer", "Error"}}, true}, - {{{"LLParser", "Error"}}, true}, - {{{"MCAsmParser", "Error"}}, true}, - {{{"MCAsmParserExtension", "Error"}}, true}, - {{{"TGParser", "Error"}}, true}, - {{{"X86AsmParser", "Error"}}, true}, + {CDM::CXXMethod, {"ARMAsmParser", "Error"}}, + {CDM::CXXMethod, {"HexagonAsmParser", "Error"}}, + {CDM::CXXMethod, {"LLLexer", "Error"}}, + {CDM::CXXMethod, {"LLParser", "Error"}}, + {CDM::CXXMethod, {"MCAsmParser", "Error"}}, + {CDM::CXXMethod, {"MCAsmParserExtension", "Error"}}, + {CDM::CXXMethod, {"TGParser", "Error"}}, + {CDM::CXXMethod, {"X86AsmParser", "Error"}}, // 'TokError()' - {{{"LLParser", "TokError"}}, true}, - {{{"MCAsmParser", "TokError"}}, true}, - {{{"MCAsmParserExtension", "TokError"}}, true}, - {{{"TGParser", "TokError"}}, true}, + {CDM::CXXMethod, {"LLParser", "TokError"}}, + {CDM::CXXMethod, {"MCAsmParser", "TokError"}}, + {CDM::CXXMethod, {"MCAsmParserExtension", "TokError"}}, + {CDM::CXXMethod, {"TGParser", "TokError"}}, // 'error()' - {{{"MIParser", "error"}}, true}, - {{{"WasmAsmParser", "error"}}, true}, - {{{"WebAssemblyAsmParser", "error"}}, true}, + {CDM::CXXMethod, {"MIParser", "error"}}, + {CDM::CXXMethod, {"WasmAsmParser", "error"}}, + {CDM::CXXMethod, {"WebAssemblyAsmParser", "error"}}, // Other - {{{"AsmParser", "printError"}}, true}}; + {CDM::CXXMethod, {"AsmParser", "printError"}}}; }; } // namespace @@ -68,100 +70,32 @@ static std::string getName(const CallEvent &Call) { return Name; } -// The predefinitions ('CDM') could break due to the ever growing code base. -// Check for the expected invariants and see whether they apply. -static std::optional isInvariantBreak(bool ExpectedValue, SVal ReturnV, - CheckerContext &C) { - auto ReturnDV = ReturnV.getAs(); - if (!ReturnDV) - return std::nullopt; - - if (ExpectedValue) - return C.getState()->isNull(*ReturnDV).isConstrainedTrue(); - - return C.getState()->isNull(*ReturnDV).isConstrainedFalse(); -} - void ReturnValueChecker::checkPostCall(const CallEvent &Call, CheckerContext &C) const { - const bool *RawExpectedValue = CDM.lookup(Call); - if (!RawExpectedValue) + if (!Methods.contains(Call)) return; - SVal ReturnV = Call.getReturnValue(); - bool ExpectedValue = *RawExpectedValue; - std::optional IsInvariantBreak = - isInvariantBreak(ExpectedValue, ReturnV, C); - if (!IsInvariantBreak) - return; + auto ReturnV = Call.getReturnValue().getAs(); - // If the invariant is broken it is reported by 'checkEndFunction()'. - if (*IsInvariantBreak) + if (!ReturnV) return; - std::string Name = getName(Call); - const NoteTag *CallTag = C.getNoteTag( - [Name, ExpectedValue](PathSensitiveBugReport &) -> std::string { - SmallString<128> Msg; - llvm::raw_svector_ostream Out(Msg); - - Out << '\'' << Name << "' returns " - << (ExpectedValue ? "true" : "false"); - return std::string(Out.str()); - }, - /*IsPrunable=*/true); - ProgramStateRef State = C.getState(); - State = State->assume(ReturnV.castAs(), ExpectedValue); - C.addTransition(State, CallTag); -} - -void ReturnValueChecker::checkEndFunction(const ReturnStmt *RS, - CheckerContext &C) const { - if (!RS || !RS->getRetValue()) + if (ProgramStateRef StTrue = State->assume(*ReturnV, true)) { + // The return value can be true, so transition to a state where it's true. + std::string Msg = + formatv("'{0}' returns true (by convention)", getName(Call)); + C.addTransition(StTrue, C.getNoteTag(Msg, /*IsPrunable=*/true)); return; - - // We cannot get the caller in the top-frame. - const StackFrameContext *SFC = C.getStackFrame(); - if (C.getStackFrame()->inTopFrame()) - return; - - ProgramStateRef State = C.getState(); - CallEventManager &CMgr = C.getStateManager().getCallEventManager(); - CallEventRef<> Call = CMgr.getCaller(SFC, State); - if (!Call) - return; - - const bool *RawExpectedValue = CDM.lookup(*Call); - if (!RawExpectedValue) - return; - - SVal ReturnV = State->getSVal(RS->getRetValue(), C.getLocationContext()); - bool ExpectedValue = *RawExpectedValue; - std::optional IsInvariantBreak = - isInvariantBreak(ExpectedValue, ReturnV, C); - if (!IsInvariantBreak) - return; - - // If the invariant is appropriate it is reported by 'checkPostCall()'. - if (!*IsInvariantBreak) - return; - - std::string Name = getName(*Call); - const NoteTag *CallTag = C.getNoteTag( - [Name, ExpectedValue](BugReport &BR) -> std::string { - SmallString<128> Msg; - llvm::raw_svector_ostream Out(Msg); - - // The following is swapped because the invariant is broken. - Out << '\'' << Name << "' returns " - << (ExpectedValue ? "false" : "true"); - - return std::string(Out.str()); - }, - /*IsPrunable=*/false); - - C.addTransition(State, CallTag); + } + // Paranoia: if the return value is known to be false (which is highly + // unlikely, it's easy to ensure that the method always returns true), then + // produce a note that highlights that this unusual situation. + // Note that this checker is 'hidden' so it cannot produce a bug report. + std::string Msg = formatv("'{0}' returned false, breaking the convention " + "that it always returns true", + getName(Call)); + C.addTransition(State, C.getNoteTag(Msg, /*IsPrunable=*/true)); } void ento::registerReturnValueChecker(CheckerManager &Mgr) { diff --git a/clang/test/Analysis/return-value-guaranteed.cpp b/clang/test/Analysis/return-value-guaranteed.cpp index 367a8e5906afcd..3b010ffba36001 100644 --- a/clang/test/Analysis/return-value-guaranteed.cpp +++ b/clang/test/Analysis/return-value-guaranteed.cpp @@ -1,91 +1,91 @@ // RUN: %clang_analyze_cc1 \ // RUN: -analyzer-checker=core,apiModeling.llvm.ReturnValue \ -// RUN: -analyzer-output=text -verify=class %s +// RUN: -analyzer-output=text -verify %s struct Foo { int Field; }; bool problem(); void doSomething(); -// We predefined the return value of 'MCAsmParser::Error' as true and we cannot -// take the false-branches which leads to a "garbage value" false positive. -namespace test_classes { +// Test the normal case when the implementation of MCAsmParser::Error() (one of +// the methods modeled by this checker) is opaque. +namespace test_normal { struct MCAsmParser { static bool Error(); }; bool parseFoo(Foo &F) { if (problem()) { - // class-note@-1 {{Assuming the condition is false}} - // class-note@-2 {{Taking false branch}} + // expected-note@-1 {{Assuming the condition is false}} + // expected-note@-2 {{Taking false branch}} return MCAsmParser::Error(); } F.Field = 0; - // class-note@-1 {{The value 0 is assigned to 'F.Field'}} - return !MCAsmParser::Error(); - // class-note@-1 {{'MCAsmParser::Error' returns true}} + // expected-note@-1 {{The value 0 is assigned to 'F.Field'}} + return false; } bool parseFile() { Foo F; if (parseFoo(F)) { - // class-note@-1 {{Calling 'parseFoo'}} - // class-note@-2 {{Returning from 'parseFoo'}} - // class-note@-3 {{Taking false branch}} + // expected-note@-1 {{Calling 'parseFoo'}} + // expected-note@-2 {{Returning from 'parseFoo'}} + // expected-note@-3 {{Taking false branch}} return true; } + // The following expression would produce the false positive report + // "The left operand of '==' is a garbage value" + // without the modeling done by apiModeling.llvm.ReturnValue: if (F.Field == 0) { - // class-note@-1 {{Field 'Field' is equal to 0}} - // class-note@-2 {{Taking true branch}} - - // no-warning: "The left operand of '==' is a garbage value" was here. + // expected-note@-1 {{Field 'Field' is equal to 0}} + // expected-note@-2 {{Taking true branch}} doSomething(); } + // Trigger a zero division to get path notes: (void)(1 / F.Field); - // class-warning@-1 {{Division by zero}} - // class-note@-2 {{Division by zero}} + // expected-warning@-1 {{Division by zero}} + // expected-note@-2 {{Division by zero}} return false; } -} // namespace test_classes +} // namespace test_normal -// We predefined 'MCAsmParser::Error' as returning true, but now it returns -// false, which breaks our invariant. Test the notes. +// Sanity check for the highly unlikely case where the implementation of the +// method breaks the convention. namespace test_break { struct MCAsmParser { static bool Error() { - return false; // class-note {{'MCAsmParser::Error' returns false}} + return false; } }; bool parseFoo(Foo &F) { if (problem()) { - // class-note@-1 {{Assuming the condition is false}} - // class-note@-2 {{Taking false branch}} + // expected-note@-1 {{Assuming the condition is false}} + // expected-note@-2 {{Taking false branch}} return !MCAsmParser::Error(); } F.Field = 0; - // class-note@-1 {{The value 0 is assigned to 'F.Field'}} + // expected-note@-1 {{The value 0 is assigned to 'F.Field'}} return MCAsmParser::Error(); - // class-note@-1 {{Calling 'MCAsmParser::Error'}} - // class-note@-2 {{Returning from 'MCAsmParser::Error'}} + // expected-note@-1 {{'MCAsmParser::Error' returned false, breaking the convention that it always returns true}} } bool parseFile() { Foo F; if (parseFoo(F)) { - // class-note@-1 {{Calling 'parseFoo'}} - // class-note@-2 {{Returning from 'parseFoo'}} - // class-note@-3 {{Taking false branch}} + // expected-note@-1 {{Calling 'parseFoo'}} + // expected-note@-2 {{Returning from 'parseFoo'}} + // expected-note@-3 {{Taking false branch}} return true; } (void)(1 / F.Field); - // class-warning@-1 {{Division by zero}} - // class-note@-2 {{Division by zero}} + // expected-warning@-1 {{Division by zero}} + // expected-note@-2 {{Division by zero}} return false; } -} // namespace test_classes +} // namespace test_break