Skip to content

Commit

Permalink
[analyzer] Clean up apiModeling.llvm.ReturnValue (llvm#91231)
Browse files Browse the repository at this point in the history
This commit heavily refactors and simplifies the small and trivial
checker `apiModeling.llvm.ReturnValue`, which is responsible for
modeling the peculiar coding convention that in the LLVM/Clang codebase
certain Error() methods always return true.

Changes included in this commit:
- The call description mode is now specified explicitly (this is not the
most significant change, but it was the original reason for touching
this checker).
- Previously the code provided support for modeling functions that
always return `false`; but there was no need for that, so this commit
hardcodes that the return value is `true`.
- The overcomplicated constraint/state handling logic was simplified.
- The separate `checkEndFunction` callback was removed to simplify the
code. Admittedly this means that the note tag for the "<method> returns
false, breaking the convention" case is placed on the method call
instead of the `return` statement; but that case will _never_ appear in
practice, so this difference is mostly academical.
- The text of the note tags was clarified.
- The descriptions in the header comment and Checkers.td were clarified.
- Some minor cleanup was applied in the associated test file.

This change is very close to NFC because it only affects a hidden
`apiModeling.llvm` checker that's only relevant during the analysis of
the LLVM/Clang codebase, and even there it doesn't affect the normal
behavior of the checker.
  • Loading branch information
NagyDonat authored May 7, 2024
1 parent 651bdb9 commit 97dd8e3
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 145 deletions.
2 changes: 1 addition & 1 deletion clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
Original file line number Diff line number Diff line change
Expand Up @@ -1397,7 +1397,7 @@ def CastValueChecker : Checker<"CastValue">,
Documentation<NotDocumented>;

def ReturnValueChecker : Checker<"ReturnValue">,
HelpText<"Model the guaranteed boolean return value of function calls">,
HelpText<"Model certain Error() methods that always return true by convention">,
Documentation<NotDocumented>;

} // end "apiModeling.llvm"
Expand Down
156 changes: 45 additions & 111 deletions clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
//===- ReturnValueChecker - Applies guaranteed return values ----*- C++ -*-===//
//===- ReturnValueChecker - Check methods always returning true -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This defines ReturnValueChecker, which checks for calls with guaranteed
// boolean return value. It ensures the return value of each function call.
// This defines ReturnValueChecker, which models a very specific coding
// convention within the LLVM/Clang codebase: there several classes that have
// Error() methods which always return true.
// This checker was introduced to eliminate false positives caused by this
// peculiar "always returns true" invariant. (Normally, the analyzer assumes
// that a function returning `bool` can return both `true` and `false`, because
// otherwise it could've been a `void` function.)
//
//===----------------------------------------------------------------------===//

Expand All @@ -18,43 +23,40 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/FormatVariadic.h"
#include <optional>

using namespace clang;
using namespace ento;
using llvm::formatv;

namespace {
class ReturnValueChecker : public Checker<check::PostCall, check::EndFunction> {
class ReturnValueChecker : public Checker<check::PostCall> {
public:
// It sets the predefined invariant ('CDM') if the current call not break it.
void checkPostCall(const CallEvent &Call, CheckerContext &C) const;

// It reports whether a predefined invariant ('CDM') is broken.
void checkEndFunction(const ReturnStmt *RS, CheckerContext &C) const;

private:
// The pairs are in the following form: {{{class, call}}, return value}
const CallDescriptionMap<bool> CDM = {
const CallDescriptionSet Methods = {
// These are known in the LLVM project: 'Error()'
{{{"ARMAsmParser", "Error"}}, true},
{{{"HexagonAsmParser", "Error"}}, true},
{{{"LLLexer", "Error"}}, true},
{{{"LLParser", "Error"}}, true},
{{{"MCAsmParser", "Error"}}, true},
{{{"MCAsmParserExtension", "Error"}}, true},
{{{"TGParser", "Error"}}, true},
{{{"X86AsmParser", "Error"}}, true},
{CDM::CXXMethod, {"ARMAsmParser", "Error"}},
{CDM::CXXMethod, {"HexagonAsmParser", "Error"}},
{CDM::CXXMethod, {"LLLexer", "Error"}},
{CDM::CXXMethod, {"LLParser", "Error"}},
{CDM::CXXMethod, {"MCAsmParser", "Error"}},
{CDM::CXXMethod, {"MCAsmParserExtension", "Error"}},
{CDM::CXXMethod, {"TGParser", "Error"}},
{CDM::CXXMethod, {"X86AsmParser", "Error"}},
// 'TokError()'
{{{"LLParser", "TokError"}}, true},
{{{"MCAsmParser", "TokError"}}, true},
{{{"MCAsmParserExtension", "TokError"}}, true},
{{{"TGParser", "TokError"}}, true},
{CDM::CXXMethod, {"LLParser", "TokError"}},
{CDM::CXXMethod, {"MCAsmParser", "TokError"}},
{CDM::CXXMethod, {"MCAsmParserExtension", "TokError"}},
{CDM::CXXMethod, {"TGParser", "TokError"}},
// 'error()'
{{{"MIParser", "error"}}, true},
{{{"WasmAsmParser", "error"}}, true},
{{{"WebAssemblyAsmParser", "error"}}, true},
{CDM::CXXMethod, {"MIParser", "error"}},
{CDM::CXXMethod, {"WasmAsmParser", "error"}},
{CDM::CXXMethod, {"WebAssemblyAsmParser", "error"}},
// Other
{{{"AsmParser", "printError"}}, true}};
{CDM::CXXMethod, {"AsmParser", "printError"}}};
};
} // namespace

Expand All @@ -68,100 +70,32 @@ static std::string getName(const CallEvent &Call) {
return Name;
}

// The predefinitions ('CDM') could break due to the ever growing code base.
// Check for the expected invariants and see whether they apply.
static std::optional<bool> isInvariantBreak(bool ExpectedValue, SVal ReturnV,
CheckerContext &C) {
auto ReturnDV = ReturnV.getAs<DefinedOrUnknownSVal>();
if (!ReturnDV)
return std::nullopt;

if (ExpectedValue)
return C.getState()->isNull(*ReturnDV).isConstrainedTrue();

return C.getState()->isNull(*ReturnDV).isConstrainedFalse();
}

void ReturnValueChecker::checkPostCall(const CallEvent &Call,
CheckerContext &C) const {
const bool *RawExpectedValue = CDM.lookup(Call);
if (!RawExpectedValue)
if (!Methods.contains(Call))
return;

SVal ReturnV = Call.getReturnValue();
bool ExpectedValue = *RawExpectedValue;
std::optional<bool> IsInvariantBreak =
isInvariantBreak(ExpectedValue, ReturnV, C);
if (!IsInvariantBreak)
return;
auto ReturnV = Call.getReturnValue().getAs<DefinedOrUnknownSVal>();

// If the invariant is broken it is reported by 'checkEndFunction()'.
if (*IsInvariantBreak)
if (!ReturnV)
return;

std::string Name = getName(Call);
const NoteTag *CallTag = C.getNoteTag(
[Name, ExpectedValue](PathSensitiveBugReport &) -> std::string {
SmallString<128> Msg;
llvm::raw_svector_ostream Out(Msg);

Out << '\'' << Name << "' returns "
<< (ExpectedValue ? "true" : "false");
return std::string(Out.str());
},
/*IsPrunable=*/true);

ProgramStateRef State = C.getState();
State = State->assume(ReturnV.castAs<DefinedOrUnknownSVal>(), ExpectedValue);
C.addTransition(State, CallTag);
}

void ReturnValueChecker::checkEndFunction(const ReturnStmt *RS,
CheckerContext &C) const {
if (!RS || !RS->getRetValue())
if (ProgramStateRef StTrue = State->assume(*ReturnV, true)) {
// The return value can be true, so transition to a state where it's true.
std::string Msg =
formatv("'{0}' returns true (by convention)", getName(Call));
C.addTransition(StTrue, C.getNoteTag(Msg, /*IsPrunable=*/true));
return;

// We cannot get the caller in the top-frame.
const StackFrameContext *SFC = C.getStackFrame();
if (C.getStackFrame()->inTopFrame())
return;

ProgramStateRef State = C.getState();
CallEventManager &CMgr = C.getStateManager().getCallEventManager();
CallEventRef<> Call = CMgr.getCaller(SFC, State);
if (!Call)
return;

const bool *RawExpectedValue = CDM.lookup(*Call);
if (!RawExpectedValue)
return;

SVal ReturnV = State->getSVal(RS->getRetValue(), C.getLocationContext());
bool ExpectedValue = *RawExpectedValue;
std::optional<bool> IsInvariantBreak =
isInvariantBreak(ExpectedValue, ReturnV, C);
if (!IsInvariantBreak)
return;

// If the invariant is appropriate it is reported by 'checkPostCall()'.
if (!*IsInvariantBreak)
return;

std::string Name = getName(*Call);
const NoteTag *CallTag = C.getNoteTag(
[Name, ExpectedValue](BugReport &BR) -> std::string {
SmallString<128> Msg;
llvm::raw_svector_ostream Out(Msg);

// The following is swapped because the invariant is broken.
Out << '\'' << Name << "' returns "
<< (ExpectedValue ? "false" : "true");

return std::string(Out.str());
},
/*IsPrunable=*/false);

C.addTransition(State, CallTag);
}
// Paranoia: if the return value is known to be false (which is highly
// unlikely, it's easy to ensure that the method always returns true), then
// produce a note that highlights that this unusual situation.
// Note that this checker is 'hidden' so it cannot produce a bug report.
std::string Msg = formatv("'{0}' returned false, breaking the convention "
"that it always returns true",
getName(Call));
C.addTransition(State, C.getNoteTag(Msg, /*IsPrunable=*/true));
}

void ento::registerReturnValueChecker(CheckerManager &Mgr) {
Expand Down
66 changes: 33 additions & 33 deletions clang/test/Analysis/return-value-guaranteed.cpp
Original file line number Diff line number Diff line change
@@ -1,91 +1,91 @@
// RUN: %clang_analyze_cc1 \
// RUN: -analyzer-checker=core,apiModeling.llvm.ReturnValue \
// RUN: -analyzer-output=text -verify=class %s
// RUN: -analyzer-output=text -verify %s

struct Foo { int Field; };
bool problem();
void doSomething();

// We predefined the return value of 'MCAsmParser::Error' as true and we cannot
// take the false-branches which leads to a "garbage value" false positive.
namespace test_classes {
// Test the normal case when the implementation of MCAsmParser::Error() (one of
// the methods modeled by this checker) is opaque.
namespace test_normal {
struct MCAsmParser {
static bool Error();
};

bool parseFoo(Foo &F) {
if (problem()) {
// class-note@-1 {{Assuming the condition is false}}
// class-note@-2 {{Taking false branch}}
// expected-note@-1 {{Assuming the condition is false}}
// expected-note@-2 {{Taking false branch}}
return MCAsmParser::Error();
}

F.Field = 0;
// class-note@-1 {{The value 0 is assigned to 'F.Field'}}
return !MCAsmParser::Error();
// class-note@-1 {{'MCAsmParser::Error' returns true}}
// expected-note@-1 {{The value 0 is assigned to 'F.Field'}}
return false;
}

bool parseFile() {
Foo F;
if (parseFoo(F)) {
// class-note@-1 {{Calling 'parseFoo'}}
// class-note@-2 {{Returning from 'parseFoo'}}
// class-note@-3 {{Taking false branch}}
// expected-note@-1 {{Calling 'parseFoo'}}
// expected-note@-2 {{Returning from 'parseFoo'}}
// expected-note@-3 {{Taking false branch}}
return true;
}

// The following expression would produce the false positive report
// "The left operand of '==' is a garbage value"
// without the modeling done by apiModeling.llvm.ReturnValue:
if (F.Field == 0) {
// class-note@-1 {{Field 'Field' is equal to 0}}
// class-note@-2 {{Taking true branch}}

// no-warning: "The left operand of '==' is a garbage value" was here.
// expected-note@-1 {{Field 'Field' is equal to 0}}
// expected-note@-2 {{Taking true branch}}
doSomething();
}

// Trigger a zero division to get path notes:
(void)(1 / F.Field);
// class-warning@-1 {{Division by zero}}
// class-note@-2 {{Division by zero}}
// expected-warning@-1 {{Division by zero}}
// expected-note@-2 {{Division by zero}}
return false;
}
} // namespace test_classes
} // namespace test_normal


// We predefined 'MCAsmParser::Error' as returning true, but now it returns
// false, which breaks our invariant. Test the notes.
// Sanity check for the highly unlikely case where the implementation of the
// method breaks the convention.
namespace test_break {
struct MCAsmParser {
static bool Error() {
return false; // class-note {{'MCAsmParser::Error' returns false}}
return false;
}
};

bool parseFoo(Foo &F) {
if (problem()) {
// class-note@-1 {{Assuming the condition is false}}
// class-note@-2 {{Taking false branch}}
// expected-note@-1 {{Assuming the condition is false}}
// expected-note@-2 {{Taking false branch}}
return !MCAsmParser::Error();
}

F.Field = 0;
// class-note@-1 {{The value 0 is assigned to 'F.Field'}}
// expected-note@-1 {{The value 0 is assigned to 'F.Field'}}
return MCAsmParser::Error();
// class-note@-1 {{Calling 'MCAsmParser::Error'}}
// class-note@-2 {{Returning from 'MCAsmParser::Error'}}
// expected-note@-1 {{'MCAsmParser::Error' returned false, breaking the convention that it always returns true}}
}

bool parseFile() {
Foo F;
if (parseFoo(F)) {
// class-note@-1 {{Calling 'parseFoo'}}
// class-note@-2 {{Returning from 'parseFoo'}}
// class-note@-3 {{Taking false branch}}
// expected-note@-1 {{Calling 'parseFoo'}}
// expected-note@-2 {{Returning from 'parseFoo'}}
// expected-note@-3 {{Taking false branch}}
return true;
}

(void)(1 / F.Field);
// class-warning@-1 {{Division by zero}}
// class-note@-2 {{Division by zero}}
// expected-warning@-1 {{Division by zero}}
// expected-note@-2 {{Division by zero}}
return false;
}
} // namespace test_classes
} // namespace test_break

0 comments on commit 97dd8e3

Please sign in to comment.