Skip to content

Commit

Permalink
[clang][analyzer] Stable order for SymbolRef-keyed containers (llvm#1…
Browse files Browse the repository at this point in the history
…21551)

Generalize the `SymbolID`s used for `SymbolData` to all `SymExpr`s and
use these IDs for comparison `SymbolRef` keys in various containers,
such as `ConstraintMap`. These IDs are superior to raw pointer values
because they are more controllable and are not randomized across
executions (unlike
[pointers](https://en.wikipedia.org/wiki/Address_space_layout_randomization)).

These IDs order is stable across runs because SymExprs are allocated in
the same order.

Stability of the constraint order is important for the stability of the
analyzer results. I evaluated this change on a set of 200+ open-source C
and C++ projects with the total number of ~78 000 symbolic-execution
issues passing Z3 refutation.

This patch reduced the run-to-run churn (flakiness) in SE issues from
80-90 to 30-40 (out of 78K) in our CSA deployment (in our setting flaky
issues are mostly due to Z3 refutation instability).

Note, most of the issue churn (flakiness) is caused by the mentioned Z3
refutation. With Z3 refutation disabled, issue churn goes down to ~10
issues out of 83K and this patch has no effect on appearing/disappearing
issues between runs. It however, seems to reduce the volatility of the
execution flow: before we had 40-80 issues with changed execution flow,
after - 10-30.

Importantly, this change is necessary for the next step in stabilizing
analysis results by caching Z3 query outcomes between analysis runs
(work in progress).

Across our admittedly noisy CI runs, I detected no significant effect on
memory footprint or analysis time.

CPP-5919
  • Loading branch information
necto authored Jan 3, 2025
1 parent 39a9073 commit 0844f83
Show file tree
Hide file tree
Showing 11 changed files with 149 additions and 85 deletions.
31 changes: 22 additions & 9 deletions clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ namespace ento {

class MemRegion;

using SymbolID = unsigned;

/// Symbolic value. These values used to capture symbolic execution of
/// the program.
class SymExpr : public llvm::FoldingSetNode {
Expand All @@ -39,9 +41,19 @@ class SymExpr : public llvm::FoldingSetNode {

private:
Kind K;
/// A unique identifier for this symbol.
///
/// It is useful for SymbolData to easily differentiate multiple symbols, but
/// also for "ephemeral" symbols, such as binary operations, because this id
/// can be used for arranging constraints or equivalence classes instead of
/// unstable pointer values.
///
/// Note, however, that it can't be used in Profile because SymbolManager
/// needs to compute Profile before allocating SymExpr.
const SymbolID Sym;

protected:
SymExpr(Kind k) : K(k) {}
SymExpr(Kind k, SymbolID Sym) : K(k), Sym(Sym) {}

static bool isValidTypeForSymbol(QualType T) {
// FIXME: Depending on whether we choose to deprecate structural symbols,
Expand All @@ -56,6 +68,14 @@ class SymExpr : public llvm::FoldingSetNode {

Kind getKind() const { return K; }

/// Get a unique identifier for this symbol.
/// The ID is unique across all SymExprs in a SymbolManager.
/// They reflect the allocation order of these SymExprs,
/// and are likely stable across runs.
/// Used as a key in SymbolRef containers and as part of identity
/// for SymbolData, e.g. SymbolConjured with ID = 7 is "conj_$7".
SymbolID getSymbolID() const { return Sym; }

virtual void dump() const;

virtual void dumpToStream(raw_ostream &os) const {}
Expand Down Expand Up @@ -112,28 +132,21 @@ inline raw_ostream &operator<<(raw_ostream &os,

using SymbolRef = const SymExpr *;
using SymbolRefSmallVectorTy = SmallVector<SymbolRef, 2>;
using SymbolID = unsigned;

/// A symbol representing data which can be stored in a memory location
/// (region).
class SymbolData : public SymExpr {
const SymbolID Sym;

void anchor() override;

protected:
SymbolData(Kind k, SymbolID sym) : SymExpr(k), Sym(sym) {
assert(classof(this));
}
SymbolData(Kind k, SymbolID sym) : SymExpr(k, sym) { assert(classof(this)); }

public:
~SymbolData() override = default;

/// Get a string representation of the kind of the region.
virtual StringRef getKindStr() const = 0;

SymbolID getSymbolID() const { return Sym; }

unsigned computeComplexity() const override {
return 1;
};
Expand Down
100 changes: 78 additions & 22 deletions clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/ImmutableSet.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Allocator.h"
#include <cassert>
Expand All @@ -43,15 +44,16 @@ class StoreManager;
class SymbolRegionValue : public SymbolData {
const TypedValueRegion *R;

public:
friend class SymExprAllocator;
SymbolRegionValue(SymbolID sym, const TypedValueRegion *r)
: SymbolData(SymbolRegionValueKind, sym), R(r) {
assert(r);
assert(isValidTypeForSymbol(r->getValueType()));
}

public:
LLVM_ATTRIBUTE_RETURNS_NONNULL
const TypedValueRegion* getRegion() const { return R; }
const TypedValueRegion *getRegion() const { return R; }

static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) {
profile.AddInteger((unsigned) SymbolRegionValueKind);
Expand Down Expand Up @@ -84,7 +86,7 @@ class SymbolConjured : public SymbolData {
const LocationContext *LCtx;
const void *SymbolTag;

public:
friend class SymExprAllocator;
SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx,
QualType t, unsigned count, const void *symbolTag)
: SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count),
Expand All @@ -98,6 +100,7 @@ class SymbolConjured : public SymbolData {
assert(isValidTypeForSymbol(t));
}

public:
/// It might return null.
const Stmt *getStmt() const { return S; }
unsigned getCount() const { return Count; }
Expand Down Expand Up @@ -137,14 +140,15 @@ class SymbolDerived : public SymbolData {
SymbolRef parentSymbol;
const TypedValueRegion *R;

public:
friend class SymExprAllocator;
SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r)
: SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) {
assert(parent);
assert(r);
assert(isValidTypeForSymbol(r->getValueType()));
}

public:
LLVM_ATTRIBUTE_RETURNS_NONNULL
SymbolRef getParentSymbol() const { return parentSymbol; }
LLVM_ATTRIBUTE_RETURNS_NONNULL
Expand Down Expand Up @@ -180,12 +184,13 @@ class SymbolDerived : public SymbolData {
class SymbolExtent : public SymbolData {
const SubRegion *R;

public:
friend class SymExprAllocator;
SymbolExtent(SymbolID sym, const SubRegion *r)
: SymbolData(SymbolExtentKind, sym), R(r) {
assert(r);
}

public:
LLVM_ATTRIBUTE_RETURNS_NONNULL
const SubRegion *getRegion() const { return R; }

Expand Down Expand Up @@ -222,7 +227,7 @@ class SymbolMetadata : public SymbolData {
unsigned Count;
const void *Tag;

public:
friend class SymExprAllocator;
SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t,
const LocationContext *LCtx, unsigned count, const void *tag)
: SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx),
Expand All @@ -234,6 +239,7 @@ class SymbolMetadata : public SymbolData {
assert(tag);
}

public:
LLVM_ATTRIBUTE_RETURNS_NONNULL
const MemRegion *getRegion() const { return R; }

Expand Down Expand Up @@ -286,15 +292,16 @@ class SymbolCast : public SymExpr {
/// The type of the result.
QualType ToTy;

public:
SymbolCast(const SymExpr *In, QualType From, QualType To)
: SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) {
friend class SymExprAllocator;
SymbolCast(SymbolID Sym, const SymExpr *In, QualType From, QualType To)
: SymExpr(SymbolCastKind, Sym), Operand(In), FromTy(From), ToTy(To) {
assert(In);
assert(isValidTypeForSymbol(From));
// FIXME: GenericTaintChecker creates symbols of void type.
// Otherwise, 'To' should also be a valid type.
}

public:
unsigned computeComplexity() const override {
if (Complexity == 0)
Complexity = 1 + Operand->computeComplexity();
Expand Down Expand Up @@ -332,9 +339,10 @@ class UnarySymExpr : public SymExpr {
UnaryOperator::Opcode Op;
QualType T;

public:
UnarySymExpr(const SymExpr *In, UnaryOperator::Opcode Op, QualType T)
: SymExpr(UnarySymExprKind), Operand(In), Op(Op), T(T) {
friend class SymExprAllocator;
UnarySymExpr(SymbolID Sym, const SymExpr *In, UnaryOperator::Opcode Op,
QualType T)
: SymExpr(UnarySymExprKind, Sym), Operand(In), Op(Op), T(T) {
// Note, some unary operators are modeled as a binary operator. E.g. ++x is
// modeled as x + 1.
assert((Op == UO_Minus || Op == UO_Not) && "non-supported unary expression");
Expand All @@ -345,6 +353,7 @@ class UnarySymExpr : public SymExpr {
assert(!Loc::isLocType(T) && "unary symbol should be nonloc");
}

public:
unsigned computeComplexity() const override {
if (Complexity == 0)
Complexity = 1 + Operand->computeComplexity();
Expand Down Expand Up @@ -381,8 +390,8 @@ class BinarySymExpr : public SymExpr {
QualType T;

protected:
BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t)
: SymExpr(k), Op(op), T(t) {
BinarySymExpr(SymbolID Sym, Kind k, BinaryOperator::Opcode op, QualType t)
: SymExpr(k, Sym), Op(op), T(t) {
assert(classof(this));
// Binary expressions are results of arithmetic. Pointer arithmetic is not
// handled by binary expressions, but it is instead handled by applying
Expand Down Expand Up @@ -425,14 +434,15 @@ class BinarySymExprImpl : public BinarySymExpr {
LHSTYPE LHS;
RHSTYPE RHS;

public:
BinarySymExprImpl(LHSTYPE lhs, BinaryOperator::Opcode op, RHSTYPE rhs,
QualType t)
: BinarySymExpr(ClassKind, op, t), LHS(lhs), RHS(rhs) {
friend class SymExprAllocator;
BinarySymExprImpl(SymbolID Sym, LHSTYPE lhs, BinaryOperator::Opcode op,
RHSTYPE rhs, QualType t)
: BinarySymExpr(Sym, ClassKind, op, t), LHS(lhs), RHS(rhs) {
assert(getPointer(lhs));
assert(getPointer(rhs));
}

public:
void dumpToStream(raw_ostream &os) const override {
dumpToStreamImpl(os, LHS);
dumpToStreamImpl(os, getOpcode());
Expand Down Expand Up @@ -478,6 +488,21 @@ using IntSymExpr = BinarySymExprImpl<APSIntPtr, const SymExpr *,
using SymSymExpr = BinarySymExprImpl<const SymExpr *, const SymExpr *,
SymExpr::Kind::SymSymExprKind>;

class SymExprAllocator {
SymbolID NextSymbolID = 0;
llvm::BumpPtrAllocator &Alloc;

public:
explicit SymExprAllocator(llvm::BumpPtrAllocator &Alloc) : Alloc(Alloc) {}

template <class SymT, typename... ArgsT> SymT *make(ArgsT &&...Args) {
return new (Alloc) SymT(nextID(), std::forward<ArgsT>(Args)...);
}

private:
SymbolID nextID() { return NextSymbolID++; }
};

class SymbolManager {
using DataSetTy = llvm::FoldingSet<SymExpr>;
using SymbolDependTy =
Expand All @@ -489,15 +514,14 @@ class SymbolManager {
/// alive as long as the key is live.
SymbolDependTy SymbolDependencies;

unsigned SymbolCounter = 0;
llvm::BumpPtrAllocator& BPAlloc;
SymExprAllocator Alloc;
BasicValueFactory &BV;
ASTContext &Ctx;

public:
SymbolManager(ASTContext &ctx, BasicValueFactory &bv,
llvm::BumpPtrAllocator& bpalloc)
: SymbolDependencies(16), BPAlloc(bpalloc), BV(bv), Ctx(ctx) {}
llvm::BumpPtrAllocator &bpalloc)
: SymbolDependencies(16), Alloc(bpalloc), BV(bv), Ctx(ctx) {}

static bool canSymbolicate(QualType T);

Expand Down Expand Up @@ -687,4 +711,36 @@ class SymbolVisitor {

} // namespace clang

// Override the default definition that would use pointer values of SymbolRefs
// to order them, which is unstable due to ASLR.
// Use the SymbolID instead which reflect the order in which the symbols were
// allocated. This is usually stable across runs leading to the stability of
// ConstraintMap and other containers using SymbolRef as keys.
template <>
struct ::llvm::ImutContainerInfo<clang::ento::SymbolRef>
: public ImutProfileInfo<clang::ento::SymbolRef> {
using value_type = clang::ento::SymbolRef;
using value_type_ref = clang::ento::SymbolRef;
using key_type = value_type;
using key_type_ref = value_type_ref;
using data_type = bool;
using data_type_ref = bool;

static key_type_ref KeyOfValue(value_type_ref D) { return D; }
static data_type_ref DataOfValue(value_type_ref) { return true; }

static bool isEqual(clang::ento::SymbolRef LHS, clang::ento::SymbolRef RHS) {
return LHS->getSymbolID() == RHS->getSymbolID();
}

static bool isLess(clang::ento::SymbolRef LHS, clang::ento::SymbolRef RHS) {
return LHS->getSymbolID() < RHS->getSymbolID();
}

// This might seem redundant, but it is required because of the way
// ImmutableSet is implemented through AVLTree:
// same as ImmutableMap, but with a non-informative "data".
static bool isDataEqual(data_type_ref, data_type_ref) { return true; }
};

#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H
Loading

0 comments on commit 0844f83

Please sign in to comment.