Skip to content

Commit f25fc5f

Browse files
authored
[LifetimeSafety] Implement dataflow analysis for loan propagation (#148065)
This patch introduces the core dataflow analysis infrastructure for the C++ Lifetime Safety checker. This change implements the logic to propagate "loan" information across the control-flow graph. The primary goal is to compute a fixed-point state that accurately models which pointer (Origin) can hold which borrow (Loan) at any given program point.

Key components:

* `LifetimeLattice`: Defines the dataflow state, mapping an `OriginID` to a `LoanSet` using `llvm::ImmutableMap`.
* `Transferer`: Implements the transfer function, which updates the `LifetimeLattice` by applying the lifetime facts (Issue, AssignOrigin, etc.) generated for each basic block.
* `LifetimeDataflow`: A forward dataflow analysis driver that uses a worklist algorithm to iterate over the CFG until the lattice state converges.

The existing test suite has been extended to check the final dataflow results. This work is a prerequisite for the final step of the analysis: consuming these results to identify and report lifetime violations.
1 parent 309bb1e commit f25fc5f

File tree

2 files changed

+441
-2
lines changed

2 files changed

+441
-2
lines changed

clang/lib/Analysis/LifetimeSafety.cpp

Lines changed: 253 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,10 @@
1313
#include "clang/Analysis/Analyses/PostOrderCFGView.h"
1414
#include "clang/Analysis/AnalysisDeclContext.h"
1515
#include "clang/Analysis/CFG.h"
16+
#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
1617
#include "llvm/ADT/FoldingSet.h"
18+
#include "llvm/ADT/ImmutableMap.h"
19+
#include "llvm/ADT/ImmutableSet.h"
1720
#include "llvm/ADT/PointerUnion.h"
1821
#include "llvm/ADT/SmallVector.h"
1922
#include "llvm/Support/Debug.h"
@@ -493,7 +496,243 @@ class FactGenerator : public ConstStmtVisitor<FactGenerator> {
493496
};
494497

495498
// ========================================================================= //
496-
// TODO: Run dataflow analysis to propagate loans, analyse and error reporting.
499+
// The Dataflow Lattice
500+
// ========================================================================= //
501+
502+
// Using LLVM's immutable collections is efficient for dataflow analysis
503+
// as it avoids deep copies during state transitions.
504+
// TODO(opt): Consider using a bitset to represent the set of loans.
505+
/// An immutable set of loan IDs.
using LoanSet = llvm::ImmutableSet<LoanID>;
506+
/// An immutable map from an origin ID to the set of loans recorded for it.
using OriginLoanMap = llvm::ImmutableMap<OriginID, LoanSet>;
507+
508+
/// An object to hold the factories for immutable collections, ensuring
509+
/// that all created states share the same underlying memory management.
510+
struct LifetimeFactory {
511+
OriginLoanMap::Factory OriginMapFactory;
512+
LoanSet::Factory LoanSetFact;
513+
514+
/// Creates a singleton set containing only the given loan ID.
515+
LoanSet createLoanSet(LoanID LID) {
516+
return LoanSetFact.add(LoanSetFact.getEmptySet(), LID);
517+
}
518+
};
519+
520+
/// LifetimeLattice represents the state of our analysis at a given program
521+
/// point. It is an immutable object, and all operations produce a new
522+
/// instance rather than modifying the existing one.
523+
struct LifetimeLattice {
524+
/// The map from an origin to the set of loans it contains.
525+
/// The lattice has a finite height: An origin's loan set is bounded by the
526+
/// total number of loans in the function.
527+
/// TODO(opt): To reduce the lattice size, propagate origins of declarations,
528+
/// not expressions, because expressions are not visible across blocks.
529+
OriginLoanMap Origins = OriginLoanMap(nullptr);
530+
531+
explicit LifetimeLattice(const OriginLoanMap &S) : Origins(S) {}
532+
LifetimeLattice() = default;
533+
534+
bool operator==(const LifetimeLattice &Other) const {
535+
return Origins == Other.Origins;
536+
}
537+
bool operator!=(const LifetimeLattice &Other) const {
538+
return !(*this == Other);
539+
}
540+
541+
LoanSet getLoans(OriginID OID) const {
542+
if (auto *Loans = Origins.lookup(OID))
543+
return *Loans;
544+
return LoanSet(nullptr);
545+
}
546+
547+
/// Computes the union of two lattices by performing a key-wise join of
548+
/// their OriginLoanMaps.
549+
// TODO(opt): This key-wise join is a performance bottleneck. A more
550+
// efficient merge could be implemented using a Patricia Trie or HAMT
551+
// instead of the current AVL-tree-based ImmutableMap.
552+
// TODO(opt): Keep the state small by removing origins which become dead.
553+
LifetimeLattice join(const LifetimeLattice &Other,
554+
LifetimeFactory &Factory) const {
555+
/// Merge the smaller map into the larger one ensuring we iterate over the
556+
/// smaller map.
557+
if (Origins.getHeight() < Other.Origins.getHeight())
558+
return Other.join(*this, Factory);
559+
560+
OriginLoanMap JoinedState = Origins;
561+
// For each origin in the other map, union its loan set with ours.
562+
for (const auto &Entry : Other.Origins) {
563+
OriginID OID = Entry.first;
564+
LoanSet OtherLoanSet = Entry.second;
565+
JoinedState = Factory.OriginMapFactory.add(
566+
JoinedState, OID, join(getLoans(OID), OtherLoanSet, Factory));
567+
}
568+
return LifetimeLattice(JoinedState);
569+
}
570+
571+
LoanSet join(LoanSet a, LoanSet b, LifetimeFactory &Factory) const {
572+
/// Merge the smaller set into the larger one ensuring we iterate over the
573+
/// smaller set.
574+
if (a.getHeight() < b.getHeight())
575+
std::swap(a, b);
576+
LoanSet Result = a;
577+
for (LoanID LID : b) {
578+
/// TODO(opt): Profiling shows that this loop is a major performance
579+
/// bottleneck. Investigate using a BitVector to represent the set of
580+
/// loans for improved join performance.
581+
Result = Factory.LoanSetFact.add(Result, LID);
582+
}
583+
return Result;
584+
}
585+
586+
void dump(llvm::raw_ostream &OS) const {
587+
OS << "LifetimeLattice State:\n";
588+
if (Origins.isEmpty())
589+
OS << " <empty>\n";
590+
for (const auto &Entry : Origins) {
591+
if (Entry.second.isEmpty())
592+
OS << " Origin " << Entry.first << " contains no loans\n";
593+
for (const LoanID &LID : Entry.second)
594+
OS << " Origin " << Entry.first << " contains Loan " << LID << "\n";
595+
}
596+
}
597+
};
598+
599+
// ========================================================================= //
600+
// The Transfer Function
601+
// ========================================================================= //
602+
class Transferer {
603+
FactManager &AllFacts;
604+
LifetimeFactory &Factory;
605+
606+
public:
607+
explicit Transferer(FactManager &F, LifetimeFactory &Factory)
608+
: AllFacts(F), Factory(Factory) {}
609+
610+
/// Computes the exit state of a block by applying all its facts sequentially
611+
/// to a given entry state.
612+
/// TODO: We might need to store intermediate states per-fact in the block for
613+
/// later analysis.
614+
LifetimeLattice transferBlock(const CFGBlock *Block,
615+
LifetimeLattice EntryState) {
616+
LifetimeLattice BlockState = EntryState;
617+
llvm::ArrayRef<const Fact *> Facts = AllFacts.getFacts(Block);
618+
619+
for (const Fact *F : Facts) {
620+
BlockState = transferFact(BlockState, F);
621+
}
622+
return BlockState;
623+
}
624+
625+
private:
626+
LifetimeLattice transferFact(LifetimeLattice In, const Fact *F) {
627+
switch (F->getKind()) {
628+
case Fact::Kind::Issue:
629+
return transfer(In, *F->getAs<IssueFact>());
630+
case Fact::Kind::AssignOrigin:
631+
return transfer(In, *F->getAs<AssignOriginFact>());
632+
// Expire and ReturnOfOrigin facts don't modify the Origins and the State.
633+
case Fact::Kind::Expire:
634+
case Fact::Kind::ReturnOfOrigin:
635+
return In;
636+
}
637+
llvm_unreachable("Unknown fact kind");
638+
}
639+
640+
/// A new loan is issued to the origin. Old loans are erased.
641+
LifetimeLattice transfer(LifetimeLattice In, const IssueFact &F) {
642+
OriginID OID = F.getOriginID();
643+
LoanID LID = F.getLoanID();
644+
return LifetimeLattice(Factory.OriginMapFactory.add(
645+
In.Origins, OID, Factory.createLoanSet(LID)));
646+
}
647+
648+
/// The destination origin's loan set is replaced by the source's.
649+
/// This implicitly "resets" the old loans of the destination.
650+
LifetimeLattice transfer(LifetimeLattice InState, const AssignOriginFact &F) {
651+
OriginID DestOID = F.getDestOriginID();
652+
OriginID SrcOID = F.getSrcOriginID();
653+
LoanSet SrcLoans = InState.getLoans(SrcOID);
654+
return LifetimeLattice(
655+
Factory.OriginMapFactory.add(InState.Origins, DestOID, SrcLoans));
656+
}
657+
};
658+
659+
// ========================================================================= //
660+
// Dataflow analysis
661+
// ========================================================================= //
662+
663+
/// Drives the intra-procedural dataflow analysis.
664+
///
665+
/// Orchestrates the analysis by iterating over the CFG using a worklist
666+
/// algorithm. It computes a fixed point by propagating the LifetimeLattice
667+
/// state through each block until the state no longer changes.
668+
/// TODO: Maybe use the dataflow framework! The framework might need changes
669+
/// to support the current comparison done at block-entry.
670+
class LifetimeDataflow {
671+
const CFG &Cfg;
672+
AnalysisDeclContext &AC;
673+
LifetimeFactory LifetimeFact;
674+
675+
Transferer Xfer;
676+
677+
/// Stores the merged analysis state at the entry of each CFG block.
678+
llvm::DenseMap<const CFGBlock *, LifetimeLattice> BlockEntryStates;
679+
/// Stores the analysis state at the exit of each CFG block, after the
680+
/// transfer function has been applied.
681+
llvm::DenseMap<const CFGBlock *, LifetimeLattice> BlockExitStates;
682+
683+
public:
684+
LifetimeDataflow(const CFG &C, FactManager &FS, AnalysisDeclContext &AC)
685+
: Cfg(C), AC(AC), Xfer(FS, LifetimeFact) {}
686+
687+
void run() {
688+
llvm::TimeTraceScope TimeProfile("Lifetime Dataflow");
689+
ForwardDataflowWorklist Worklist(Cfg, AC);
690+
const CFGBlock *Entry = &Cfg.getEntry();
691+
BlockEntryStates[Entry] = LifetimeLattice{};
692+
Worklist.enqueueBlock(Entry);
693+
while (const CFGBlock *B = Worklist.dequeue()) {
694+
LifetimeLattice EntryState = getEntryState(B);
695+
LifetimeLattice ExitState = Xfer.transferBlock(B, EntryState);
696+
BlockExitStates[B] = ExitState;
697+
698+
for (const CFGBlock *Successor : B->succs()) {
699+
auto SuccIt = BlockEntryStates.find(Successor);
700+
LifetimeLattice OldSuccEntryState = (SuccIt != BlockEntryStates.end())
701+
? SuccIt->second
702+
: LifetimeLattice{};
703+
LifetimeLattice NewSuccEntryState =
704+
OldSuccEntryState.join(ExitState, LifetimeFact);
705+
// Enqueue the successor if its entry state has changed.
706+
// TODO(opt): Consider changing 'join' to report a change if !=
707+
// comparison is found expensive.
708+
if (SuccIt == BlockEntryStates.end() ||
709+
NewSuccEntryState != OldSuccEntryState) {
710+
BlockEntryStates[Successor] = NewSuccEntryState;
711+
Worklist.enqueueBlock(Successor);
712+
}
713+
}
714+
}
715+
}
716+
717+
void dump() const {
718+
llvm::dbgs() << "==========================================\n";
719+
llvm::dbgs() << " Dataflow results:\n";
720+
llvm::dbgs() << "==========================================\n";
721+
const CFGBlock &B = Cfg.getExit();
722+
getExitState(&B).dump(llvm::dbgs());
723+
}
724+
725+
LifetimeLattice getEntryState(const CFGBlock *B) const {
726+
return BlockEntryStates.lookup(B);
727+
}
728+
729+
LifetimeLattice getExitState(const CFGBlock *B) const {
730+
return BlockExitStates.lookup(B);
731+
}
732+
};
733+
734+
// ========================================================================= //
735+
// TODO: Analysing dataflow results and error reporting.
497736
// ========================================================================= //
498737
} // anonymous namespace
499738

@@ -506,5 +745,18 @@ void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg,
506745
FactGenerator FactGen(FactMgr, AC);
507746
FactGen.run();
508747
DEBUG_WITH_TYPE("LifetimeFacts", FactMgr.dump(Cfg, AC));
748+
749+
/// TODO(opt): Consider optimizing individual blocks before running the
750+
/// dataflow analysis.
751+
/// 1. Expression Origins: These are assigned once and read at most once,
752+
/// forming simple chains. These chains can be compressed into a single
753+
/// assignment.
754+
/// 2. Block-Local Loans: Origins of expressions are never read by other
755+
/// blocks; only Decls are visible. Therefore, loans in a block that
756+
/// never reach an Origin associated with a Decl can be safely dropped by
757+
/// the analysis.
758+
LifetimeDataflow Dataflow(Cfg, FactMgr, AC);
759+
Dataflow.run();
760+
DEBUG_WITH_TYPE("LifetimeDataflow", Dataflow.dump());
509761
}
510762
} // namespace clang

0 commit comments

Comments
 (0)