Skip to content

Commit

Permalink
[LLHD] Add TemporalCodeMotionPass (#7381)
Browse files Browse the repository at this point in the history
  • Loading branch information
maerhart authored Jul 26, 2024
1 parent b82747c commit 6410a54
Show file tree
Hide file tree
Showing 5 changed files with 586 additions and 0 deletions.
2 changes: 2 additions & 0 deletions include/circt/Dialect/LLHD/Transforms/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ createMemoryToBlockArgumentPass();

std::unique_ptr<OperationPass<hw::HWModuleOp>> createEarlyCodeMotionPass();

/// Create an instance of the temporal code motion pass
/// ('llhd-temporal-code-motion'), which operates on 'hw::HWModuleOp'.
std::unique_ptr<OperationPass<hw::HWModuleOp>> createTemporalCodeMotionPass();

/// Register the LLHD Transformation passes.
void initLLHDTransformationPasses();

Expand Down
17 changes: 17 additions & 0 deletions include/circt/Dialect/LLHD/Transforms/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,21 @@ def EarlyCodeMotion : Pass<"llhd-early-code-motion", "hw::HWModuleOp"> {
let constructor = "circt::llhd::createEarlyCodeMotionPass()";
}

// Pass definition for 'llhd-temporal-code-motion'. The pass is anchored on
// 'hw::HWModuleOp' and is instantiated through
// 'circt::llhd::createTemporalCodeMotionPass()'.
def TemporalCodeMotion : Pass<"llhd-temporal-code-motion", "hw::HWModuleOp"> {
let summary = "move drive operations to the exit basic block in processes";
let description = [{
This pass uses the temporal region analysis to transform the IR such that
every temporal region has a unique exit block and moves all 'llhd.drv'
operations in a temporal region into its exit block by adjusting the
enable operand.
Furthermore, it combines 'llhd.drv' operations driving the same signal with
the same delay by multiplexing the driven value according to their enable
operands.
This pass assumes that the early code motion pass has been run beforehand.
Otherwise, dominance errors are to be expected.
}];

let constructor = "circt::llhd::createTemporalCodeMotionPass()";
}

#endif // CIRCT_DIALECT_LLHD_TRANSFORMS_PASSES
2 changes: 2 additions & 0 deletions lib/Dialect/LLHD/Transforms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ add_circt_dialect_library(CIRCTLLHDTransforms
FunctionEliminationPass.cpp
MemoryToBlockArgumentPass.cpp
EarlyCodeMotionPass.cpp
TemporalCodeMotionPass.cpp

DEPENDS
CIRCTLLHDTransformsIncGen

LINK_LIBS PUBLIC
CIRCTComb
CIRCTHW
CIRCTLLHD
MLIRIR
Expand Down
342 changes: 342 additions & 0 deletions lib/Dialect/LLHD/Transforms/TemporalCodeMotionPass.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,342 @@
//===- TemporalCodeMotionPass.cpp - Implement Temporal Code Motion Pass ---===//
//
// Implements a pass that moves all signal drives into a unique exiting block
// per temporal region and coalesces drives to the same signal.
//
//===----------------------------------------------------------------------===//

#include "TemporalRegions.h"
#include "circt/Dialect/Comb/CombOps.h"
#include "circt/Dialect/HW/HWOps.h"
#include "circt/Dialect/LLHD/IR/LLHDOps.h"
#include "circt/Dialect/LLHD/Transforms/Passes.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/Region.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/STLExtras.h"
#include <queue>

namespace circt {
namespace llhd {
#define GEN_PASS_DEF_TEMPORALCODEMOTION
#include "circt/Dialect/LLHD/Transforms/Passes.h.inc"
} // namespace llhd
} // namespace circt

using namespace circt;
using namespace mlir;

/// Explore all paths from the 'dominator' block to the 'driveBlock' and
/// construct a boolean (1-bit) expression at the current insertion point of
/// 'builder' that is true exactly when control flow reaches 'driveBlock'.
///
/// The reachability condition of a block is the disjunction, over all of its
/// predecessors, of the predecessor's own reachability condition combined with
/// the branch decision that leads from the predecessor to the block. The
/// 'dominator' block and blocks without predecessors are treated as always
/// reached (constant true).
///
/// \param builder    Builder whose insertion point receives the created
///                   'hw.constant' / 'comb' operations.
/// \param driveBlock Block for which the reachability condition is requested.
/// \param dominator  Block at which the backwards exploration stops.
/// \param mem        Memoization table mapping already processed blocks to
///                   their reachability condition; it is extended by this call
///                   and can be shared between calls that use the same
///                   'dominator' and insertion point.
/// \returns The 1-bit reachability condition of 'driveBlock'.
///
/// Predecessor terminators with more than one successor are required to be
/// 'cf.cond_br' operations (enforced by 'cast').
/// NOTE(review): the exploration is a plain backwards traversal; it looks like
/// a CFG cycle between 'dominator' and 'driveBlock' would never terminate --
/// confirm that callers only pass acyclic temporal regions.
static Value
getBranchDecisionsFromDominatorToTarget(OpBuilder &builder, Block *driveBlock,
                                        Block *dominator,
                                        DenseMap<Block *, Value> &mem) {
  if (auto cached = mem.find(driveBlock); cached != mem.end())
    return cached->second;

  Location loc = driveBlock->getTerminator()->getLoc();

  SmallVector<Block *> worklist;
  worklist.push_back(driveBlock);

  while (!worklist.empty()) {
    Block *curr = worklist.back();

    // A block can be pushed more than once (e.g., when it is reachable through
    // several successors or branches to the same block twice). Its condition
    // only has to be materialized once.
    if (mem.count(curr)) {
      worklist.pop_back();
      continue;
    }

    // Base case: the exploration start is always considered to be reached.
    if (curr == dominator || curr->getPredecessors().empty()) {
      mem[curr] = builder.create<hw::ConstantOp>(loc, APInt(1, 1));
      worklist.pop_back();
      continue;
    }

    // Make sure the conditions of all predecessors are computed first.
    bool addedSomething = false;
    for (auto *predBlock : curr->getPredecessors()) {
      if (!mem.count(predBlock)) {
        worklist.push_back(predBlock);
        addedSomething = true;
      }
    }

    if (addedSomething)
      continue;

    // Disjunction over all incoming edges, starting from constant false.
    Value runner = builder.create<hw::ConstantOp>(loc, APInt(1, 0));
    for (auto *predBlock : curr->getPredecessors()) {
      Value predReached = mem.lookup(predBlock);
      Operation *terminator = predBlock->getTerminator();

      // Unconditional edge: the block is reached whenever the predecessor is.
      if (terminator->getNumSuccessors() == 1) {
        runner = builder.create<comb::OrOp>(loc, runner, predReached);
        continue;
      }

      auto condBr = cast<cf::CondBranchOp>(terminator);

      // Both branch targets lead to 'curr', so the branch condition does not
      // matter. Without this check the edge would wrongly be gated by the
      // negated condition.
      if (condBr.getTrueDest() == condBr.getFalseDest()) {
        runner = builder.create<comb::OrOp>(loc, runner, predReached);
        continue;
      }

      Value cond = condBr.getCondition();
      if (condBr.getFalseDest() == curr) {
        // Negate the condition by XOR-ing it with constant true.
        Value trueVal = builder.create<hw::ConstantOp>(loc, APInt(1, 1));
        cond = builder.create<comb::XorOp>(loc, cond, trueVal);
      }
      Value next = builder.create<comb::AndOp>(loc, predReached, cond);
      runner = builder.create<comb::OrOp>(loc, runner, next);
    }
    mem[curr] = runner;
    worklist.pop_back();
  }

  return mem[driveBlock];
}

/// Move an 'llhd.drv' operation in front of the 'moveBefore' operation and
/// adjust its 'enable' operand such that the drive is only enabled when its
/// original parent block would have been reached from 'dominator'.
///
/// \param drvOp      The drive operation to relocate.
/// \param dominator  Block from which the branch decisions are collected.
/// \param moveBefore Operation in front of which the drive and all helper
///                   operations computing the new enable are placed.
/// \param mem        Memoization table passed on to
///                   'getBranchDecisionsFromDominatorToTarget'.
static void moveDriveOpBefore(llhd::DrvOp drvOp, Block *dominator,
                              Operation *moveBefore,
                              DenseMap<Block *, Value> &mem) {
  // Everything that is created below is inserted right in front of the new
  // position of the drive.
  OpBuilder builder(moveBefore);

  // Materialize the condition under which the block that currently contains
  // the drive is reached.
  Value enable = getBranchDecisionsFromDominatorToTarget(
      builder, drvOp->getBlock(), dominator, mem);

  // A drive that already is conditional has to satisfy both conditions.
  if (Value previousEnable = drvOp.getEnable())
    enable =
        builder.create<comb::AndOp>(drvOp.getLoc(), previousEnable, enable);

  drvOp.getEnableMutable().assign(enable);
  drvOp->moveBefore(moveBefore);
}

namespace {
/// Pass implementation for the generated 'TemporalCodeMotionBase' pass class.
struct TemporalCodeMotionPass
: public llhd::impl::TemporalCodeMotionBase<TemporalCodeMotionPass> {
/// Pass entry point: applies 'runOnProcess' to the 'llhd.process' operations
/// of the operation the pass runs on.
void runOnOperation() override;
/// Transforms a single process; returns failure if the process has a shape
/// that is not supported.
LogicalResult runOnProcess(llhd::ProcessOp procOp);
};
} // namespace

void TemporalCodeMotionPass::runOnOperation() {
for (auto proc : getOperation().getOps<llhd::ProcessOp>())
(void)runOnProcess(proc); // Ignore processes that could not be lowered
}

/// Transform a single 'llhd.process' in three phases:
///  1. give every temporal region (TR) a unique exiting block,
///  2. move all 'llhd.drv' operations of a TR into that exiting block, gating
///     them with the condition under which their original block is reached,
///  3. coalesce drives in the exiting block that target the same signal with
///     the same delay into a single drive.
///
/// \param procOp The process to transform; it is modified in place.
/// \returns failure() without modifying the process if it has more than two
///          temporal regions, more than one 'llhd.wait', or a wait whose
///          block is not the only exiting block of its TR; success()
///          otherwise.
LogicalResult TemporalCodeMotionPass::runOnProcess(llhd::ProcessOp procOp) {
  llhd::TemporalRegionAnalysis trAnalysis =
      llhd::TemporalRegionAnalysis(procOp);
  unsigned numTRs = trAnalysis.getNumTemporalRegions();

  // Only support processes with max. 2 temporal regions and one wait terminator
  // as this is enough to represent flip-flops, registers, etc.
  // NOTE: there always has to be either a wait or halt terminator in a process
  // If the wait block creates the backwards edge, we only have one TR,
  // otherwise we have 2 TRs
  // NOTE: as the wait instruction needs to be on every path around the loop,
  // it has to be the only exiting block of its TR
  // NOTE: the other TR can either have only one exiting block, then we do not
  // need to add an auxiliary block, otherwise we have to add one
  // NOTE: All drive operations have to be moved to the single exiting block of
  // its TR. To do so, add the condition under which its block is reached from
  // the TR entry block as a gating condition to the 'llhd.drv' operation
  // NOTE: the entry blocks that are not part of the infinite loop do not count
  // as TR and have TR number -1
  // TODO: need to check that entry blocks that are not part of the loop do not
  // have any instructions that have side effects that should not be allowed
  // outside of the loop (drv, prb, ...)
  // TODO: add support for more TRs and wait terminators (e.g., to represent
  // FSMs)
  if (numTRs > 2)
    return failure();

  bool seenWait = false;
  WalkResult walkResult = procOp.walk([&](llhd::WaitOp op) -> WalkResult {
    // More than one wait terminator is not supported.
    if (seenWait)
      return WalkResult::interrupt();

    // Check that the block containing the wait is the only exiting block of
    // that TR
    int trId = trAnalysis.getBlockTR(op->getBlock());
    if (!trAnalysis.hasSingleExitBlock(trId))
      return WalkResult::interrupt();

    seenWait = true;
    return WalkResult::advance();
  });
  if (walkResult.wasInterrupted())
    return failure();

  //===--------------------------------------------------------------------===//
  // Create unique exit block per TR
  //===--------------------------------------------------------------------===//

  // TODO: consider the case where a wait branches to itself
  for (unsigned currTR = 0; currTR < numTRs; ++currTR) {
    unsigned numTRSuccs = trAnalysis.getNumTRSuccessors(currTR);
    (void)numTRSuccs; // Only used by the assertion below.
    // NOTE: Above error checks make this impossible to trigger, but if the
    // above are changed this one might have to be promoted to a proper error
    // message.
    assert((numTRSuccs == 1 ||
            (numTRSuccs == 2 && trAnalysis.isOwnTRSuccessor(currTR))) &&
           "only TRs with a single TR as possible successor are "
           "supported for now.");

    if (trAnalysis.hasSingleExitBlock(currTR))
      continue;

    // Get entry block of successor TR
    // NOTE(review): this takes the first reported successor TR; if a TR can
    // list itself first, this would pick the TR's own entry block -- confirm
    // against TemporalRegionAnalysis.
    Block *succTREntry =
        trAnalysis.getTREntryBlock(*trAnalysis.getTRSuccessors(currTR).begin());

    // Create the auxiliary block as we currently don't have a single exiting
    // block and give it the same arguments as the entry block of the
    // successor TR. Ownership of the block is taken over by the region of the
    // process once it is inserted below.
    Block *auxBlock = new Block();
    auxBlock->addArguments(
        succTREntry->getArgumentTypes(),
        SmallVector<Location>(succTREntry->getNumArguments(), procOp.getLoc()));

    // Insert the auxiliary block after the last block of the current TR
    procOp.getBody().getBlocks().insertAfter(
        Region::iterator(trAnalysis.getExitingBlocksInTR(currTR).back()),
        auxBlock);

    // Let all current exit blocks branch to the auxiliary block instead.
    for (Block *exit : trAnalysis.getExitingBlocksInTR(currTR))
      for (auto [i, succ] : llvm::enumerate(exit->getSuccessors()))
        if (trAnalysis.getBlockTR(succ) != static_cast<int>(currTR))
          exit->getTerminator()->setSuccessor(auxBlock, i);

    // Let the auxiliary block branch to the entry block of the successor
    // temporal region
    OpBuilder b(procOp);
    b.setInsertionPointToEnd(auxBlock);
    b.create<cf::BranchOp>(procOp.getLoc(), succTREntry,
                           auxBlock->getArguments());
  }

  //===--------------------------------------------------------------------===//
  // Move drive instructions
  //===--------------------------------------------------------------------===//

  // Force a new analysis as we have changed the CFG
  trAnalysis = llhd::TemporalRegionAnalysis(procOp);
  numTRs = trAnalysis.getNumTemporalRegions();

  // From here on only operations are moved and created, the CFG stays
  // untouched. Block dominance can thus be computed once for all TRs.
  DominanceInfo dom(procOp);

  for (unsigned currTR = 0; currTR < numTRs; ++currTR) {
    // Reachability conditions are materialized in the exiting block of the
    // current TR, so they cannot be shared between TRs.
    DenseMap<Block *, Value> mem;

    // We ensured this in the previous phase above.
    assert(trAnalysis.getExitingBlocksInTR(currTR).size() == 1);

    Block *exitingBlock = trAnalysis.getExitingBlocksInTR(currTR)[0];
    Block *entryBlock = trAnalysis.getTREntryBlock(currTR);

    // Compute the common dominator block of the exiting block and all blocks
    // of this TR that contain an 'llhd.drv' operation.
    Block *dominator = exitingBlock;
    procOp.walk([&](llhd::DrvOp op) {
      Block *parentBlock = op->getBlock();
      if (trAnalysis.getBlockTR(parentBlock) == static_cast<int>(currTR))
        dominator = dom.findNearestCommonDominator(dominator, parentBlock);
    });

    // Set insertion point before the first 'llhd.drv' op in the exiting block
    // (or before the terminator if there is none). The moved drives originate
    // from blocks that are executed before the exiting block, so they have to
    // precede the drives already present there; the coalescing phase below
    // lets the later drive take priority.
    Operation *moveBefore = exitingBlock->getTerminator();
    for (llhd::DrvOp exitDrive : exitingBlock->getOps<llhd::DrvOp>()) {
      moveBefore = exitDrive;
      break;
    }

    assert(dominator &&
           "could not find nearest common dominator for TR exiting "
           "block and the block containing drv");

    // If the dominator isn't already a TR entry block, set it to the nearest
    // dominating TR entry block.
    if (trAnalysis.getBlockTR(dominator) != static_cast<int>(currTR))
      dominator = trAnalysis.getTREntryBlock(currTR);

    // Visit the blocks of the TR such that a block is only processed once all
    // of its predecessors have been processed. This keeps the moved drives in
    // an order that is compatible with the original control flow.
    std::queue<Block *> workQueue;
    SmallPtrSet<Block *, 32> workDone;

    // Drives that already are in the exiting block do not have to be moved.
    if (entryBlock != exitingBlock)
      workQueue.push(entryBlock);

    while (!workQueue.empty()) {
      Block *block = workQueue.front();
      workQueue.pop();
      workDone.insert(block);

      // Copy the drives first as moving them modifies the operation list of
      // the block that is being iterated.
      SmallVector<llhd::DrvOp> drives(block->getOps<llhd::DrvOp>());
      for (auto drive : drives)
        moveDriveOpBefore(drive, dominator, moveBefore, mem);

      for (Block *succ : block->getSuccessors()) {
        if (succ == exitingBlock ||
            trAnalysis.getBlockTR(succ) != static_cast<int>(currTR))
          continue;

        if (llvm::all_of(succ->getPredecessors(),
                         [&](Block *pred) { return workDone.contains(pred); }))
          workQueue.push(succ);
      }
    }
  }

  //===--------------------------------------------------------------------===//
  // Coalesce multiple drives to the same signal
  //===--------------------------------------------------------------------===//

  for (unsigned currTR = 0; currTR < numTRs; ++currTR) {
    // We ensured this in the previous phase above.
    assert(trAnalysis.getExitingBlocksInTR(currTR).size() == 1);

    Block *exitingBlock = trAnalysis.getExitingBlocksInTR(currTR)[0];

    // Most recent drive seen so far for every (signal, delay) combination.
    DenseMap<std::pair<Value, Value>, llhd::DrvOp> sigToDrv;

    // Copy the drives first as operations are erased while iterating.
    SmallVector<llhd::DrvOp> drives(exitingBlock->getOps<llhd::DrvOp>());
    for (auto op : drives) {
      auto sigTimePair = std::make_pair(op.getSignal(), op.getTime());
      auto [entry, inserted] = sigToDrv.try_emplace(sigTimePair, op);
      if (inserted)
        continue;

      llhd::DrvOp firstDrive = entry->second;

      // An unconditional later drive completely overrides the earlier one, so
      // only conditional drives need to be merged.
      OpBuilder builder(op);
      if (op.getEnable()) {
        // Multiplex value to be driven
        Value muxValue = builder.create<comb::MuxOp>(
            op.getLoc(), op.getEnable(), op.getValue(), firstDrive.getValue());
        op.getValueMutable().assign(muxValue);

        // Take the disjunction of the enable conditions
        if (firstDrive.getEnable()) {
          Value orVal = builder.create<comb::OrOp>(op.getLoc(), op.getEnable(),
                                                   firstDrive.getEnable());
          op.getEnableMutable().assign(orVal);
        } else {
          // No enable is equivalent to a constant 'true' enable
          op.getEnableMutable().clear();
        }
      }

      // The earlier drive is now subsumed by 'op'.
      firstDrive->erase();
      entry->second = op;
    }
  }

  return success();
}

/// Create a new instance of the temporal code motion pass.
std::unique_ptr<OperationPass<hw::HWModuleOp>>
circt::llhd::createTemporalCodeMotionPass() {
  std::unique_ptr<OperationPass<hw::HWModuleOp>> pass =
      std::make_unique<TemporalCodeMotionPass>();
  return pass;
}
Loading

0 comments on commit 6410a54

Please sign in to comment.