#include "mlir/IR/PatternMatch.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LogicalResult.h"
+ #include "mlir/Transforms/CSE.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+ #include "mlir/Transforms/LoopInvariantCodeMotionUtils.h"
#include "llvm/Support/Casting.h"
#include <deque>
#include <optional>
@@ -57,6 +59,7 @@ bool is_innermost_operation(Operation *op) {
5759 inner_most = false ;
5860 return WalkResult::interrupt ();
5961 }
62+ return WalkResult::advance ();
6063 });
6164 return inner_most;
6265}
@@ -861,9 +864,10 @@ void generateGroupOpVectorizedIR(
861864 // 3 Update loop result uses
862865 updateLoopResultUses (idx, opGroups.size (), groupResultYeildSet, func,
863866 &forOp.value (), mapOpResultToYield);
867+ moveLoopInvariantCode (forOp.value ());
864868}
865869
- /// Pass that lowers to tile vector.
+ /// Pass that lowers to physical vector.
867871struct CPUPhysicalRegisterPass
868872 : public impl::CPUPhysicalRegisterPassBase<CPUPhysicalRegisterPass> {
869873
@@ -882,6 +886,8 @@ struct CPUPhysicalRegisterPass
882886 // dependency.
883887 // d. reduction. Need to analysis broadcast dim and the
884888 // data dependency.
+ // Operations within the same group have no data dependencies on each
+ // other, so they can be fused into a common for-loop body.
885891
886892 // Using queue to store the operation order. In order to ensure that
887893 // subsequent moves to the operation will not cause semantic changes.
@@ -953,6 +959,9 @@ struct CPUPhysicalRegisterPass
953959 groupOpDestination, mapOpResultToYield, func,
954960 opPermuationMap);
955961 }
962+ DominanceInfo domInfo;
963+ auto reWriter = IRRewriter (func);
964+ eliminateCommonSubExpressions (reWriter, domInfo, func);
956965 }
957966};
958967} // namespace