20
20
#include " mlir/IR/PatternMatch.h"
21
21
#include " mlir/Pass/Pass.h"
22
22
#include " mlir/Support/LogicalResult.h"
23
+ #include " mlir/Transforms/CSE.h"
23
24
#include " mlir/Transforms/GreedyPatternRewriteDriver.h"
25
+ #include " mlir/Transforms/LoopInvariantCodeMotionUtils.h"
24
26
#include " llvm/Support/Casting.h"
25
27
#include < deque>
26
28
#include < optional>
@@ -57,6 +59,7 @@ bool is_innermost_operation(Operation *op) {
57
59
inner_most = false ;
58
60
return WalkResult::interrupt ();
59
61
}
62
+ return WalkResult::advance ();
60
63
});
61
64
return inner_most;
62
65
}
@@ -861,9 +864,10 @@ void generateGroupOpVectorizedIR(
861
864
// 3 Update loop result uses
862
865
updateLoopResultUses (idx, opGroups.size (), groupResultYeildSet, func,
863
866
&forOp.value (), mapOpResultToYield);
867
+ moveLoopInvariantCode (forOp.value ());
864
868
}
865
869
866
- // / Pass that lower to tile vector.
870
+ // / Pass that lowers to physical vector.
867
871
struct CPUPhysicalRegisterPass
868
872
: public impl::CPUPhysicalRegisterPassBase<CPUPhysicalRegisterPass> {
869
873
@@ -882,6 +886,8 @@ struct CPUPhysicalRegisterPass
882
886
// dependency.
883
887
// d. reduction. Need to analyze broadcast dim and the
884
888
// data dependency.
889
+ // Operations in the same group have no data dependencies. They can be fused into a
890
+ // common for loop body.
885
891
886
892
// Use a queue to store the operation order, in order to ensure that
887
893
// subsequent moves to the operation will not cause semantic changes.
@@ -953,6 +959,9 @@ struct CPUPhysicalRegisterPass
953
959
groupOpDestination, mapOpResultToYield, func,
954
960
opPermuationMap);
955
961
}
962
+ DominanceInfo domInfo;
963
+ auto reWriter = IRRewriter (func);
964
+ eliminateCommonSubExpressions (reWriter, domInfo, func);
956
965
}
957
966
};
958
967
} // namespace
0 commit comments