From 9663bd2a9a5cd333c7d0a0c2406e7a4a7ceaa8be Mon Sep 17 00:00:00 2001 From: Troels Henriksen Date: Mon, 4 Dec 2023 14:50:35 +0100 Subject: [PATCH] Remove in-place lowering. (#2055) This is one of our oldest optimisations, and mainly served to clean up the code produced by sequentialisation, as well as a few other simple things that we would today call "memory short circuiting". In-place lowering is a really complicated optimisation, so now that we have a propert short circuiting pass, we can get rid of it. Short circuiting is even more complicated, but it is also properly principled, and more general. We still largely have the sequentialisation problems that in-place lowering was supposed to address (mostly nested maps), but at least on the GPU backends there seems to be no real impact from it anymore. I would still like to one day fix sequentialisation so that it produces better code for nested SOACs. --- futhark.cabal | 3 - src/Futhark/CLI/Dev.hs | 19 - src/Futhark/Optimise/InPlaceLowering.hs | 416 ------------------ .../Optimise/InPlaceLowering/LowerIntoStm.hs | 376 ---------------- .../InPlaceLowering/SubstituteIndices.hs | 191 -------- src/Futhark/Passes.hs | 8 +- tests/inplacelowering0.fut | 8 - tests/inplacelowering1.fut | 8 - tests/inplacelowering2.fut | 9 - tests/inplacelowering3.fut | 8 - tests/inplacelowering4.fut | 26 -- tests/inplacelowering5.fut | 27 -- tests/inplacelowering6.fut | 11 - tests/inplacelowering7.fut | 13 - .../coalescing/copy/pos5.fut | 2 +- .../coalescing/lud/lud.fut | 2 +- .../coalescing/lud/lud_internal1-16.fut | 2 +- .../coalescing/map/map4.fut | 2 +- .../coalescing/misc/two-dim-ker.fut | 2 +- tests/soacs/map17.fut | 36 -- 20 files changed, 7 insertions(+), 1162 deletions(-) delete mode 100644 src/Futhark/Optimise/InPlaceLowering.hs delete mode 100644 src/Futhark/Optimise/InPlaceLowering/LowerIntoStm.hs delete mode 100644 src/Futhark/Optimise/InPlaceLowering/SubstituteIndices.hs delete mode 100644 tests/inplacelowering0.fut delete mode 100644 tests/inplacelowering1.fut delete mode 100644 tests/inplacelowering2.fut delete mode 100644 tests/inplacelowering3.fut delete mode 100644 tests/inplacelowering4.fut delete mode 100644 tests/inplacelowering5.fut delete mode 100644 tests/inplacelowering6.fut delete mode 100644 tests/inplacelowering7.fut delete mode 100644 tests/soacs/map17.fut diff --git a/futhark.cabal b/futhark.cabal index 8eb65c6256..c150bef6f3 100644 --- a/futhark.cabal +++ b/futhark.cabal @@ -313,9 +313,6 @@ library Futhark.Optimise.Fusion.TryFusion Futhark.Optimise.GenRedOpt Futhark.Optimise.HistAccs - Futhark.Optimise.InPlaceLowering - Futhark.Optimise.InPlaceLowering.LowerIntoStm - Futhark.Optimise.InPlaceLowering.SubstituteIndices Futhark.Optimise.InliningDeadFun Futhark.Optimise.MemoryBlockMerging Futhark.Optimise.MemoryBlockMerging.GreedyColoring diff --git a/src/Futhark/CLI/Dev.hs b/src/Futhark/CLI/Dev.hs index fd0b6737ae..40bced192c 100644 --- a/src/Futhark/CLI/Dev.hs +++ b/src/Futhark/CLI/Dev.hs @@ -35,7 +35,6 @@ import Futhark.Optimise.CSE import Futhark.Optimise.DoubleBuffer import Futhark.Optimise.Fusion import Futhark.Optimise.HistAccs -import Futhark.Optimise.InPlaceLowering import Futhark.Optimise.InliningDeadFun import Futhark.Optimise.MemoryBlockMerging qualified as MemoryBlockMerging import Futhark.Optimise.ReduceDeviceSyncs (reduceDeviceSyncs) @@ -330,23 +329,6 @@ allocateOption short = long = [passLongOption pass] pass = Seq.explicitAllocations -iplOption :: String -> FutharkOption -iplOption short = - passOption (passDescription pass) (UntypedPass perform) short long - where - perform (GPU prog) config = - GPU - <$> runPipeline (onePass inPlaceLoweringGPU) config prog - perform (Seq prog) config = - Seq - <$> runPipeline (onePass inPlaceLoweringSeq) config prog - perform s _ = - externalErrorS $ - "Pass '" ++ passDescription pass ++ "' cannot operate on " ++ representation s - - long = [passLongOption pass] - pass = inPlaceLoweringSeq - cseOption :: String -> FutharkOption cseOption short = passOption (passDescription pass) (UntypedPass perform) short long @@ -616,7 +598,6 @@ commandLineOptions = kernelsPassOption reduceDeviceSyncs [], typedPassOption soacsProg GPU extractKernels [], typedPassOption soacsProg MC extractMulticore [], - iplOption [], allocateOption "a", kernelsMemPassOption doubleBufferGPU [], mcMemPassOption doubleBufferMC [], diff --git a/src/Futhark/Optimise/InPlaceLowering.hs b/src/Futhark/Optimise/InPlaceLowering.hs deleted file mode 100644 index 84fca1fd4c..0000000000 --- a/src/Futhark/Optimise/InPlaceLowering.hs +++ /dev/null @@ -1,416 +0,0 @@ -{-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE UndecidableInstances #-} - --- | This module implements an optimisation that moves in-place --- updates into/before loops where possible, with the end goal of --- minimising memory copies. As an example, consider this program: --- --- @ --- let r = --- loop (r1 = r0) = for i < n do --- let a = r1[i] --- let r1[i] = a * i --- in r1 --- ... --- let x = y with [k] <- r in --- ... --- @ --- --- We want to turn this into the following: --- --- @ --- let x0 = y with [k] <- r0 --- loop (x = x0) = for i < n do --- let a = a[k,i] --- let x[k,i] = a * i --- in x --- let r = x[k] in --- ... --- @ --- --- The intent is that we are also going to optimise the new data --- movement (in the @x0@-binding), possibly by changing how @r0@ is --- defined. For the above transformation to be valid, a number of --- conditions must be fulfilled: --- --- (1) @r@ must not be consumed after the original in-place update. --- --- (2) @k@ and @y@ must be available at the beginning of the loop. --- --- (3) @x@ must be visible whenever @r@ is visible. (This means --- that both @x@ and @r@ must be bound in the same t'Body'.) --- --- (4) If @x@ is consumed at a point after the loop, @r@ must not --- be used after that point. --- --- (5) The size of @r1@ is invariant inside the loop. --- --- (6) The value @r@ must come from something that we can actually --- optimise (e.g. not a function parameter). --- --- (7) @y@ (or its aliases) may not be used inside the body of the --- loop. --- --- (8) The result of the loop may not alias the merge parameter --- @r1@. --- --- (9) @y@ or its aliases may not be used after the loop. --- --- FIXME: the implementation is not finished yet. Specifically, not --- all of the above conditions are checked. -module Futhark.Optimise.InPlaceLowering - ( inPlaceLoweringGPU, - inPlaceLoweringSeq, - inPlaceLoweringMC, - ) -where - -import Control.Monad -import Control.Monad.RWS -import Data.Map.Strict qualified as M -import Data.Ord (comparing) -import Futhark.Analysis.Alias -import Futhark.Builder -import Futhark.IR.Aliases -import Futhark.IR.GPU -import Futhark.IR.MC -import Futhark.IR.Seq (Seq) -import Futhark.Optimise.InPlaceLowering.LowerIntoStm -import Futhark.Pass -import Futhark.Util (nubByOrd) - --- | Apply the in-place lowering optimisation to the given program. -inPlaceLoweringGPU :: Pass GPU GPU -inPlaceLoweringGPU = inPlaceLowering onKernelOp lowerUpdateGPU - --- | Apply the in-place lowering optimisation to the given program. -inPlaceLoweringSeq :: Pass Seq Seq -inPlaceLoweringSeq = inPlaceLowering pure lowerUpdate - --- | Apply the in-place lowering optimisation to the given program. -inPlaceLoweringMC :: Pass MC MC -inPlaceLoweringMC = inPlaceLowering onMCOp lowerUpdate - --- | Apply the in-place lowering optimisation to the given program. -inPlaceLowering :: - (Constraints rep) => - OnOp rep -> - LowerUpdate rep (ForwardingM rep) -> - Pass rep rep -inPlaceLowering onOp lower = - Pass "In-place lowering" "Lower in-place updates into loops" $ - fmap removeProgAliases - . intraproceduralTransformationWithConsts optimiseConsts optimiseFunDef - . aliasAnalysis - where - optimiseConsts stms = - modifyNameSource $ - runForwardingM lower onOp $ - stmsFromList <$> optimiseStms (stmsToList stms) (pure ()) - - optimiseFunDef consts fundec = - modifyNameSource $ - runForwardingM lower onOp $ - descend (stmsToList consts) $ - bindingFParams (funDefParams fundec) $ do - body <- optimiseBody $ funDefBody fundec - pure $ fundec {funDefBody = body} - - descend [] m = m - descend (stm : stms) m = bindingStm stm $ descend stms m - -type Constraints rep = (Buildable rep, AliasableRep rep) - -optimiseBody :: - (Constraints rep) => - Body (Aliases rep) -> - ForwardingM rep (Body (Aliases rep)) -optimiseBody (Body als stms res) = do - stms' <- deepen $ optimiseStms (stmsToList stms) $ mapM_ (seen . resSubExp) res - pure $ Body als (stmsFromList stms') res - where - seen Constant {} = pure () - seen (Var v) = seenVar v - -optimiseStms :: - (Constraints rep) => - [Stm (Aliases rep)] -> - ForwardingM rep () -> - ForwardingM rep [Stm (Aliases rep)] -optimiseStms [] m = m >> pure [] -optimiseStms (stm : stms) m = do - (stms', bup) <- tapBottomUp $ bindingStm stm $ optimiseStms stms m - stm' <- optimiseInStm stm - -- XXX: unfortunate that we cannot handle duplicate update values. - -- Would be good to improve this. See inplacelowering6.fut. - case nubByOrd (comparing updateValue) - . filter ((`notNameIn` bottomUpSeen bup) . updateSource) -- (9) - . filter ((`elem` boundHere) . updateValue) - $ forwardThese bup of - [] -> do - checkIfForwardableUpdate stm' - pure $ stm' : stms' - updates -> do - lower <- asks topLowerUpdate - scope <- askScope - - -- If we forward any updates, we need to remove them from stms'. - let updated_names = - map updateName updates - notUpdated = - not . any (`elem` updated_names) . patNames . stmPat - - -- Condition (5) and (7) are assumed to be checked by - -- lowerUpdate. - case lower scope stm' updates of - Just lowering -> do - new_stms <- lowering - new_stms' <- optimiseStms new_stms $ tell bup {forwardThese = []} - pure $ new_stms' ++ filter notUpdated stms' - Nothing -> do - checkIfForwardableUpdate stm' - pure $ stm' : stms' - where - boundHere = patNames $ stmPat stm - - checkIfForwardableUpdate (Let pat (StmAux cs _ _) e) - | Pat [PatElem v dec] <- pat, - BasicOp (Update Unsafe src slice (Var ve)) <- e = - maybeForward ve v dec cs src slice - checkIfForwardableUpdate stm' = - mapM_ seenVar $ namesToList $ freeIn $ stmExp stm' - -optimiseInStm :: (Constraints rep) => Stm (Aliases rep) -> ForwardingM rep (Stm (Aliases rep)) -optimiseInStm (Let pat dec e) = - Let pat dec <$> optimiseExp e - -optimiseExp :: (Constraints rep) => Exp (Aliases rep) -> ForwardingM rep (Exp (Aliases rep)) -optimiseExp (Loop merge form body) = - bindingScope (scopeOfLoopForm form) . bindingFParams (map fst merge) $ - Loop merge form <$> optimiseBody body -optimiseExp (Op op) = do - f <- asks topOnOp - Op <$> f op -optimiseExp e = mapExpM optimise e - where - optimise = - identityMapper - { mapOnBody = const optimiseBody - } - -onSegOp :: - (Constraints rep) => - SegOp lvl (Aliases rep) -> - ForwardingM rep (SegOp lvl (Aliases rep)) -onSegOp op = - bindingScope (scopeOfSegSpace (segSpace op)) $ do - let mapper = identitySegOpMapper {mapOnSegOpBody = onKernelBody} - onKernelBody kbody = do - stms <- - deepen $ - optimiseStms (stmsToList (kernelBodyStms kbody)) $ - mapM_ seenVar $ - namesToList $ - freeIn $ - kernelBodyResult kbody - pure kbody {kernelBodyStms = stmsFromList stms} - mapSegOpM mapper op - -onMCOp :: OnOp MC -onMCOp (ParOp par_op op) = ParOp <$> traverse onSegOp par_op <*> onSegOp op -onMCOp op = pure op - -onKernelOp :: OnOp GPU -onKernelOp (SegOp op) = SegOp <$> onSegOp op -onKernelOp op = pure op - -data Entry rep = Entry - { entryNumber :: Int, - entryAliases :: Names, - entryDepth :: Int, - entryOptimisable :: Bool, - entryType :: NameInfo (Aliases rep) - } - -type VTable rep = M.Map VName (Entry rep) - -type OnOp rep = Op (Aliases rep) -> ForwardingM rep (Op (Aliases rep)) - -data TopDown rep = TopDown - { topDownCounter :: Int, - topDownTable :: VTable rep, - topDownDepth :: Int, - topLowerUpdate :: LowerUpdate rep (ForwardingM rep), - topOnOp :: OnOp rep - } - -data BottomUp rep = BottomUp - { bottomUpSeen :: Names, - forwardThese :: [DesiredUpdate (LetDec (Aliases rep))] - } - -instance Semigroup (BottomUp rep) where - BottomUp seen1 forward1 <> BottomUp seen2 forward2 = - BottomUp (seen1 <> seen2) (forward1 <> forward2) - -instance Monoid (BottomUp rep) where - mempty = BottomUp mempty mempty - -newtype ForwardingM rep a = ForwardingM (RWS (TopDown rep) (BottomUp rep) VNameSource a) - deriving - ( Monad, - Applicative, - Functor, - MonadReader (TopDown rep), - MonadWriter (BottomUp rep), - MonadState VNameSource - ) - -instance MonadFreshNames (ForwardingM rep) where - getNameSource = get - putNameSource = put - -instance (Constraints rep) => HasScope (Aliases rep) (ForwardingM rep) where - askScope = M.map entryType <$> asks topDownTable - -runForwardingM :: - LowerUpdate rep (ForwardingM rep) -> - OnOp rep -> - ForwardingM rep a -> - VNameSource -> - (a, VNameSource) -runForwardingM f g (ForwardingM m) src = - let (x, src', _) = runRWS m emptyTopDown src - in (x, src') - where - emptyTopDown = - TopDown - { topDownCounter = 0, - topDownTable = M.empty, - topDownDepth = 0, - topLowerUpdate = f, - topOnOp = g - } - -bindingParams :: - (dec -> NameInfo (Aliases rep)) -> - [Param dec] -> - ForwardingM rep a -> - ForwardingM rep a -bindingParams f params = local $ \(TopDown n vtable d x y) -> - let entry fparam = - ( paramName fparam, - Entry n mempty d False $ f $ paramDec fparam - ) - entries = M.fromList $ map entry params - in TopDown (n + 1) (M.union entries vtable) d x y - -bindingFParams :: - [FParam (Aliases rep)] -> - ForwardingM rep a -> - ForwardingM rep a -bindingFParams = bindingParams FParamName - -bindingScope :: - Scope (Aliases rep) -> - ForwardingM rep a -> - ForwardingM rep a -bindingScope scope = local $ \(TopDown n vtable d x y) -> - let entries = M.map entry scope - infoAliases (LetName (aliases, _)) = unAliases aliases - infoAliases _ = mempty - entry info = Entry n (infoAliases info) d False info - in TopDown (n + 1) (entries <> vtable) d x y - -bindingStm :: - Stm (Aliases rep) -> - ForwardingM rep a -> - ForwardingM rep a -bindingStm (Let pat _ _) = local $ \(TopDown n vtable d x y) -> - let entries = M.fromList $ map entry $ patElems pat - entry patElem = - let (aliases, _) = patElemDec patElem - in ( patElemName patElem, - Entry n (unAliases aliases) d True $ LetName $ patElemDec patElem - ) - in TopDown (n + 1) (M.union entries vtable) d x y - -bindingNumber :: VName -> ForwardingM rep Int -bindingNumber name = do - res <- asks $ fmap entryNumber . M.lookup name . topDownTable - case res of - Just n -> pure n - Nothing -> - error $ - "bindingNumber: variable " - ++ prettyString name - ++ " not found." - -deepen :: ForwardingM rep a -> ForwardingM rep a -deepen = local $ \env -> env {topDownDepth = topDownDepth env + 1} - -areAvailableBefore :: Names -> VName -> ForwardingM rep Bool -areAvailableBefore names point = do - pointN <- bindingNumber point - nameNs <- mapM bindingNumber $ namesToList names - pure $ all (< pointN) nameNs - -isInCurrentBody :: VName -> ForwardingM rep Bool -isInCurrentBody name = do - current <- asks topDownDepth - res <- asks $ fmap entryDepth . M.lookup name . topDownTable - case res of - Just d -> pure $ d == current - Nothing -> - error $ - "isInCurrentBody: variable " - ++ prettyString name - ++ " not found." - -isOptimisable :: VName -> ForwardingM rep Bool -isOptimisable name = do - res <- asks $ fmap entryOptimisable . M.lookup name . topDownTable - case res of - Just b -> pure b - Nothing -> - error $ - "isOptimisable: variable " - ++ prettyString name - ++ " not found." - -seenVar :: forall rep. VName -> ForwardingM rep () -seenVar name = do - aliases <- - asks $ - maybe mempty entryAliases - . M.lookup name - . topDownTable - tell $ (mempty :: BottomUp rep) {bottomUpSeen = oneName name <> aliases} - -tapBottomUp :: ForwardingM rep a -> ForwardingM rep (a, BottomUp rep) -tapBottomUp m = do - (x, bup) <- listen m - pure (x, bup) - -maybeForward :: - (Constraints rep) => - VName -> - VName -> - LetDec (Aliases rep) -> - Certs -> - VName -> - Slice SubExp -> - ForwardingM rep () -maybeForward v dest_nm dest_dec cs src slice = do - -- Checks condition (2) - available <- - (freeIn src <> freeIn slice <> freeIn cs) - `areAvailableBefore` v - -- Check condition (3) - samebody <- isInCurrentBody v - -- Check condition (6) - optimisable <- isOptimisable v - not_prim <- not . primType <$> lookupType v - when (available && samebody && optimisable && not_prim) $ do - let fwd = DesiredUpdate dest_nm dest_dec cs src slice v - tell mempty {forwardThese = [fwd]} diff --git a/src/Futhark/Optimise/InPlaceLowering/LowerIntoStm.hs b/src/Futhark/Optimise/InPlaceLowering/LowerIntoStm.hs deleted file mode 100644 index 7808add48e..0000000000 --- a/src/Futhark/Optimise/InPlaceLowering/LowerIntoStm.hs +++ /dev/null @@ -1,376 +0,0 @@ -{-# LANGUAGE TypeFamilies #-} - -module Futhark.Optimise.InPlaceLowering.LowerIntoStm - ( lowerUpdateGPU, - lowerUpdate, - LowerUpdate, - DesiredUpdate (..), - ) -where - -import Control.Monad -import Control.Monad.Writer -import Data.Either -import Data.List (find, unzip5) -import Data.Maybe (isNothing, mapMaybe) -import Futhark.Analysis.PrimExp.Convert -import Futhark.Construct -import Futhark.IR.Aliases -import Futhark.IR.GPU -import Futhark.Optimise.InPlaceLowering.SubstituteIndices - -data DesiredUpdate dec = DesiredUpdate - { -- | Name of result. - updateName :: VName, - -- | Type of result. - updateType :: dec, - updateCerts :: Certs, - updateSource :: VName, - updateIndices :: Slice SubExp, - updateValue :: VName - } - deriving (Show) - -instance Functor DesiredUpdate where - f `fmap` u = u {updateType = f $ updateType u} - -updateHasValue :: VName -> DesiredUpdate dec -> Bool -updateHasValue name = (name ==) . updateValue - -type LowerUpdate rep m = - Scope (Aliases rep) -> - Stm (Aliases rep) -> - [DesiredUpdate (LetDec (Aliases rep))] -> - Maybe (m [Stm (Aliases rep)]) - -lowerUpdate :: - ( MonadFreshNames m, - Buildable rep, - LetDec rep ~ Type, - AliasableRep rep - ) => - LowerUpdate rep m -lowerUpdate scope (Let pat aux (Loop merge form body)) updates = do - canDo <- lowerUpdateIntoLoop scope updates pat merge form body - Just $ do - (prestms, poststms, pat', merge', body') <- canDo - pure $ - prestms - ++ [ certify (stmAuxCerts aux) $ - mkLet pat' $ - Loop merge' form body' - ] - ++ poststms -lowerUpdate - _ - (Let pat aux (BasicOp (SubExp (Var v)))) - [DesiredUpdate bindee_nm bindee_dec cs src (Slice is) val] - | patNames pat == [src] = - let is' = fullSlice (typeOf bindee_dec) is - in Just . pure $ - [ certify (stmAuxCerts aux <> cs) $ - mkLet [Ident bindee_nm $ typeOf bindee_dec] $ - BasicOp $ - Update Unsafe v is' $ - Var val - ] -lowerUpdate _ _ _ = - Nothing - -lowerUpdateGPU :: (MonadFreshNames m) => LowerUpdate GPU m -lowerUpdateGPU - scope - (Let pat aux (Op (SegOp (SegMap lvl space ts kbody)))) - updates - | all ((`elem` patNames pat) . updateValue) updates, - not source_used_in_kbody = do - mk <- lowerUpdatesIntoSegMap scope pat ts updates space kbody - Just $ do - (pat', ts', kbody', poststms) <- mk - let cs = stmAuxCerts aux <> foldMap updateCerts updates - pure $ - certify cs (Let pat' aux $ Op $ SegOp $ SegMap lvl space ts' kbody') - : stmsToList poststms - where - -- This check is a bit more conservative than ideal. In a perfect - -- world, we would allow indexing a[i,j] if the update is also - -- to exactly a[i,j], as that will not create cross-iteration - -- dependencies. (Although the type checker wouldn't be able to - -- permit this anyway.) - source_used_in_kbody = - mconcat (map (`lookupAliases` scope) (namesToList (freeIn kbody))) - `namesIntersect` mconcat (map ((`lookupAliases` scope) . updateSource) updates) -lowerUpdateGPU scope stm updates = lowerUpdate scope stm updates - -lowerUpdatesIntoSegMap :: - (MonadFreshNames m) => - Scope (Aliases GPU) -> - Pat (LetDec (Aliases GPU)) -> - [Type] -> - [DesiredUpdate (LetDec (Aliases GPU))] -> - SegSpace -> - KernelBody (Aliases GPU) -> - Maybe - ( m - ( Pat (LetDec (Aliases GPU)), - [Type], - KernelBody (Aliases GPU), - Stms (Aliases GPU) - ) - ) -lowerUpdatesIntoSegMap scope pat ret_ts updates kspace kbody = do - -- The updates are all-or-nothing. Being more liberal would require - -- changes to the in-place-lowering pass itself. - mk <- mapM onRet (zip3 (patElems pat) ret_ts (kernelBodyResult kbody)) - pure $ do - (pes, ret_ts', bodystms, krets, poststms) <- unzip5 <$> sequence mk - pure - ( Pat pes, - ret_ts', - kbody - { kernelBodyStms = kernelBodyStms kbody <> mconcat bodystms, - kernelBodyResult = krets - }, - mconcat poststms - ) - where - (gtids, _dims) = unzip $ unSegSpace kspace - - onRet (PatElem v v_dec, _, ret) - | Just (DesiredUpdate bindee_nm bindee_dec _cs src slice _val) <- - find ((== v) . updateValue) updates = do - Returns _ cs se <- Just ret - - -- The slice we're writing per thread must fully cover the - -- underlying dimensions. - guard $ - let (dims', slice') = - unzip . drop (length gtids) . filter (isNothing . dimFix . snd) $ - zip (arrayDims (typeOf bindee_dec)) (unSlice slice) - in isFullSlice (Shape dims') (Slice slice') - - Just $ do - (slice', bodystms) <- - flip runBuilderT scope $ - traverse (toSubExp "index") $ - fixSlice (fmap pe64 slice) $ - map (pe64 . Var) gtids - - let ret' = WriteReturns cs src [(fullSlice (typeOf bindee_dec) (map DimFix slice'), se)] - - v_aliased <- newName v - - pure - ( PatElem bindee_nm bindee_dec, - typeOf bindee_dec, - bodystms, - ret', - stmsFromList - [ mkLet [Ident v_aliased $ typeOf v_dec] $ BasicOp $ Index bindee_nm slice, - mkLet [Ident v $ typeOf v_dec] $ BasicOp $ Replicate mempty $ Var v_aliased - ] - ) - onRet (pe, ret_t, ret) = - Just $ pure (pe, ret_t, mempty, ret, mempty) - -lowerUpdateIntoLoop :: - ( Buildable rep, - BuilderOps rep, - Aliased rep, - LetDec rep ~ (als, Type), - MonadFreshNames m - ) => - Scope rep -> - [DesiredUpdate (LetDec rep)] -> - Pat (LetDec rep) -> - [(FParam rep, SubExp)] -> - LoopForm -> - Body rep -> - Maybe - ( m - ( [Stm rep], - [Stm rep], - [Ident], - [(FParam rep, SubExp)], - Body rep - ) - ) -lowerUpdateIntoLoop scope updates pat val form body = do - -- Algorithm: - -- - -- 0) Map each result of the loop body to a corresponding in-place - -- update, if one exists. - -- - -- 1) Create new merge variables corresponding to the arrays being - -- updated; extend the pattern and the @res@ list with these, - -- and remove the parts of the result list that have a - -- corresponding in-place update. - -- - -- (The creation of the new merge variable identifiers is - -- actually done at the same time as step (0)). - -- - -- 2) Create in-place updates at the end of the loop body. - -- - -- 3) Create index expressions that read back the values written - -- in (2). If the merge parameter corresponding to this value - -- is unique, also @copy@ this value. - -- - -- 4) Update the result of the loop body to properly pass the new - -- arrays and indexed elements to the next iteration of the - -- loop. - -- - -- We also check that the merge parameters we work with have - -- loop-invariant shapes. - - -- Safety condition (8). - forM_ (zip val $ bodyAliases body) $ \((p, _), als) -> - guard $ paramName p `notNameIn` als - - mk_in_place_map <- summariseLoop scope updates usedInBody resmap val - - Just $ do - in_place_map <- mk_in_place_map - (val', prestms, poststms) <- mkMerges in_place_map - let valpat = mkResAndPat in_place_map - idxsubsts = indexSubstitutions in_place_map - (idxsubsts', newstms) <- substituteIndices idxsubsts $ bodyStms body - (body_res, res_stms) <- manipulateResult in_place_map idxsubsts' - let body' = mkBody (newstms <> res_stms) body_res - pure (prestms, poststms, valpat, val', body') - where - usedInBody = - mconcat $ map (`lookupAliases` scope) $ namesToList $ freeIn body <> freeIn form - resmap = zip (bodyResult body) $ patIdents pat - - mkMerges :: - (MonadFreshNames m, Buildable rep) => - [LoopResultSummary (als, Type)] -> - m ([(Param DeclType, SubExp)], [Stm rep], [Stm rep]) - mkMerges summaries = do - ((origmerge, extramerge), (prestms, poststms)) <- - runWriterT $ partitionEithers <$> mapM mkMerge summaries - pure (origmerge ++ extramerge, prestms, poststms) - - mkMerge summary - | Just (update, mergename, mergedec) <- relatedUpdate summary = do - source <- newVName "modified_source" - precopy <- newVName $ baseString (updateValue update) <> "_precopy" - let source_t = snd $ updateType update - elm_t = source_t `setArrayDims` sliceDims (updateIndices update) - tell - ( [ mkLet [Ident source source_t] . BasicOp - $ Update - Unsafe - (updateSource update) - (fullSlice source_t $ unSlice $ updateIndices update) - $ snd - $ mergeParam summary - ], - [ mkLet [Ident precopy elm_t] . BasicOp $ - Index - (updateName update) - (fullSlice source_t $ unSlice $ updateIndices update), - mkLet [Ident (updateValue update) elm_t] $ BasicOp $ Replicate mempty $ Var precopy - ] - ) - pure $ - Right - ( Param mempty mergename (toDecl (typeOf mergedec) Unique), - Var source - ) - | otherwise = pure $ Left $ mergeParam summary - - mkResAndPat summaries = - let (origpat, extrapat) = partitionEithers $ map mkResAndPat' summaries - in origpat ++ extrapat - - mkResAndPat' summary - | Just (update, _, _) <- relatedUpdate summary = - Right (Ident (updateName update) (snd $ updateType update)) - | otherwise = - Left (inPatAs summary) - -summariseLoop :: - ( Aliased rep, - MonadFreshNames m - ) => - Scope rep -> - [DesiredUpdate (als, Type)] -> - Names -> - [(SubExpRes, Ident)] -> - [(Param DeclType, SubExp)] -> - Maybe (m [LoopResultSummary (als, Type)]) -summariseLoop scope updates usedInBody resmap merge = - sequence <$> zipWithM summariseLoopResult resmap merge - where - summariseLoopResult (se, v) (fparam, mergeinit) - | Just update <- find (updateHasValue $ identName v) updates = - -- Safety condition (7) - if usedInBody `namesIntersect` lookupAliases (updateSource update) scope - then Nothing - else - if hasLoopInvariantShape fparam - then Just $ do - lowered_array <- newVName "lowered_array" - pure - LoopResultSummary - { resultSubExp = se, - inPatAs = v, - mergeParam = (fparam, mergeinit), - relatedUpdate = - Just - ( update, - lowered_array, - updateType update - ) - } - else Nothing - summariseLoopResult _ _ = - Nothing -- XXX: conservative; but this entire pass is going away. - hasLoopInvariantShape = all loopInvariant . arrayDims . paramType - - merge_param_names = map (paramName . fst) merge - - loopInvariant (Var v) = v `notElem` merge_param_names - loopInvariant Constant {} = True - -data LoopResultSummary dec = LoopResultSummary - { resultSubExp :: SubExpRes, - inPatAs :: Ident, - mergeParam :: (Param DeclType, SubExp), - relatedUpdate :: Maybe (DesiredUpdate dec, VName, dec) - } - deriving (Show) - -indexSubstitutions :: (Typed dec) => [LoopResultSummary dec] -> IndexSubstitutions -indexSubstitutions = mapMaybe getSubstitution - where - getSubstitution res = do - (DesiredUpdate _ _ cs _ is _, nm, dec) <- relatedUpdate res - let name = paramName $ fst $ mergeParam res - pure (name, (cs, nm, typeOf dec, is)) - -manipulateResult :: - (Buildable rep, MonadFreshNames m) => - [LoopResultSummary (LetDec rep)] -> - IndexSubstitutions -> - m (Result, Stms rep) -manipulateResult summaries substs = do - let (orig_ses, updated_ses) = partitionEithers $ map unchangedRes summaries - (subst_ses, res_stms) <- runWriterT $ zipWithM substRes updated_ses substs - pure (orig_ses ++ subst_ses, stmsFromList res_stms) - where - unchangedRes summary = - case relatedUpdate summary of - Nothing -> Left $ resultSubExp summary - Just _ -> Right $ resultSubExp summary - substRes (SubExpRes res_cs (Var res_v)) (subst_v, (_, nm, _, _)) - | res_v == subst_v = - pure $ SubExpRes res_cs $ Var nm - substRes (SubExpRes res_cs res_se) (_, (cs, nm, dec, Slice is)) = do - v' <- newIdent' (++ "_updated") $ Ident nm $ typeOf dec - tell - [ certify (res_cs <> cs) . mkLet [v'] . BasicOp $ - Update Unsafe nm (fullSlice (typeOf dec) is) res_se - ] - pure $ varRes $ identName v' diff --git a/src/Futhark/Optimise/InPlaceLowering/SubstituteIndices.hs b/src/Futhark/Optimise/InPlaceLowering/SubstituteIndices.hs deleted file mode 100644 index 7d4011f255..0000000000 --- a/src/Futhark/Optimise/InPlaceLowering/SubstituteIndices.hs +++ /dev/null @@ -1,191 +0,0 @@ -{-# LANGUAGE TypeFamilies #-} - --- | This module exports facilities for transforming array accesses in --- a list of 'Stm's (intended to be the bindings in a body). The --- idea is that you can state that some variable @x@ is in fact an --- array indexing @v[i0,i1,...]@. -module Futhark.Optimise.InPlaceLowering.SubstituteIndices - ( substituteIndices, - IndexSubstitution, - IndexSubstitutions, - ) -where - -import Control.Monad -import Data.Map.Strict qualified as M -import Futhark.Construct -import Futhark.IR -import Futhark.IR.Prop.Aliases -import Futhark.Transform.Substitute - --- | Essentially the components of an 'Index' expression. -type IndexSubstitution = (Certs, VName, Type, Slice SubExp) - --- | A mapping from variable names to the indexing operation they --- should be replaced with. -type IndexSubstitutions = [(VName, IndexSubstitution)] - -typeEnvFromSubstitutions :: (LParamInfo rep ~ Type) => IndexSubstitutions -> Scope rep -typeEnvFromSubstitutions = M.fromList . map (fromSubstitution . snd) - where - fromSubstitution (_, name, t, _) = - (name, LParamName t) - --- | Perform the substitution. -substituteIndices :: - ( MonadFreshNames m, - BuilderOps rep, - Buildable rep, - Aliased rep - ) => - IndexSubstitutions -> - Stms rep -> - m (IndexSubstitutions, Stms rep) -substituteIndices substs stms = - runBuilderT (substituteIndicesInStms substs stms) types - where - types = typeEnvFromSubstitutions substs - -substituteIndicesInStms :: - (MonadBuilder m, Buildable (Rep m), Aliased (Rep m)) => - IndexSubstitutions -> - Stms (Rep m) -> - m IndexSubstitutions -substituteIndicesInStms = foldM substituteIndicesInStm - -substituteIndicesInStm :: - (MonadBuilder m, Buildable (Rep m), Aliased (Rep m)) => - IndexSubstitutions -> - Stm (Rep m) -> - m IndexSubstitutions -substituteIndicesInStm substs (Let pat _ (BasicOp (Rearrange perm v))) - | Just (cs, src, src_t, is) <- lookup v substs, - [v'] <- patNames pat = do - let extra_dims = arrayRank src_t - length perm - perm' = [0 .. extra_dims - 1] ++ map (+ extra_dims) perm - src' <- - letExp (baseString v' <> "_subst") $ BasicOp $ Rearrange perm' src - src_t' <- lookupType src' - pure $ (v', (cs, src', src_t', is)) : substs -substituteIndicesInStm substs (Let pat rep e) = do - e' <- substituteIndicesInExp substs e - addStm $ Let pat rep e' - pure substs - -substituteIndicesInExp :: - (MonadBuilder m, Buildable (Rep m), Aliased (Rep m)) => - IndexSubstitutions -> - Exp (Rep m) -> - m (Exp (Rep m)) -substituteIndicesInExp substs (Op op) = do - let used_in_op = filter ((`nameIn` freeIn op) . fst) substs - var_substs <- fmap mconcat $ - forM used_in_op $ \(v, (cs, src, src_dec, Slice is)) -> do - v' <- - certifying cs $ - letExp (baseString src <> "_op_idx") $ - BasicOp $ - Index src $ - fullSlice (typeOf src_dec) is - pure $ M.singleton v v' - pure $ Op $ substituteNames var_substs op -substituteIndicesInExp substs e = do - substs' <- copyAnyConsumed e - let substitute = - identityMapper - { mapOnSubExp = substituteIndicesInSubExp substs', - mapOnVName = substituteIndicesInVar substs', - mapOnBody = const $ substituteIndicesInBody substs' - } - - mapExpM substitute e - where - copyAnyConsumed = - let consumingSubst substs' v - | Just (cs2, src2, src2dec, is2) <- lookup v substs = do - row <- - certifying cs2 $ - letSubExp (baseString v ++ "_row") $ - BasicOp $ - Index src2 $ - fullSlice (typeOf src2dec) (unSlice is2) - row_copy <- - letExp (baseString v ++ "_row_copy") . BasicOp $ - Replicate mempty row - pure $ - update - v - v - ( mempty, - row_copy, - src2dec - `setType` ( typeOf src2dec - `setArrayDims` sliceDims is2 - ), - Slice [] - ) - substs' - consumingSubst substs' _ = - pure substs' - in foldM consumingSubst substs . namesToList . consumedInExp - -substituteIndicesInSubExp :: - (MonadBuilder m) => - IndexSubstitutions -> - SubExp -> - m SubExp -substituteIndicesInSubExp substs (Var v) = - Var <$> substituteIndicesInVar substs v -substituteIndicesInSubExp _ se = - pure se - -substituteIndicesInVar :: - (MonadBuilder m) => - IndexSubstitutions -> - VName -> - m VName -substituteIndicesInVar substs v - | Just (cs2, src2, _, Slice []) <- lookup v substs = - certifying cs2 $ - letExp (baseString src2) $ - BasicOp $ - SubExp $ - Var src2 - | Just (cs2, src2, src2_dec, Slice is2) <- lookup v substs = - certifying cs2 $ - letExp (baseString src2 <> "_v_idx") $ - BasicOp $ - Index src2 $ - fullSlice (typeOf src2_dec) is2 - | otherwise = - pure v - -substituteIndicesInBody :: - (MonadBuilder m, Buildable (Rep m), Aliased (Rep m)) => - IndexSubstitutions -> - Body (Rep m) -> - m (Body (Rep m)) -substituteIndicesInBody substs (Body _ stms res) = do - (substs', stms') <- - inScopeOf stms $ - collectStms $ - substituteIndicesInStms substs stms - (res', res_stms) <- - inScopeOf stms' $ - collectStms $ - mapM (onSubExpRes substs') res - mkBodyM (stms' <> res_stms) res' - where - onSubExpRes substs' (SubExpRes cs se) = - SubExpRes cs <$> substituteIndicesInSubExp substs' se - -update :: - VName -> - VName -> - IndexSubstitution -> - IndexSubstitutions -> - IndexSubstitutions -update needle name subst ((othername, othersubst) : substs) - | needle == othername = (name, subst) : substs - | otherwise = (othername, othersubst) : update needle name subst substs -update needle _ _ [] = error $ "Cannot find substitution for " ++ prettyString needle diff --git a/src/Futhark/Passes.hs b/src/Futhark/Passes.hs index d3602cb6c5..8b9a0f74e0 100644 --- a/src/Futhark/Passes.hs +++ b/src/Futhark/Passes.hs @@ -25,7 +25,6 @@ import Futhark.Optimise.EntryPointMem import Futhark.Optimise.Fusion import Futhark.Optimise.GenRedOpt import Futhark.Optimise.HistAccs -import Futhark.Optimise.InPlaceLowering import Futhark.Optimise.InliningDeadFun import Futhark.Optimise.MemoryBlockMerging qualified as MemoryBlockMerging import Futhark.Optimise.MergeGPUBodies @@ -103,7 +102,6 @@ gpuPipeline = mergeGPUBodies, simplifyGPU, -- Cleanup merged GPUBody kernels. sinkGPU, -- Sink reads within GPUBody kernels. - inPlaceLoweringGPU, babysitKernels, -- Important to simplify after babysitting in order to fix up -- redundant manifests. @@ -118,8 +116,7 @@ seqPipeline = standardPipeline >>> onePass firstOrderTransform >>> passes - [ simplifySeq, - inPlaceLoweringSeq + [ simplifySeq ] -- | Run 'seqPipeline', then add memory information (and @@ -183,8 +180,7 @@ mcPipeline = unstreamMC, performCSE True, simplifyMC, - sinkMC, - inPlaceLoweringMC + sinkMC ] -- | Run 'mcPipeline' and then add memory information. diff --git a/tests/inplacelowering0.fut b/tests/inplacelowering0.fut deleted file mode 100644 index 3d2f9d6bbf..0000000000 --- a/tests/inplacelowering0.fut +++ /dev/null @@ -1,8 +0,0 @@ --- == --- random input { 10i64 [20]i32 } auto output --- structure seq-mem { Update 1 } --- structure gpu-mem { Update 0 } - -def main (n: i64) (xs: *[]i32) = - #[unsafe] - xs with [0:n] = map i32.i64 (map (+2) (iota n)) diff --git a/tests/inplacelowering1.fut b/tests/inplacelowering1.fut deleted file mode 100644 index 3a1f329d78..0000000000 --- a/tests/inplacelowering1.fut +++ /dev/null @@ -1,8 +0,0 @@ --- Careful with the in-place forwarding here. --- == - -def main (xs: *[20]i32) (ys: [20]i32) = - let ys' = ys[0:10] - let x = xs[4] - let xs[0:10] = ys' - in (x, xs) diff --git a/tests/inplacelowering2.fut b/tests/inplacelowering2.fut deleted file mode 100644 index 70accd8831..0000000000 --- a/tests/inplacelowering2.fut +++ /dev/null @@ -1,9 +0,0 @@ --- == --- input { [[0,0,0], [0,0,0]] } --- output { [[2,3,4], [0,0,0]] } --- structure seq-mem { Update 1 } --- structure gpu-mem { Update 0 } - -def main [n] (xs: *[][n]i32) = - #[unsafe] - xs with [0] = map i32.i64 (map (+2) (iota n)) diff --git a/tests/inplacelowering3.fut b/tests/inplacelowering3.fut deleted file mode 100644 index fa732285e1..0000000000 --- a/tests/inplacelowering3.fut +++ /dev/null @@ -1,8 +0,0 @@ --- == --- random input { [10][20][2]i32 } auto output --- structure seq-mem { Update 1 } --- structure gpu-mem { Update 0 } - -def main [n] (xs: *[n][][]i32) = - #[unsafe] - xs with [:,2,1] = map i32.i64 (map (+2) (iota n)) diff --git a/tests/inplacelowering4.fut b/tests/inplacelowering4.fut deleted file mode 100644 index f746323de9..0000000000 --- a/tests/inplacelowering4.fut +++ /dev/null @@ -1,26 +0,0 @@ --- Based on issue #1460 --- == --- input { 2i64 2i64 2i64 } --- output { --- [[[0i64, 1i64], [0i64, 1i64]], --- [[0i64, 1i64], [0i64, 1i64]], --- [[0i64, 1i64], [0i64, 1i64]], --- [[0i64, 1i64], [0i64, 1i64]], --- [[0i64, 1i64], [0i64, 1i64]], --- [[0i64, 1i64], [0i64, 1i64]], --- [[0i64, 1i64], [0i64, 1i64]], --- [[0i64, 1i64], [0i64, 1i64]], --- [[0i64, 1i64], [0i64, 1i64]], --- [[0i64, 1i64], [0i64, 1i64]]] --- } - -def update [n] [m] (mat: *[n][m]i64): *[n][m]i64 = - let mat = rotate (-1) mat - in mat with [n-1] = iota m - -def run (t:i64) (n:i64) (m:i64): [n][m]i64 = - let mat = tabulate_2d n m (+) in - loop (mat) for i < t do update mat - -def main (t:i64) (n:i64) (m:i64) = - map (\i -> run t n m) (iota 10) diff --git a/tests/inplacelowering5.fut b/tests/inplacelowering5.fut deleted file mode 100644 index 01f584b682..0000000000 --- a/tests/inplacelowering5.fut +++ /dev/null @@ -1,27 +0,0 @@ --- Based on issue #1460 --- == --- input { 2i64 2i64 } --- output { --- [[[0i64, 0i64], [0i64, 1i64]], --- [[0i64, 0i64], [0i64, 1i64]], --- [[0i64, 0i64], [0i64, 1i64]], --- [[0i64, 0i64], [0i64, 1i64]], --- [[0i64, 0i64], [0i64, 1i64]], --- [[0i64, 0i64], [0i64, 1i64]], --- [[0i64, 0i64], [0i64, 1i64]], --- [[0i64, 0i64], [0i64, 1i64]], --- [[0i64, 0i64], [0i64, 1i64]], --- [[0i64, 0i64], [0i64, 1i64]]] --- } --- compiled input { 19i64 17i64 } auto output - -def update [n] (mat: *[n][n]i64): *[n][n]i64 = - let mat = transpose mat - in mat with [n-1] = iota n - -def run (t:i64) (n:i64): [n][n]i64 = - let mat = tabulate_2d n n (+) in - loop (mat) for i < t do update mat - -def main (t:i64) (n:i64) = - map (\i -> run t n) (iota 10) diff --git a/tests/inplacelowering6.fut b/tests/inplacelowering6.fut deleted file mode 100644 index f2c8deea43..0000000000 --- a/tests/inplacelowering6.fut +++ /dev/null @@ -1,11 +0,0 @@ --- Same value is used in two updates. Currently does not optimise as --- well as it ought. --- == --- random input { 10i64 [20]i32 [30]i32 } auto output --- structure seq-mem { Update 2 } --- structure gpu-mem { Update 1 } - -def main (n: i64) (xs: *[]i32) (ys: *[]i32) = - #[unsafe] - let a = map i32.i64 (map (+2) (iota n)) - in (xs with [0:n] = a, ys with [0:n] = a) diff --git a/tests/inplacelowering7.fut b/tests/inplacelowering7.fut deleted file mode 100644 index babbe93d92..0000000000 --- a/tests/inplacelowering7.fut +++ /dev/null @@ -1,13 +0,0 @@ --- Do not move a consumption across a use of that array. --- == --- random input { 10i64 [20]i32 } auto output --- structure seq-mem { Update 2 Alloc 1 } --- structure gpu-mem { Update 1 Alloc 1 } - -def main (n: i64) (xs: *[]i32) = - #[unsafe] - -- The source write of one thread of the map must not overlap the destination uses of the other iterations in the map. We also need to check that the entire thing being written does not overlap previous uses of the destination. These two checks should not interfere with each other? - let r = map i32.i64 (map (+2) (iota n)) - let x = xs[0] - let ys = xs with [0:n] = r - in (ys, x) diff --git a/tests/memory-block-merging/coalescing/copy/pos5.fut b/tests/memory-block-merging/coalescing/copy/pos5.fut index 11b5cfc8c2..6b35e90396 100644 --- a/tests/memory-block-merging/coalescing/copy/pos5.fut +++ b/tests/memory-block-merging/coalescing/copy/pos5.fut @@ -25,7 +25,7 @@ -- } -- compiled random input { [256][256][256]i32 1i64 [256]i32 } -- auto output --- structure seq-mem { Alloc 0 } +-- structure seq-mem { Alloc 2 } -- structure gpu-mem { Alloc 2 } let main [n] (t1: *[n][n][n]i32) (i: i64) (xs: [n]i32): *[n][n][n]i32 = diff --git a/tests/memory-block-merging/coalescing/lud/lud.fut b/tests/memory-block-merging/coalescing/lud/lud.fut index 01f2d75d4e..3e86909730 100644 --- a/tests/memory-block-merging/coalescing/lud/lud.fut +++ b/tests/memory-block-merging/coalescing/lud/lud.fut @@ -2,7 +2,7 @@ -- -- == -- structure gpu-mem { Alloc 34 } --- structure seq-mem { Alloc 14 } +-- structure seq-mem { Alloc 20 } def block_size: i64 = 32 diff --git a/tests/memory-block-merging/coalescing/lud/lud_internal1-16.fut b/tests/memory-block-merging/coalescing/lud/lud_internal1-16.fut index 4dd4f368d8..b7a0595fc7 100644 --- a/tests/memory-block-merging/coalescing/lud/lud_internal1-16.fut +++ b/tests/memory-block-merging/coalescing/lud/lud_internal1-16.fut @@ -1,6 +1,6 @@ -- == -- structure gpu-mem { Alloc 2 } --- structure seq-mem { Alloc 1 } +-- structure seq-mem { Alloc 3 } let lud_internal [m] (top_per: [m][16][16]f32) (lft_per: [m][16][16]f32) (mat_slice: [m][m][16][16]f32): *[m][m][16][16]f32 = let top_slice = map transpose top_per in diff --git a/tests/memory-block-merging/coalescing/map/map4.fut b/tests/memory-block-merging/coalescing/map/map4.fut index 1fef81f7de..4f3245c751 100644 --- a/tests/memory-block-merging/coalescing/map/map4.fut +++ b/tests/memory-block-merging/coalescing/map/map4.fut @@ -8,7 +8,7 @@ -- compiled random input { [1024]i64 } -- auto output -- structure gpu-mem { Alloc 2 } --- structure seq-mem { Alloc 2 } +-- structure seq-mem { Alloc 1 } let main [n] (xs: [n]i64): [n][n]i64 = map (\j -> diff --git a/tests/memory-block-merging/coalescing/misc/two-dim-ker.fut b/tests/memory-block-merging/coalescing/misc/two-dim-ker.fut index ceb1ec8473..ddb8f044aa 100644 --- a/tests/memory-block-merging/coalescing/misc/two-dim-ker.fut +++ b/tests/memory-block-merging/coalescing/misc/two-dim-ker.fut @@ -8,7 +8,7 @@ -- input { [ [ [0i64, 1i64], [2i64, 3i64] ], [ [4i64, 5i64], [6i64, 7i64] ] ] } -- output { [[[0i64, 9i64], [0i64, 13i64]]]} -- compiled random input { [128][128][128]i64 } --- structure seq-mem { Alloc 3 } +-- structure seq-mem { Alloc 2 } -- structure gpu-mem { Alloc 3 } let main [n] (xsss: [n][n][n]i64): [][n][n]i64 = diff --git a/tests/soacs/map17.fut b/tests/soacs/map17.fut deleted file mode 100644 index 59bbf9ea30..0000000000 --- a/tests/soacs/map17.fut +++ /dev/null @@ -1,36 +0,0 @@ --- Derived from OptionPricing. Miscompiled with OpenCL backend due to --- erroneous allocation expansion. --- == --- input { --- [[1],[0],[0]] --- [[ 0.9889803798765787 ], --- [ 0.0000000000000000 ], --- [ 0.0000000000000000 ]] --- [[[0.000000]], [[0.674490]]] --- } --- output { [[[0.000000]], [[0.674490]]] } --- structure seq-mem { Update 2 } - -def doInPlaceUpdate [num_dates] - (bb_inds: [3][num_dates]i32) - (bb_data: [3][num_dates]f64) - (gauss: [num_dates]f64): [num_dates]f64 = - let bbrow = replicate num_dates 0.0 - let bbrow[ 0 ] = gauss[0] - in bbrow - -def mapInPlaceUpdate [num_dates][num_und] - (bb_inds: [3][num_dates]i32) - (bb_data: [3][num_dates]f64) - (gauss2dT: [num_und][num_dates]f64): [num_und][num_dates]f64 = - map (doInPlaceUpdate bb_inds bb_data) gauss2dT - ----------------------------------------- --- MAIN ----------------------------------------- - -def main [n][num_dates][num_und] - (bb_inds: [3][num_dates]i32) - (bb_data: [3][num_dates]f64) - (gauss_mat: [n][num_und][num_dates]f64): [][][]f64 = - map (mapInPlaceUpdate bb_inds bb_data) (gauss_mat )