Skip to content

Commit ab51c9b

Browse files
authored
Merge pull request #1830 from GaloisInc/persistent-term-hashing
Persistent structure-based term identifiers
2 parents ecab2cd + 3745791 commit ab51c9b

File tree

5 files changed

+89
-21
lines changed

5 files changed

+89
-21
lines changed

saw-core/src/Verifier/SAW/Name.hs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ data NameInfo
174174
| -- | This name was imported from some other programming language/scope
175175
ImportedName
176176
URI -- ^ An absolutely-qualified name, which is required to be unique
177-
[Text] -- ^ A collection of aliases for this name. Sorter or "less-qualified"
177+
[Text] -- ^ A collection of aliases for this name. Shorter or "less-qualified"
178178
-- aliases should be nearer the front of the list
179179

180180
deriving (Eq,Ord,Show)
@@ -249,13 +249,13 @@ data PrimName e =
249249
deriving (Show, Functor, Foldable, Traversable)
250250

251251
instance Eq (PrimName e) where
252-
x == y = primVarIndex x == primVarIndex y
252+
x == y = primName x == primName y
253253

254254
instance Ord (PrimName e) where
255-
compare x y = compare (primVarIndex x) (primVarIndex y)
255+
compare x y = compare (primName x) (primName y)
256256

257257
instance Hashable (PrimName e) where
258-
hashWithSalt x pn = hashWithSalt x (primVarIndex pn)
258+
hashWithSalt x pn = hashWithSalt x (primName pn)
259259

260260
primNameToExtCns :: PrimName e -> ExtCns e
261261
primNameToExtCns (PrimName varIdx nm tp) = EC varIdx (ModuleIdentifier nm) tp

saw-core/src/Verifier/SAW/Rewriter.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ scMatch sc pat term =
245245
-- saves the names associated with those bound variables.
246246
match :: Int -> [(LocalName, Term)] -> Term -> Term -> MatchState ->
247247
MaybeT IO MatchState
248-
match _ _ (STApp i fv _) (STApp j _ _) s
248+
match _ _ (STApp i _ fv _) (STApp j _ _ _) s
249249
| fv == emptyBitSet && i == j = return s
250250
match depth env x y s@(MatchState m cs) =
251251
-- (lift $ putStrLn $ "matching (lhs): " ++ scPrettyTerm defaultPPOpts x) >>

saw-core/src/Verifier/SAW/SharedTerm.hs

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -289,10 +289,11 @@ import Data.List (inits, find)
289289
import Data.Maybe
290290
import qualified Data.Foldable as Fold
291291
import Data.Foldable (foldl', foldlM, foldrM, maximum)
292+
import Data.Hashable (Hashable(hash))
292293
import Data.HashMap.Strict (HashMap)
293294
import qualified Data.HashMap.Strict as HMap
294-
import Data.IntMap (IntMap)
295-
import qualified Data.IntMap as IntMap
295+
import Data.IntMap.Strict (IntMap)
296+
import qualified Data.IntMap.Strict as IntMap
296297
import qualified Data.IntSet as IntSet
297298
import Data.IORef (IORef,newIORef,readIORef,modifyIORef',atomicModifyIORef',writeIORef)
298299
import Data.Map (Map)
@@ -343,7 +344,7 @@ data TermFMap a
343344
}
344345

345346
emptyTFM :: TermFMap a
346-
emptyTFM = TermFMap IntMap.empty HMap.empty
347+
emptyTFM = TermFMap mempty mempty
347348

348349
lookupTFM :: TermF Term -> TermFMap a -> Maybe a
349350
lookupTFM tf tfm =
@@ -634,18 +635,19 @@ emptyAppCache = emptyTFM
634635

635636
-- | Return term for application using existing term in cache if it is available.
636637
getTerm :: AppCacheRef -> TermF Term -> IO Term
637-
getTerm r a =
638-
modifyMVar r $ \s -> do
639-
case lookupTFM a s of
640-
Just t -> return (s, t)
638+
getTerm cache termF =
639+
modifyMVar cache $ \s -> do
640+
case lookupTFM termF s of
641+
Just term -> return (s, term)
641642
Nothing -> do
642643
i <- getUniqueInt
643-
let t = STApp { stAppIndex = i
644-
, stAppFreeVars = freesTermF (fmap looseVars a)
645-
, stAppTermF = a
646-
}
647-
let s' = insertTFM a t s
648-
seq s' $ return (s', t)
644+
let term = STApp { stAppIndex = i
645+
, stAppHash = hash termF
646+
, stAppFreeVars = freesTermF (fmap looseVars termF)
647+
, stAppTermF = termF
648+
}
649+
s' = insertTFM termF term s
650+
seq s' $ return (s', term)
649651

650652

651653
--------------------------------------------------------------------------------

saw-core/src/Verifier/SAW/Term/Functor.hs

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,10 @@ zipWithFlatTermF f = go
365365

366366
-- Term Functor ----------------------------------------------------------------
367367

368+
-- | A \"knot-tying\" structure for representing terms and term-like things.
369+
-- Often, this appears in context as the type \"'TermF' 'Term'\", in which case
370+
-- it represents a full 'Term' AST. The \"F\" stands for 'Functor', or
371+
-- occasionally for \"Former\".
368372
data TermF e
369373
= FTermF !(FlatTermF e)
370374
-- ^ The atomic, or builtin, term constructs
@@ -381,24 +385,77 @@ data TermF e
381385
-- The body and type should be closed terms.
382386
deriving (Eq, Ord, Show, Functor, Foldable, Traversable, Generic)
383387

388+
-- See the commentary on 'Hashable Term' for a note on uniqueness.
384389
instance Hashable e => Hashable (TermF e) -- automatically derived.
390+
-- NB: we may someday wish to customize this instance, for a couple reasons.
391+
--
392+
-- 1. Hash 'Constant's based on their definition, if it exists, rather than
393+
-- always using both their type and definition (as the automatically derived
394+
-- instance does). Their type, represented as an 'ExtCns', contains unavoidable
395+
-- freshness derived from a global counter (via 'scFreshGlobalVar' as
396+
-- initialized in 'Verifier.SAW.SharedTerm.mkSharedContext'), but their
397+
-- definition does not necessarily contain the same freshness.
398+
--
399+
-- 2. Improve the default, XOR-based hashing scheme to improve collision
400+
-- resistance. A polynomial-based approach may be fruitful. For a constructor
401+
-- with fields numbered 1..n, evaluate a polynomial along the lines of:
402+
-- coeff(0) * salt ^ 0 + coeff(1) + salt ^ 1 + ... + coeff(n) * salt ^ n
403+
-- where
404+
-- coeff(0) = salt `hashWithSalt` <custom per-constructor salt>
405+
-- coeff(i) = salt `hashWithSalt` <field i>
385406

386407

387408
-- Term Datatype ---------------------------------------------------------------
388409

389410
type TermIndex = Int -- Word64
390411

412+
-- | For more information on the semantics of 'Term's, see the
413+
-- [manual](https://saw.galois.com/manual.html). 'Term' and 'TermF' are split
414+
-- into two structures to facilitate mutual structural recursion (sometimes
415+
-- referred to as the ["knot-tying"](https://wiki.haskell.org/Tying_the_Knot)
416+
-- pattern, sometimes referred to in terms of ["recursion
417+
-- schemes"](https://blog.sumtypeofway.com/posts/introduction-to-recursion-schemes.html))
418+
-- and term object reuse via hash-consing.
391419
data Term
392420
= STApp
421+
-- ^ This constructor \"wraps\" a 'TermF' 'Term', assigning it a
422+
-- guaranteed-unique integer identifier and caching its likely-unique hash.
423+
-- Most 'Term's are constructed via 'STApp'. When a fresh 'TermF' is evinced
424+
-- in the course of a SAW invocation and needs to be lifted into a 'Term',
425+
-- we can see if we've already created a 'Term' wrapper for an identical
426+
-- 'TermF', and reuse it if so. The implementation of hash-consed 'Term'
427+
-- construction exists in 'Verifier.SAW.SharedTerm', in particular in the
428+
-- 'Verifier.SAW.SharedTerm.scTermF' field of the
429+
-- t'Verifier.SAW.SharedTerm.SharedContext' object.
393430
{ stAppIndex :: {-# UNPACK #-} !TermIndex
394-
, stAppFreeVars :: !BitSet -- Free variables
431+
-- ^ The UID associated with a 'Term'. It is guaranteed unique across a
432+
-- universe of properly-constructed 'Term's within a single SAW
433+
-- invocation.
434+
, stAppHash :: {-# UNPACK #-} !Int
435+
-- ^ The hash, according to 'hash', of the 'stAppTermF' field associated
436+
-- with this 'Term'. This should be as unique as a hash can be, but is
437+
-- not guaranteed unique as 'stAppIndex' is.
438+
, stAppFreeVars :: !BitSet
439+
-- ^ The free variables associated with the 'stAppTermF' field.
395440
, stAppTermF :: !(TermF Term)
441+
-- ^ The underlying 'TermF' that this 'Term' wraps. This field "ties the
442+
-- knot" of the 'Term'/'TermF' recursion scheme.
396443
}
397444
| Unshared !(TermF Term)
445+
-- ^ Used for constructing 'Term's that don't need to be shared/reused.
398446
deriving (Show, Typeable)
399447

400448
instance Hashable Term where
401-
hashWithSalt salt STApp{ stAppIndex = i } = salt `combine` 0x00000000 `hashWithSalt` hash i
449+
-- Why have 'Hashable' depend on the not-necessarily-unique hash instead of
450+
-- the necessarily-unique index? Per #1830 (PR) and #1831 (issue), we want to
451+
-- be able to derive a reference to terms based solely on their shape. Indices
452+
-- have nothing to do with a term's shape - they're assigned sequentially when
453+
-- building terms, according to the (arbitrary) order in which a term is
454+
-- built. As for uniqueness, though hashing a term based on its subterms'
455+
-- hashes introduces less randomness/freshness, it maintains plenty, and
456+
-- provides benefits as described above. No code should ever rely on total
457+
-- uniqueness of hashes, and terms are no exception.
458+
hashWithSalt salt STApp{ stAppHash = h } = salt `combine` 0x00000000 `hashWithSalt` h
402459
hashWithSalt salt (Unshared t) = salt `combine` 0x55555555 `hashWithSalt` hash t
403460

404461

@@ -408,6 +465,15 @@ combine :: Int -> Int -> Int
408465
combine h1 h2 = (h1 * 0x01000193) `xor` h2
409466

410467
instance Eq Term where
468+
-- Note: we take some minor liberties with the contract of 'hashWithSalt' in
469+
-- this implementation of 'Eq'. The contract states that if two values are
470+
-- equal according to '==', then they must have the same hash. For terms
471+
-- constructed by/within SAW, this will hold, because SAW's handling of index
472+
-- generation and assignment ensures that equality of indices implies equality
473+
-- of terms and term hashes (see 'Verifier.SAW.SharedTerm.getTerm'). However,
474+
-- if terms are constructed outside this standard procedure or in a way that
475+
-- does not respect index uniqueness rules, 'hashWithSalt''s contract could be
476+
-- violated.
411477
(==) = alphaEquiv
412478

413479
alphaEquiv :: Term -> Term -> Bool

src/SAWScript/HeapsterBuiltins.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ parseAndInsDef henv nm term_tp term_string =
276276
typed_term <- liftIO $ scTypeCheckCompleteError sc (Just mnm) un_term
277277
liftIO $ scCheckSubtype sc (Just mnm) typed_term term_tp
278278
case typedVal typed_term of
279-
STApp _ _ (Constant (EC _ (ModuleIdentifier term_ident) _) _) ->
279+
STApp _ _ _ (Constant (EC _ (ModuleIdentifier term_ident) _) _) ->
280280
return term_ident
281281
term -> do
282282
m <- liftIO $ scFindModule sc mnm

0 commit comments

Comments
 (0)