From c08ee6d54d8b0254154860df5c1a7f2fa6503c07 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Mon, 24 Mar 2025 10:51:07 +0000 Subject: [PATCH 01/43] bloomfilter: Add a simple construction benchmark For testing the bloomfilter lib in isolation, rather than the use in the lsm-tree lib. --- bloomfilter/bench/bloomfilter-bench.hs | 39 ++++++++++++++++++++++++++ lsm-tree.cabal | 11 ++++++++ 2 files changed, 50 insertions(+) create mode 100644 bloomfilter/bench/bloomfilter-bench.hs diff --git a/bloomfilter/bench/bloomfilter-bench.hs b/bloomfilter/bench/bloomfilter-bench.hs new file mode 100644 index 000000000..2686d9ed2 --- /dev/null +++ b/bloomfilter/bench/bloomfilter-bench.hs @@ -0,0 +1,39 @@ +module Main where + +import qualified Data.BloomFilter as B +import qualified Data.BloomFilter.Easy as B +import Data.BloomFilter.Hash (Hashable (..), hash64) + +import Data.Word (Word64) +import System.Random + +import Criterion.Main + +main :: IO () +main = + defaultMain [ + env newStdGen $ \g0 -> + bench "construct bloom m=1e6 fpr=1%" $ + whnf (constructBloom 1_000_000 0.01) g0 + + , env newStdGen $ \g0 -> + bench "construct bloom m=1e6 fpr=0.1%" $ + whnf (constructBloom 1_000_000 0.001) g0 + + , env newStdGen $ \g0 -> + bench "construct bloom m=1e7 fpr=0.1%" $ + whnf (constructBloom 10_000_000 0.001) g0 + ] + +constructBloom :: Int -> Double -> StdGen -> B.Bloom Word64 +constructBloom n fpr g0 = + let (bits, hashes) = B.suggestSizing n fpr in + B.unfold hashes bits nextElement (g0, 0) + where + nextElement :: (StdGen, Int) -> Maybe (Word64, (StdGen, Int)) + nextElement (!g, !i) + | i >= n = Nothing + | otherwise = Just (x, (g', i+1)) + where + (!x, !g') = uniform g + diff --git a/lsm-tree.cabal b/lsm-tree.cabal index 1f3baa3fe..8ec82b4d0 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -446,6 +446,17 @@ test-suite bloomfilter-tests , tasty-quickcheck , vector +benchmark bloomfilter-bench + import: language + type: exitcode-stdio-1.0 + hs-source-dirs: bloomfilter/bench + 
main-is: bloomfilter-bench.hs + build-depends: + , base + , criterion + , lsm-tree:bloomfilter + , random + test-suite bloomfilter-primes import: language type: exitcode-stdio-1.0 From 02d83115173ccf124a138fcda6606136aadafdd8 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Wed, 19 Mar 2025 15:51:20 +0000 Subject: [PATCH 02/43] bloomfilter: removes Hashes, specialise to CheapHashes scheme We don't need multiple schemes. --- bloomfilter/src/Data/BloomFilter.hs | 53 ++++++++----------- bloomfilter/src/Data/BloomFilter/Hash.hs | 42 +++------------ bloomfilter/src/Data/BloomFilter/Internal.hs | 16 +++--- bloomfilter/src/Data/BloomFilter/Mutable.hs | 21 +++----- .../src/Data/BloomFilter/Mutable/Internal.hs | 12 ++--- .../LSMTree/Internal/BloomFilterQuery1.hs | 2 +- .../LSMTree/Internal/BloomFilterQuery2.hs | 6 +-- 7 files changed, 57 insertions(+), 95 deletions(-) diff --git a/bloomfilter/src/Data/BloomFilter.hs b/bloomfilter/src/Data/BloomFilter.hs index 587dd99cf..98310beb8 100644 --- a/bloomfilter/src/Data/BloomFilter.hs +++ b/bloomfilter/src/Data/BloomFilter.hs @@ -32,10 +32,7 @@ module Data.BloomFilter ( Hash, Bloom, MBloom, - Bloom', - MBloom', CheapHashes, - RealHashes, -- * Immutable Bloom filters @@ -61,10 +58,10 @@ module Data.BloomFilter ( import Control.Exception (assert) import Control.Monad (forM_, liftM) import Control.Monad.ST (ST, runST) -import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, - Hashes (..), RealHashes) -import Data.BloomFilter.Internal (Bloom' (..), bloomInvariant) -import Data.BloomFilter.Mutable (MBloom, MBloom', insert, new) +import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, evalHashes, + makeHashes) +import Data.BloomFilter.Internal (Bloom (..), bloomInvariant) +import Data.BloomFilter.Mutable (MBloom, insert, new) import qualified Data.BloomFilter.Mutable.Internal as MB import Data.Word (Word64) @@ -72,9 +69,6 @@ import Prelude hiding (elem, length, notElem) import qualified Data.BloomFilter.BitVec64 as V --- | Bloom 
filter using 'CheapHashes' hashing scheme. -type Bloom = Bloom' CheapHashes - -- | Create an immutable Bloom filter, using the given setup function -- which executes in the 'ST' monad. -- @@ -92,8 +86,8 @@ type Bloom = Bloom' CheapHashes -- Note that the result of the setup function is not used. create :: Int -- ^ number of hash functions to use -> Word64 -- ^ number of bits in filter - -> (forall s. (MBloom' s h a -> ST s ())) -- ^ setup function - -> Bloom' h a + -> (forall s. (MBloom s a -> ST s ())) -- ^ setup function + -> Bloom a {-# INLINE create #-} create hash numBits body = runST $ do mb <- new hash numBits @@ -102,7 +96,7 @@ create hash numBits body = runST $ do -- | Create an immutable Bloom filter from a mutable one. The mutable -- filter may be modified afterwards. -freeze :: MBloom' s h a -> ST s (Bloom' h a) +freeze :: MBloom s a -> ST s (Bloom a) freeze mb = do ba <- V.freeze (MB.bitArray mb) let !bf = Bloom (MB.hashesN mb) (MB.size mb) ba @@ -111,7 +105,7 @@ freeze mb = do -- | Create an immutable Bloom filter from a mutable one. The mutable -- filter /must not/ be modified afterwards, or a runtime crash may -- occur. For a safer creation interface, use 'freeze' or 'create'. -unsafeFreeze :: MBloom' s h a -> ST s (Bloom' h a) +unsafeFreeze :: MBloom s a -> ST s (Bloom a) unsafeFreeze mb = do ba <- V.unsafeFreeze (MB.bitArray mb) let !bf = Bloom (MB.hashesN mb) (MB.size mb) ba @@ -119,33 +113,32 @@ unsafeFreeze mb = do -- | Copy an immutable Bloom filter to create a mutable one. There is -- no non-copying equivalent. -thaw :: Bloom' h a -> ST s (MBloom' s h a) +thaw :: Bloom a -> ST s (MBloom s a) thaw ub = MB.MBloom (hashesN ub) (size ub) `liftM` V.thaw (bitArray ub) -- | Create an empty Bloom filter. 
empty :: Int -- ^ number of hash functions to use -> Word64 -- ^ number of bits in filter - -> Bloom' h a + -> Bloom a {-# INLINE [1] empty #-} empty hash numBits = create hash numBits (\_ -> return ()) -- | Create a Bloom filter with a single element. -singleton :: (Hashes h, Hashable a) +singleton :: Hashable a => Int -- ^ number of hash functions to use -> Word64 -- ^ number of bits in filter -> a -- ^ element to insert - -> Bloom' h a + -> Bloom a singleton hash numBits elt = create hash numBits (\mb -> insert mb elt) -- | Query an immutable Bloom filter for membership. If the value is -- present, return @True@. If the value is not present, there is -- /still/ some possibility that @True@ will be returned. -elem :: (Hashes h, Hashable a) => a -> Bloom' h a -> Bool +elem :: Hashable a => a -> Bloom a -> Bool elem elt ub = elemHashes (makeHashes elt) ub -{-# SPECIALISE elem :: Hashable a => a -> Bloom a -> Bool #-} -- | Query an immutable Bloom filter for membership using already constructed 'Hashes' value. -elemHashes :: Hashes h => h a -> Bloom' h a -> Bool +elemHashes :: CheapHashes a -> Bloom a -> Bool elemHashes !ch !ub = go 0 where go :: Int -> Bool go !i | i >= hashesN ub @@ -160,20 +153,19 @@ elemHashes !ch !ub = go 0 where if V.unsafeIndex (bitArray ub) idx then go (i + 1) else False -{-# SPECIALISE elemHashes :: CheapHashes a -> Bloom a -> Bool #-} -- | Query an immutable Bloom filter for non-membership. If the value -- /is/ present, return @False@. If the value is not present, there -- is /still/ some possibility that @False@ will be returned. -notElem :: (Hashes h, Hashable a) => a -> Bloom' h a -> Bool +notElem :: Hashable a => a -> Bloom a -> Bool notElem elt ub = notElemHashes (makeHashes elt) ub -- | Query an immutable Bloom filter for non-membership using already constructed 'Hashes' value. 
-notElemHashes :: Hashes h => h a -> Bloom' h a -> Bool +notElemHashes :: CheapHashes a -> Bloom a -> Bool notElemHashes !ch !ub = not (elemHashes ch ub) -- | Return the size of an immutable Bloom filter, in bits. -length :: Bloom' h a -> Word64 +length :: Bloom a -> Word64 length = size -- | Build an immutable Bloom filter from a seed value. The seeding @@ -184,15 +176,16 @@ length = size -- -- * If it returns @'Just' (a,b)@, @a@ is added to the filter and -- @b@ is used as a new seed. -unfold :: forall a b h. (Hashes h, Hashable a) +unfold :: forall a b. + Hashable a => Int -- ^ number of hash functions to use -> Word64 -- ^ number of bits in filter -> (b -> Maybe (a, b)) -- ^ seeding function -> b -- ^ initial seed - -> Bloom' h a + -> Bloom a {-# INLINE unfold #-} unfold hs numBits f k = create hs numBits (loop k) - where loop :: forall s. b -> MBloom' s h a -> ST s () + where loop :: forall s. b -> MBloom s a -> ST s () loop j mb = case f j of Just (a, j') -> insert mb a >> loop j' mb _ -> return () @@ -207,11 +200,11 @@ unfold hs numBits f k = create hs numBits (loop k) -- @ -- filt = fromList 3 1024 [\"foo\", \"bar\", \"quux\"] -- @ -fromList :: (Hashes h, Hashable a) +fromList :: Hashable a => Int -- ^ number of hash functions to use -> Word64 -- ^ number of bits in filter -> [a] -- ^ values to populate with - -> Bloom' h a + -> Bloom a fromList hs numBits list = create hs numBits $ forM_ list . 
insert -- $overview diff --git a/bloomfilter/src/Data/BloomFilter/Hash.hs b/bloomfilter/src/Data/BloomFilter/Hash.hs index 0dc6a0bf1..fc3378eb2 100644 --- a/bloomfilter/src/Data/BloomFilter/Hash.hs +++ b/bloomfilter/src/Data/BloomFilter/Hash.hs @@ -15,13 +15,10 @@ module Data.BloomFilter.Hash ( Incremental (..), HashState, incrementalHash, - -- * Hashing - Hashes (..), - RealHashes (..), -- * Compute a family of hash values CheapHashes (..), - evalCheapHashes, - makeCheapHashes, + evalHashes, + makeHashes, ) where import Control.Monad (forM_) @@ -143,23 +140,6 @@ incrementalHash seed f = runST $ do f (HashState s) XXH3.xxh3_64bit_digest s -------------------------------------------------------------------------------- --- Hashes -------------------------------------------------------------------------------- - --- | A type class abstracting over different hashing schemes.b -class Hashes h where - makeHashes :: Hashable a => a -> h a - - evalHashes :: h a -> Int -> Hash - --- | A closure of real hashing function. 
-newtype RealHashes a = RealHashes (Word64 -> Hash) - -instance Hashes RealHashes where - makeHashes x = RealHashes (\salt -> hashSalt64 salt x) - evalHashes (RealHashes f) i = f (fromIntegral i) - ------------------------------------------------------------------------------- -- CheapHashes ------------------------------------------------------------------------------- @@ -171,12 +151,6 @@ data CheapHashes a = CheapHashes !Hash !Hash deriving Show type role CheapHashes nominal -instance Hashes CheapHashes where - makeHashes = makeCheapHashes - {-# INLINE makeHashes #-} - evalHashes = evalCheapHashes - {-# INLINE evalHashes #-} - instance Prim (CheapHashes a) where sizeOfType# _ = 16# alignmentOfType# _ = 8# @@ -285,13 +259,13 @@ https://github.com/facebook/rocksdb/blob/096fb9b67d19a9a180e7c906b4a0cdb2b2d0c1f -- g_i = h_0 + \left\lfloor h_1 / 2^i \right\rfloor -- \] -- -evalCheapHashes :: CheapHashes a -> Int -> Hash -evalCheapHashes (CheapHashes h1 h2) i = h1 + (h2 `unsafeShiftR` i) +evalHashes :: CheapHashes a -> Int -> Hash +evalHashes (CheapHashes h1 h2) i = h1 + (h2 `unsafeShiftR` i) -- | Create 'CheapHashes' structure. -- -- It's simply hashes the value twice using seed 0 and 1. -makeCheapHashes :: Hashable a => a -> CheapHashes a -makeCheapHashes v = CheapHashes (hashSalt64 0 v) (hashSalt64 1 v) -{-# SPECIALISE makeCheapHashes :: BS.ByteString -> CheapHashes BS.ByteString #-} -{-# INLINEABLE makeCheapHashes #-} +makeHashes :: Hashable a => a -> CheapHashes a +makeHashes v = CheapHashes (hashSalt64 0 v) (hashSalt64 1 v) +{-# SPECIALISE makeHashes :: BS.ByteString -> CheapHashes BS.ByteString #-} +{-# INLINEABLE makeHashes #-} diff --git a/bloomfilter/src/Data/BloomFilter/Internal.hs b/bloomfilter/src/Data/BloomFilter/Internal.hs index 48b33f4de..cd737ddbb 100644 --- a/bloomfilter/src/Data/BloomFilter/Internal.hs +++ b/bloomfilter/src/Data/BloomFilter/Internal.hs @@ -1,7 +1,7 @@ {-# OPTIONS_HADDOCK not-home #-} -- | This module exports 'Bloom'' definition. 
module Data.BloomFilter.Internal ( - Bloom'(..), + Bloom(..), bloomInvariant, ) where @@ -13,15 +13,15 @@ import Data.Primitive.ByteArray (sizeofByteArray) import qualified Data.Vector.Primitive as VP import Data.Word (Word64) -type Bloom' :: (Type -> Type) -> Type -> Type -data Bloom' h a = Bloom { +type Bloom :: Type -> Type +data Bloom a = Bloom { hashesN :: {-# UNPACK #-} !Int , size :: {-# UNPACK #-} !Word64 -- ^ size is non-zero , bitArray :: {-# UNPACK #-} !V.BitVec64 } -type role Bloom' nominal nominal +type role Bloom nominal -bloomInvariant :: Bloom' h a -> Bool +bloomInvariant :: Bloom a -> Bool bloomInvariant (Bloom _ s (V.BV64 (VP.Vector off len ba))) = s > 0 && s <= 2^(48 :: Int) @@ -31,7 +31,7 @@ bloomInvariant (Bloom _ s (V.BV64 (VP.Vector off len ba))) = where ceilDiv64 x = unsafeShiftR (x + 63) 6 -instance Eq (Bloom' h a) where +instance Eq (Bloom a) where -- We support arbitrary sized bitvectors, -- therefore an equality is a bit involved: -- we need to be careful when comparing the last bits of bitArray. 
@@ -49,8 +49,8 @@ instance Eq (Bloom' h a) where x = VP.unsafeIndex v w x' = VP.unsafeIndex v' w -instance Show (Bloom' h a) where +instance Show (Bloom a) where show mb = "Bloom { " ++ show (size mb) ++ " bits } " -instance NFData (Bloom' h a) where +instance NFData (Bloom a) where rnf !_ = () diff --git a/bloomfilter/src/Data/BloomFilter/Mutable.hs b/bloomfilter/src/Data/BloomFilter/Mutable.hs index 0d986bded..c83f179ce 100644 --- a/bloomfilter/src/Data/BloomFilter/Mutable.hs +++ b/bloomfilter/src/Data/BloomFilter/Mutable.hs @@ -27,9 +27,7 @@ module Data.BloomFilter.Mutable ( -- * Types Hash, MBloom, - MBloom', CheapHashes, - RealHashes, -- * Mutable Bloom filters -- ** Creation @@ -45,8 +43,8 @@ module Data.BloomFilter.Mutable ( import Control.Monad (liftM) import Control.Monad.ST (ST) -import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, - Hashes (..), RealHashes) +import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, evalHashes, + makeHashes) import Data.BloomFilter.Mutable.Internal import Data.Word (Word64) @@ -54,16 +52,13 @@ import qualified Data.BloomFilter.BitVec64 as V import Prelude hiding (elem, length) --- | Mutable Bloom filter using CheapHashes hashing scheme. -type MBloom s = MBloom' s CheapHashes - -- | Create a new mutable Bloom filter. -- -- The size is ceiled at $2^48$. Tell us if you need bigger bloom filters. -- new :: Int -- ^ number of hash functions to use -> Word64 -- ^ number of bits in filter - -> ST s (MBloom' s h a) + -> ST s (MBloom s a) new hash numBits = MBloom hash numBits' `liftM` V.new numBits' where numBits' | numBits == 0 = 1 | numBits >= 0xffff_ffff_ffff = 0x1_0000_0000_0000 @@ -71,10 +66,10 @@ new hash numBits = MBloom hash numBits' `liftM` V.new numBits' -- | Insert a value into a mutable Bloom filter. Afterwards, a -- membership query for the same value is guaranteed to return @True@. 
-insert :: (Hashes h, Hashable a) => MBloom' s h a -> a -> ST s () +insert :: Hashable a => MBloom s a -> a -> ST s () insert !mb !x = insertHashes mb (makeHashes x) -insertHashes :: Hashes h => MBloom' s h a -> h a -> ST s () +insertHashes :: MBloom s a -> CheapHashes a -> ST s () insertHashes (MBloom k m v) !h = go 0 where go !i | i >= k = return () @@ -84,10 +79,10 @@ insertHashes (MBloom k m v) !h = go 0 -- | Query a mutable Bloom filter for membership. If the value is -- present, return @True@. If the value is not present, there is -- /still/ some possibility that @True@ will be returned. -elem :: (Hashes h, Hashable a) => a -> MBloom' s h a -> ST s Bool +elem :: Hashable a => a -> MBloom s a -> ST s Bool elem elt mb = elemHashes (makeHashes elt) mb -elemHashes :: forall h s a. Hashes h => h a -> MBloom' s h a -> ST s Bool +elemHashes :: forall s a. CheapHashes a -> MBloom s a -> ST s Bool elemHashes !ch (MBloom k m v) = go 0 where go :: Int -> ST s Bool go !i | i >= k = return True @@ -99,7 +94,7 @@ elemHashes !ch (MBloom k m v) = go 0 where else return False -- | Return the size of a mutable Bloom filter, in bits. -length :: MBloom' s h a -> Word64 +length :: MBloom s a -> Word64 length = size -- $overview diff --git a/bloomfilter/src/Data/BloomFilter/Mutable/Internal.hs b/bloomfilter/src/Data/BloomFilter/Mutable/Internal.hs index 0c61fc4be..9e8d899d2 100644 --- a/bloomfilter/src/Data/BloomFilter/Mutable/Internal.hs +++ b/bloomfilter/src/Data/BloomFilter/Mutable/Internal.hs @@ -1,7 +1,7 @@ {-# OPTIONS_HADDOCK not-home #-} --- | This module exports 'MBloom'' internals. +-- | This module exports 'MBloom' internals. module Data.BloomFilter.Mutable.Internal ( - MBloom'(..), + MBloom(..), ) where import qualified Data.BloomFilter.BitVec64 as V @@ -12,13 +12,13 @@ import Prelude hiding (div, divMod, elem, length, mod, notElem, rem, (*), (/)) -- | A mutable Bloom filter, for use within the 'ST' monad. 
-type MBloom' :: Type -> (Type -> Type) -> Type -> Type -data MBloom' s h a = MBloom { +type MBloom :: Type -> Type -> Type +data MBloom s a = MBloom { hashesN :: {-# UNPACK #-} !Int , size :: {-# UNPACK #-} !Word64 -- ^ size is non-zero , bitArray :: {-# UNPACK #-} !(V.MBitVec64 s) } -type role MBloom' nominal nominal nominal +type role MBloom nominal nominal -instance Show (MBloom' s h a) where +instance Show (MBloom s a) where show mb = "MBloom { " ++ show (size mb) ++ " bits } " diff --git a/src/Database/LSMTree/Internal/BloomFilterQuery1.hs b/src/Database/LSMTree/Internal/BloomFilterQuery1.hs index 3ec17ddfd..cf65a1d8a 100644 --- a/src/Database/LSMTree/Internal/BloomFilterQuery1.hs +++ b/src/Database/LSMTree/Internal/BloomFilterQuery1.hs @@ -95,7 +95,7 @@ bloomQueries !blooms !ks !ksN = V.length ks hs :: VP.Vector (Bloom.CheapHashes SerialisedKey) - !hs = VP.generate ksN $ \i -> Bloom.makeCheapHashes (V.unsafeIndex ks i) + !hs = VP.generate ksN $ \i -> Bloom.makeHashes (V.unsafeIndex ks i) -- Loop over all run indexes loop1 :: diff --git a/src/Database/LSMTree/Internal/BloomFilterQuery2.hs b/src/Database/LSMTree/Internal/BloomFilterQuery2.hs index a8fda56ab..83f711c2e 100644 --- a/src/Database/LSMTree/Internal/BloomFilterQuery2.hs +++ b/src/Database/LSMTree/Internal/BloomFilterQuery2.hs @@ -218,7 +218,7 @@ prepKeyHashes :: V.Vector SerialisedKey -> P.PrimArray (Bloom.CheapHashes SerialisedKey) prepKeyHashes keys = P.generatePrimArray (V.length keys) $ \i -> - Bloom.makeCheapHashes (V.unsafeIndex keys i) + Bloom.makeHashes (V.unsafeIndex keys i) prepInitialCandidateProbes :: P.StrictArray (Bloom SerialisedKey) @@ -246,7 +246,7 @@ prepInitialCandidateProbes !keyhash = P.indexPrimArray keyhashes kix !hn = BF.hashesN filter - 1 !bix = (fromIntegral :: Word64 -> Int) $ - Bloom.evalCheapHashes keyhash hn + Bloom.evalHashes keyhash hn `BV64.unsafeRemWord64` -- size must be > 0 BF.size filter -- bloomInvariant ensures this BV64.prefetchIndex (BF.bitArray filter) bix 
@@ -390,7 +390,7 @@ bloomQueriesBody !filters !keyhashes !candidateProbes = assert (hn >= 0 && hn < BF.hashesN filter) $ do let !keyhash = P.indexPrimArray keyhashes kix !bix = (fromIntegral :: Word64 -> Int) $ - Bloom.evalCheapHashes keyhash hn + Bloom.evalHashes keyhash hn `BV64.unsafeRemWord64` -- size must be > 0 BF.size filter -- bloomInvariant ensures this BV64.prefetchIndex (BF.bitArray filter) bix From c53370d1113259dd7a27dc6f7200d78d076ed883 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Tue, 22 Apr 2025 17:28:35 +0100 Subject: [PATCH 03/43] bloomfilter: use ByteArray type from primitive package rather than Data.Array.Byte. It seems to be compatible with a wider range of ghc & lib versions this way. --- bloomfilter/src/Data/BloomFilter/Hash.hs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/bloomfilter/src/Data/BloomFilter/Hash.hs b/bloomfilter/src/Data/BloomFilter/Hash.hs index fc3378eb2..6b52906c9 100644 --- a/bloomfilter/src/Data/BloomFilter/Hash.hs +++ b/bloomfilter/src/Data/BloomFilter/Hash.hs @@ -23,7 +23,6 @@ module Data.BloomFilter.Hash ( import Control.Monad (forM_) import Control.Monad.ST (ST, runST) -import Data.Array.Byte (ByteArray (..)) import Data.Bits (unsafeShiftR) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LBS @@ -72,7 +71,7 @@ instance Hashable LBS.ByteString where forM_ (LBS.toChunks lbs) $ \bs -> update s bs -instance Hashable ByteArray where +instance Hashable P.ByteArray where hashSalt64 salt ba = XXH3.xxh3_64bit_withSeed_ba ba 0 (P.sizeofByteArray ba) salt instance Hashable Word64 where @@ -105,8 +104,8 @@ instance (Hashable a, Hashable b) => Hashable (a, b) where update s (hash64 x) update s (hash64 y) --- | Hash a (part of) 'ByteArray'. -hashByteArray :: ByteArray -> Int -> Int -> Word64 -> Word64 +-- | Hash a (part of) 'P.ByteArray'. 
+hashByteArray :: P.ByteArray -> Int -> Int -> Word64 -> Word64 hashByteArray = XXH3.xxh3_64bit_withSeed_ba ------------------------------------------------------------------------------- From dbff02a88a172da082ca221a31f82b341588e864 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Wed, 19 Mar 2025 23:36:13 +0000 Subject: [PATCH 04/43] bloomfilter: combine a couple modules into one merge Data.BloomFilter.Mutable.Internal into Data.BloomFilter.Mutable --- bloomfilter/src/Data/BloomFilter.hs | 2 +- bloomfilter/src/Data/BloomFilter/Mutable.hs | 16 +++++++++++-- .../src/Data/BloomFilter/Mutable/Internal.hs | 24 ------------------- lsm-tree.cabal | 1 - 4 files changed, 15 insertions(+), 28 deletions(-) delete mode 100644 bloomfilter/src/Data/BloomFilter/Mutable/Internal.hs diff --git a/bloomfilter/src/Data/BloomFilter.hs b/bloomfilter/src/Data/BloomFilter.hs index 98310beb8..4af4b62a5 100644 --- a/bloomfilter/src/Data/BloomFilter.hs +++ b/bloomfilter/src/Data/BloomFilter.hs @@ -62,7 +62,7 @@ import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, evalHashes, makeHashes) import Data.BloomFilter.Internal (Bloom (..), bloomInvariant) import Data.BloomFilter.Mutable (MBloom, insert, new) -import qualified Data.BloomFilter.Mutable.Internal as MB +import qualified Data.BloomFilter.Mutable as MB import Data.Word (Word64) import Prelude hiding (elem, length, notElem) diff --git a/bloomfilter/src/Data/BloomFilter/Mutable.hs b/bloomfilter/src/Data/BloomFilter/Mutable.hs index c83f179ce..fed2c09ec 100644 --- a/bloomfilter/src/Data/BloomFilter/Mutable.hs +++ b/bloomfilter/src/Data/BloomFilter/Mutable.hs @@ -26,7 +26,7 @@ module Data.BloomFilter.Mutable ( -- * Types Hash, - MBloom, + MBloom (..), CheapHashes, -- * Mutable Bloom filters @@ -45,13 +45,25 @@ import Control.Monad (liftM) import Control.Monad.ST (ST) import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, evalHashes, makeHashes) -import Data.BloomFilter.Mutable.Internal +import Data.Kind (Type) import Data.Word 
(Word64) import qualified Data.BloomFilter.BitVec64 as V import Prelude hiding (elem, length) +type MBloom :: Type -> Type -> Type +-- | A mutable Bloom filter, for use within the 'ST' monad. +data MBloom s a = MBloom { + hashesN :: {-# UNPACK #-} !Int + , size :: {-# UNPACK #-} !Word64 -- ^ size is non-zero + , bitArray :: {-# UNPACK #-} !(V.MBitVec64 s) + } +type role MBloom nominal nominal + +instance Show (MBloom s a) where + show mb = "MBloom { " ++ show (size mb) ++ " bits } " + -- | Create a new mutable Bloom filter. -- -- The size is ceiled at $2^48$. Tell us if you need bigger bloom filters. diff --git a/bloomfilter/src/Data/BloomFilter/Mutable/Internal.hs b/bloomfilter/src/Data/BloomFilter/Mutable/Internal.hs deleted file mode 100644 index 9e8d899d2..000000000 --- a/bloomfilter/src/Data/BloomFilter/Mutable/Internal.hs +++ /dev/null @@ -1,24 +0,0 @@ -{-# OPTIONS_HADDOCK not-home #-} --- | This module exports 'MBloom' internals. -module Data.BloomFilter.Mutable.Internal ( - MBloom(..), -) where - -import qualified Data.BloomFilter.BitVec64 as V -import Data.Kind (Type) -import Data.Word (Word64) - -import Prelude hiding (div, divMod, elem, length, mod, notElem, rem, - (*), (/)) - --- | A mutable Bloom filter, for use within the 'ST' monad. 
-type MBloom :: Type -> Type -> Type -data MBloom s a = MBloom { - hashesN :: {-# UNPACK #-} !Int - , size :: {-# UNPACK #-} !Word64 -- ^ size is non-zero - , bitArray :: {-# UNPACK #-} !(V.MBitVec64 s) - } -type role MBloom nominal nominal - -instance Show (MBloom s a) where - show mb = "MBloom { " ++ show (size mb) ++ " bits } " diff --git a/lsm-tree.cabal b/lsm-tree.cabal index 8ec82b4d0..b567f01dd 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -425,7 +425,6 @@ library bloomfilter Data.BloomFilter.Hash Data.BloomFilter.Internal Data.BloomFilter.Mutable - Data.BloomFilter.Mutable.Internal ghc-options: -O2 -Wall From a5fe94544b6bcb50055a2eccd99e3bd53bd523e4 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Wed, 26 Mar 2025 13:10:02 +0000 Subject: [PATCH 05/43] bloomfilter: Remove pointless exported functions and export a helpful construction function. --- bloomfilter/src/Data/BloomFilter.hs | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/bloomfilter/src/Data/BloomFilter.hs b/bloomfilter/src/Data/BloomFilter.hs index 4af4b62a5..a554c4ca4 100644 --- a/bloomfilter/src/Data/BloomFilter.hs +++ b/bloomfilter/src/Data/BloomFilter.hs @@ -42,11 +42,9 @@ module Data.BloomFilter ( unsafeFreeze, -- ** Creation + create, unfold, - fromList, - empty, - singleton, -- ** Accessors length, @@ -116,21 +114,6 @@ unsafeFreeze mb = do thaw :: Bloom a -> ST s (MBloom s a) thaw ub = MB.MBloom (hashesN ub) (size ub) `liftM` V.thaw (bitArray ub) --- | Create an empty Bloom filter. -empty :: Int -- ^ number of hash functions to use - -> Word64 -- ^ number of bits in filter - -> Bloom a -{-# INLINE [1] empty #-} -empty hash numBits = create hash numBits (\_ -> return ()) - --- | Create a Bloom filter with a single element. 
-singleton :: Hashable a - => Int -- ^ number of hash functions to use - -> Word64 -- ^ number of bits in filter - -> a -- ^ element to insert - -> Bloom a -singleton hash numBits elt = create hash numBits (\mb -> insert mb elt) - -- | Query an immutable Bloom filter for membership. If the value is -- present, return @True@. If the value is not present, there is -- /still/ some possibility that @True@ will be returned. From 98add8eb62523c5321771a54f501491b040a3019 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Wed, 26 Mar 2025 14:45:14 +0000 Subject: [PATCH 06/43] bloomfilter: misc minor cleanups of the tests --- bloomfilter/tests/QCSupport.hs | 11 -------- bloomfilter/tests/bloomfilter-tests.hs | 36 +++++++++++++++++++------- lsm-tree.cabal | 5 +--- 3 files changed, 27 insertions(+), 25 deletions(-) delete mode 100644 bloomfilter/tests/QCSupport.hs diff --git a/bloomfilter/tests/QCSupport.hs b/bloomfilter/tests/QCSupport.hs deleted file mode 100644 index f26888616..000000000 --- a/bloomfilter/tests/QCSupport.hs +++ /dev/null @@ -1,11 +0,0 @@ -module QCSupport (P(..)) where - -import Test.QuickCheck (Arbitrary (..), choose) -import Test.QuickCheck.Instances () - -newtype P = P { unP :: Double } - deriving (Eq, Ord, Show) - -instance Arbitrary P where - arbitrary = P <$> choose (epsilon, 1 - epsilon) - where epsilon = 1e-6 :: Double diff --git a/bloomfilter/tests/bloomfilter-tests.hs b/bloomfilter/tests/bloomfilter-tests.hs index e1710d987..51b2fe5bf 100644 --- a/bloomfilter/tests/bloomfilter-tests.hs +++ b/bloomfilter/tests/bloomfilter-tests.hs @@ -1,24 +1,22 @@ -{-# LANGUAGE NumericUnderscores #-} -module Main where +module Main (main) where -import Control.Monad (forM_) +import qualified Data.BloomFilter as B import qualified Data.BloomFilter.BitVec64 as BV64 import qualified Data.BloomFilter.Easy as B import Data.BloomFilter.Hash (Hashable (..), hash64) import qualified Data.BloomFilter.Internal as BI + import Data.ByteString (ByteString) import qualified 
Data.ByteString as BS import qualified Data.ByteString.Lazy as LBS -import Data.Int (Int16, Int32, Int64, Int8) +import Data.Int (Int64) import qualified Data.Vector.Primitive as VP import Data.Word (Word32, Word64) -import System.IO (BufferMode (..), hSetBuffering, stdout) + +import Test.QuickCheck.Instances () import Test.Tasty import Test.Tasty.QuickCheck -import QCSupport (P (..)) - - main :: IO () main = defaultMain tests @@ -55,8 +53,8 @@ tests = testGroup "bloomfilter" -- Element is in a Bloom filter ------------------------------------------------------------------------------- -prop_pai :: (Hashable a) => a -> a -> [a] -> P -> Property -prop_pai _ x xs (P q) = let bf = B.easyList q (x:xs) in +prop_pai :: (Hashable a) => a -> a -> [a] -> FPR -> Property +prop_pai _ x xs (FPR q) = let bf = B.easyList q (x:xs) in B.elem x bf .&&. not (B.notElem x bf) ------------------------------------------------------------------------------- @@ -84,3 +82,21 @@ prop_rechunked f s = prop_rechunked_eq :: LBS.ByteString -> Property prop_rechunked_eq = prop_rechunked hash64 + +------------------------------------------------------------------------------- +-- QC generators +------------------------------------------------------------------------------- + +newtype FPR = FPR Double + deriving stock Show + +instance Arbitrary FPR where + -- The most significant effect of the FPR is from its (negative) exponent, + -- which influences both filter bits and number of hashes. 
So we generate + -- values with an exponent from 10^0 to 10^-6 + arbitrary = do + m <- choose (epsilon, 1-epsilon) + e <- choose (0, 6) + pure (FPR (m * 10 ** (-e))) + where + epsilon = 1e-6 :: Double diff --git a/lsm-tree.cabal b/lsm-tree.cabal index b567f01dd..8017a4c38 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -429,18 +429,15 @@ library bloomfilter ghc-options: -O2 -Wall test-suite bloomfilter-tests - import: language + import: language, warnings type: exitcode-stdio-1.0 hs-source-dirs: bloomfilter/tests main-is: bloomfilter-tests.hs - other-modules: QCSupport build-depends: , base <5 , bytestring , lsm-tree:bloomfilter - , QuickCheck , quickcheck-instances - , random , tasty , tasty-quickcheck , vector From c1eea0660f9162bc8ebeb440d64b8bb6a74ac453 Mon Sep 17 00:00:00 2001 From: Joris Dral Date: Thu, 24 Apr 2025 10:43:17 +0200 Subject: [PATCH 07/43] bloomfilter: change the example spell program into an executable The spell example was a test suite but does not really test anything. It really is an example, not a test. Remove the Words example since it was being used as a benchmark, but we now have better benchmarks. --- bloomfilter/examples/Words.hs | 41 ----------------------------------- bloomfilter/examples/spell.hs | 5 +---- lsm-tree.cabal | 4 ++-- 3 files changed, 3 insertions(+), 47 deletions(-) delete mode 100644 bloomfilter/examples/Words.hs diff --git a/bloomfilter/examples/Words.hs b/bloomfilter/examples/Words.hs deleted file mode 100644 index 5dc385029..000000000 --- a/bloomfilter/examples/Words.hs +++ /dev/null @@ -1,41 +0,0 @@ --- This program is intended for performance analysis. It simply --- builds a Bloom filter from a list of words, one per line, and --- queries it exhaustively. 
- -module Main (main) where - -import Control.Monad (forM_, mapM_) -import qualified Data.BloomFilter as BF -import Data.BloomFilter.Easy (easyList, suggestSizing) -import Data.BloomFilter.Hash (cheapHashes) -import qualified Data.ByteString.Lazy.Char8 as B -import Data.Time.Clock (diffUTCTime, getCurrentTime) -import System.Environment (getArgs) - -conservative, aggressive :: Double -> [B.ByteString] -> BF.Bloom B.ByteString -conservative = easyList - -aggressive fpr xs - = let (size, numHashes) = suggestSizing (length xs) fpr - k = 3 - in BF.fromList (cheapHashes (numHashes - k)) (size * k) xs - -testFunction = conservative - -main = do - args <- getArgs - let files | null args = ["/usr/share/dict/words"] - | otherwise = args - forM_ files $ \file -> do - a <- getCurrentTime - words <- B.lines `fmap` B.readFile file - putStrLn $ {-# SCC "words/length" #-} (show (length words) ++ " words") - b <- getCurrentTime - putStrLn $ show (diffUTCTime b a) ++ "s to count words" - let filt = {-# SCC "construct" #-} testFunction 0.01 words - print filt - c <- getCurrentTime - putStrLn $ show (diffUTCTime c b) ++ "s to construct filter" - {-# SCC "query" #-} mapM_ print $ filter (not . 
(`BF.elem` filt)) words - d <- getCurrentTime - putStrLn $ show (diffUTCTime d c) ++ "s to query every element" diff --git a/bloomfilter/examples/spell.hs b/bloomfilter/examples/spell.hs index e6816b92d..6f31a32a1 100644 --- a/bloomfilter/examples/spell.hs +++ b/bloomfilter/examples/spell.hs @@ -12,11 +12,8 @@ import Prelude hiding (notElem) main :: IO () main = do files <- getArgs - dictionary <- readFile "/usr/share/dict/words" `catchIO` \_ -> return "yes no" + dictionary <- readFile "/usr/share/dict/words" let !bloom = easyList 0.01 (words dictionary) forM_ files $ \file -> do ws <- words <$> readFile file forM_ ws $ \w -> when (w `notElem` bloom) $ putStrLn w - -catchIO :: IO a -> (IOException -> IO a) -> IO a -catchIO = catch diff --git a/lsm-tree.cabal b/lsm-tree.cabal index 8017a4c38..95426ddaa 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -462,9 +462,9 @@ test-suite bloomfilter-primes , base <5 , primes ^>=0.2.1.0 -test-suite bloomfilter-spell +executable bloomfilter-spell import: language - type: exitcode-stdio-1.0 + scope: private hs-source-dirs: bloomfilter/examples main-is: spell.hs build-depends: From 458ba2e1c69ccbf03702423b7b8d3f30ad692894 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Wed, 26 Mar 2025 13:52:44 +0000 Subject: [PATCH 08/43] bloomfilter: Add new size calculation code Calculate the optimal number of bits and hashes directly rather than via an optimisation algorithm. 
--- bloomfilter/src/Data/BloomFilter/Calc.hs | 104 ++++++++++++++++++++++- 1 file changed, 103 insertions(+), 1 deletion(-) diff --git a/bloomfilter/src/Data/BloomFilter/Calc.hs b/bloomfilter/src/Data/BloomFilter/Calc.hs index 814f49889..7d77699ea 100644 --- a/bloomfilter/src/Data/BloomFilter/Calc.hs +++ b/bloomfilter/src/Data/BloomFilter/Calc.hs @@ -2,9 +2,15 @@ module Data.BloomFilter.Calc ( falsePositiveProb, filterSize, + BloomSize (..), + bloomSizeForPolicy, + BloomPolicy (..), + bloomPolicyFPR, + bloomPolicyForFPR, + bloomPolicyForBitsPerEntry, ) where -import Numeric (expm1) +import Numeric -- $setup -- >>> import Numeric (showFFloat) @@ -36,3 +42,99 @@ filterSize :: filterSize n e k = -- recip (1 - (1 - e ** recip k) ** recip (k * n)) negate k * n / log (1 - e ** recip k) + +type FPR = Double +type BitsPerEntry = Double +type NumEntries = Int + +-- | A policy on intended bloom filter size -- independent of the number of +-- elements. +-- +-- We can decide a policy based on: +-- +-- 1. a target false positive rate (FPR) using 'bloomPolicyForFPR' +-- 2. a number of bits per entry using 'bloomPolicyForBitsPerEntry' +-- +-- A policy can be turned into a 'BloomSize' given a target 'NumEntries' using +-- 'bloomSizeForPolicy'. +-- +-- Either way we define the policy, we can inspect the result to see: +-- +-- 1. The bits per entry 'bloomPolicyBitsPerEntry'. This will determine the +-- size of the bloom filter in bits. In general the bits per entry can be +-- fractional. The final bloom filter size will be rounded to a whole +-- number of bits. +-- 2. the number of hashes 'bloomPolicyNumHashes'. 
+-- +data BloomPolicy = BloomPolicy { + bloomPolicyBitsPerEntry :: !Double, + bloomPolicyNumHashes :: !Int + } + deriving Show + +bloomPolicyForFPR :: FPR -> BloomPolicy +bloomPolicyForFPR fpr | fpr <= 0 || fpr >= 1 = + error "bloomPolicyForFPR: fpr out of range (0,1)" + +bloomPolicyForFPR fpr = + BloomPolicy { + bloomPolicyBitsPerEntry = c, + bloomPolicyNumHashes = k + } + where + -- There's a simpler formula to compute the number of bits, but it assumes + -- that k is a real. We must however round k to the nearest natural, and + -- so we have to use a more precise approximation, using the actual value + -- of k. + k :: Int; k' :: Double + k = max 1 (round ((-recip_log2) * log_fpr)) + k' = fromIntegral k + c = negate k' / log1mexp (log_fpr / k') + log_fpr = log fpr + +bloomPolicyForBitsPerEntry :: BitsPerEntry -> BloomPolicy +bloomPolicyForBitsPerEntry c | c < 1 || c > 64 = + error "bloomPolicyForBitsPerEntry: out of ragnge [1,64]" + +bloomPolicyForBitsPerEntry c = + BloomPolicy { + bloomPolicyBitsPerEntry = c, + bloomPolicyNumHashes = k + } + where + k = max 1 (round (c * log2)) + +bloomPolicyFPR :: BloomPolicy -> FPR +bloomPolicyFPR BloomPolicy { + bloomPolicyBitsPerEntry = c, + bloomPolicyNumHashes = k + } = + negate (expm1 (negate (k' / c))) ** k' + where + k' = fromIntegral k + +-- | Parameters for constructing a Bloom filter. +-- +data BloomSize = BloomSize { + -- | The requested number of bits in filter. + -- The actual size will be rounded up to the nearest 512. + bloomNumBits :: !Int, + + -- | The number of hash functions to use. 
+ bloomNumHashes :: !Int + } + deriving Show + +bloomSizeForPolicy :: BloomPolicy -> NumEntries -> BloomSize +bloomSizeForPolicy BloomPolicy { + bloomPolicyBitsPerEntry = c, + bloomPolicyNumHashes = k + } n = + BloomSize { + bloomNumBits = max 0 (ceiling (fromIntegral n * c)), + bloomNumHashes = max 1 k + } + +log2, recip_log2 :: Double +log2 = log 2 +recip_log2 = recip log2 From e4a9333a284e54b870b0c099389857b9a2fba014 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Wed, 26 Mar 2025 14:46:44 +0000 Subject: [PATCH 09/43] bloomfilter: add tests for new size calculation functions --- bloomfilter/tests/bloomfilter-tests.hs | 96 +++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/bloomfilter/tests/bloomfilter-tests.hs b/bloomfilter/tests/bloomfilter-tests.hs index 51b2fe5bf..7aef15b74 100644 --- a/bloomfilter/tests/bloomfilter-tests.hs +++ b/bloomfilter/tests/bloomfilter-tests.hs @@ -2,6 +2,7 @@ module Main (main) where import qualified Data.BloomFilter as B import qualified Data.BloomFilter.BitVec64 as BV64 +import qualified Data.BloomFilter.Calc as B import qualified Data.BloomFilter.Easy as B import Data.BloomFilter.Hash (Hashable (..), hash64) import qualified Data.BloomFilter.Internal as BI @@ -22,7 +23,13 @@ main = defaultMain tests tests :: TestTree tests = testGroup "bloomfilter" - [ testGroup "easyList" + [ testGroup "calculations" + [ testProperty "prop_calc_policy_fpr" prop_calc_policy_fpr + , testProperty "prop_calc_size_hashes_bits" prop_calc_size_hashes_bits + , testProperty "prop_calc_size_fpr_fpr" prop_calc_size_fpr_fpr + , testProperty "prop_calc_size_fpr_bits" prop_calc_size_fpr_bits + ] + , testGroup "easyList" [ testProperty "()" $ prop_pai () , testProperty "Char" $ prop_pai (undefined :: Char) , testProperty "Word32" $ prop_pai (undefined :: Word32) @@ -57,6 +64,76 @@ prop_pai :: (Hashable a) => a -> a -> [a] -> FPR -> Property prop_pai _ x xs (FPR q) = let bf = B.easyList q (x:xs) in B.elem x bf .&&. 
not (B.notElem x bf) +------------------------------------------------------------------------------- +-- Bloom filter size calculations +------------------------------------------------------------------------------- + +prop_calc_policy_fpr :: FPR -> Property +prop_calc_policy_fpr (FPR fpr) = + let policy = B.bloomPolicyForFPR fpr + in B.bloomPolicyFPR policy ~~~ fpr + +prop_calc_size_hashes_bits :: BitsPerEntry -> NumEntries -> Property +prop_calc_size_hashes_bits (BitsPerEntry c) (NumEntries numEntries) = + let policy = B.bloomPolicyForBitsPerEntry c + bsize = B.bloomSizeForPolicy policy numEntries + in numHashFunctions (fromIntegral (B.bloomNumBits bsize)) + (fromIntegral numEntries) + === fromIntegral (B.bloomNumHashes bsize) + +prop_calc_size_fpr_fpr :: FPR -> NumEntries -> Property +prop_calc_size_fpr_fpr (FPR fpr) (NumEntries numEntries) = + let policy = B.bloomPolicyForFPR fpr + bsize = B.bloomSizeForPolicy policy numEntries + in falsePositiveRate (fromIntegral (B.bloomNumBits bsize)) + (fromIntegral numEntries) + (fromIntegral (B.bloomNumHashes bsize)) + ~~~ fpr + +prop_calc_size_fpr_bits :: BitsPerEntry -> NumEntries -> Property +prop_calc_size_fpr_bits (BitsPerEntry c) (NumEntries numEntries) = + let policy = B.bloomPolicyForBitsPerEntry c + bsize = B.bloomSizeForPolicy policy numEntries + in falsePositiveRate (fromIntegral (B.bloomNumBits bsize)) + (fromIntegral numEntries) + (fromIntegral (B.bloomNumHashes bsize)) + ~~~ B.bloomPolicyFPR policy + +-- reference implementations used for sanity checks + +-- | Computes the optimal number of hash functions that minimises the false +-- positive rate for a bloom filter. +-- +-- See Niv Dayan, Manos Athanassoulis, Stratos Idreos, +-- /Optimal Bloom Filters and Adaptive Merging for LSM-Trees/, +-- Footnote 2, page 6. +numHashFunctions :: + Double -- ^ Number of bits assigned to the bloom filter. + -> Double -- ^ Number of entries inserted into the bloom filter. 
+ -> Integer +numHashFunctions bits nentries = + round $ + max 1 ((bits / nentries) * log 2) + +-- | False positive rate +-- +-- See +-- +falsePositiveRate :: + Double -- ^ Number of bits assigned to the bloom filter. + -> Double -- ^ Number of entries inserted into the bloom filter. + -> Double -- ^ Number of hash functions + -> Double +falsePositiveRate m n k = + (1 - exp (-(k * n / m))) ** k + +(~~~) :: Double -> Double -> Property +a ~~~ b = + counterexample (show a ++ " /= " ++ show b) $ + abs (a - b) < epsilon + where + epsilon = 1e-6 :: Double + ------------------------------------------------------------------------------- -- Chunking ------------------------------------------------------------------------------- @@ -100,3 +177,20 @@ instance Arbitrary FPR where pure (FPR (m * 10 ** (-e))) where epsilon = 1e-6 :: Double + +newtype BitsPerEntry = BitsPerEntry Double + deriving stock Show + +instance Arbitrary BitsPerEntry where + arbitrary = BitsPerEntry <$> choose (1, 50) + +newtype NumEntries = NumEntries Int + deriving stock Show + +-- | The FPR calculations are approximations and are not expected to be +-- accurate for low numbers of entries or bits. +-- +instance Arbitrary NumEntries where + arbitrary = NumEntries <$> choose (1_000, 100_000_000) + shrink (NumEntries n) = + [ NumEntries n' | n' <- shrink n, n' >= 1000 ] From 7438e0598d3a51eba2b20a8fba10300d8f510626 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Wed, 26 Mar 2025 13:54:17 +0000 Subject: [PATCH 10/43] bloomfilter: change Easy module to use new size calculations Remove the prime-based approach. 
--- bloomfilter/src/Data/BloomFilter/Easy.hs | 57 +++--------------------- 1 file changed, 5 insertions(+), 52 deletions(-) diff --git a/bloomfilter/src/Data/BloomFilter/Easy.hs b/bloomfilter/src/Data/BloomFilter/Easy.hs index e06ea6012..c535a304d 100644 --- a/bloomfilter/src/Data/BloomFilter/Easy.hs +++ b/bloomfilter/src/Data/BloomFilter/Easy.hs @@ -90,60 +90,13 @@ safeSuggestSizing :: Int -- ^ expected maximum capacity -> Double -- ^ desired false positive rate (0 < /e/ < 1) -> Either String (Word64, Int) -safeSuggestSizing (fromIntegral -> capacity) errRate - | capacity <= 0 = Right (61, 1) +safeSuggestSizing capacity errRate + | capacity <= 0 = Right (1, 1) | errRate <= 0 || errRate >= 1 = Left "invalid error rate" - | otherwise = pickSize primes + | otherwise = Right (fromIntegral bloomNumBits, bloomNumHashes) where - bits :: Double - hashes :: Int - (bits, hashes) = minimum - [ (filterSize capacity errRate k, k') - | k' <- [1 .. 63] - , let k = fromIntegral k' - ] - - pickSize [] = Left "capacity too large" - pickSize (w:ws) - | fromIntegral w >= bits = Right (w, hashes) - | otherwise = pickSize ws - --- primes from around 2^6 to 2^40, with five primes per "octave", --- --- * 61, 73, 83, 97, 109 --- * 127, 139, ... --- * 257, 293, ... --- * ... --- --- The third next element is around 1.5 times larger: --- 97/63 = 1.59; 109/73 = 1.49; 127/83 = 1.52 --- --- The approximate growth rate is 1.14. 
--- -primes :: [Word64] -primes = - [61,73,83,97,109,127,139,167,193,223,257,293,337,389,443,509,587,673,773 - ,887,1021,1171,1327,1553,1783,2039,2351,2699,3089,3559,4093,4703,5399,6203 - ,7129,8191,9403,10799,12413,14251,16381,18803,21617,24821,28517,32749 - ,37633,43237,49667,57047,65537,75277,86467,99317,114089,131071,150559 - ,172933,198659,228203,262139,301123,345889,397337,456409,524287,602233 - ,691799,794669,912839,1048573,1204493,1383593,1589333,1825673,2097143 - ,2408993,2767201,3178667,3651341,4194301,4817977,5534413,6357353,7302683 - ,8388593,9635981,11068817,12714749,14605411,16777213,19271957,22137667 - ,25429499,29210821,33554393,38543917,44275331,50858999,58421653,67108859 - ,77087833,88550677,101718013,116843297,134217689,154175663,177101321 - ,203436029,233686637,268435399,308351357,354202703,406872031,467373223 - ,536870909,616702721,708405407,813744131,934746541,1073741789,1233405449 - ,1416810797,1627488229,1869493097,2147483647,2466810893,2833621657 - ,3254976541,3738986131,4294967291,4933621843,5667243317,6509953069 - ,7477972391,8589934583,9867243719,11334486629,13019906153,14955944737 - ,17179869143,19734487471,22668973277,26039812297,29911889569,34359738337 - ,39468974939,45337946581,52079624657,59823779149,68719476731,78937949837 - ,90675893137,104159249321,119647558343,137438953447,157875899707 - ,181351786333,208318498651,239295116717,274877906899,315751799521 - ,362703572681,416636997289,478590233419,549755813881,631503599063 - ,725407145383,833273994643,957180466901,1099511627689 - ] + BloomSize { bloomNumBits, bloomNumHashes } = + bloomSizeForPolicy (bloomPolicyForFPR errRate) capacity -- | Behaves as 'safeSuggestSizing', but calls 'error' if given -- invalid or out-of-range inputs. 
From d30bc58aa08c130bde306e27b121fa899b199219 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Wed, 26 Mar 2025 13:55:37 +0000 Subject: [PATCH 11/43] bloomfilter: remove primes helper program It was used to calculate the table of primes that we no longer use. --- bloomfilter/tests/primes.hs | 26 -------------------------- lsm-tree.cabal | 9 --------- 2 files changed, 35 deletions(-) delete mode 100644 bloomfilter/tests/primes.hs diff --git a/bloomfilter/tests/primes.hs b/bloomfilter/tests/primes.hs deleted file mode 100644 index 18f340b4a..000000000 --- a/bloomfilter/tests/primes.hs +++ /dev/null @@ -1,26 +0,0 @@ -{-# LANGUAGE BangPatterns #-} -module Main (main) where - -import Data.Bits ((.|.)) -import Data.Numbers.Primes - -steps :: Int -steps = 5 - --- calculate some primes exponentially spaced between 64..2^40 -sparsePrimes :: [Int] -sparsePrimes = go (6 * steps) where - go :: Int -> [Int] - go !e = if e > 40 * steps then [] else go1 e (truncate' (k ^ e)) - - go1 :: Int -> Int -> [Int] - go1 !e !n = if isPrime n then n : go (e + 1) else go1 e (n - 2) -- we count down! - - k :: Double - k = exp (log 2 / fromIntegral steps) - - -- truncate to odd - truncate' n = truncate n .|. 
1 - -main :: IO () -main = print sparsePrimes diff --git a/lsm-tree.cabal b/lsm-tree.cabal index 95426ddaa..a8a2e57b1 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -453,15 +453,6 @@ benchmark bloomfilter-bench , lsm-tree:bloomfilter , random -test-suite bloomfilter-primes - import: language - type: exitcode-stdio-1.0 - hs-source-dirs: bloomfilter/tests - main-is: primes.hs - build-depends: - , base <5 - , primes ^>=0.2.1.0 - executable bloomfilter-spell import: language scope: private From 630995b8ffd2b2db5add6d86f8b4350244f907bc Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Wed, 26 Mar 2025 14:13:02 +0000 Subject: [PATCH 12/43] bloomfilter: remove old calc functions --- bloomfilter/src/Data/BloomFilter/Calc.hs | 33 ------------------------ 1 file changed, 33 deletions(-) diff --git a/bloomfilter/src/Data/BloomFilter/Calc.hs b/bloomfilter/src/Data/BloomFilter/Calc.hs index 7d77699ea..eca507331 100644 --- a/bloomfilter/src/Data/BloomFilter/Calc.hs +++ b/bloomfilter/src/Data/BloomFilter/Calc.hs @@ -1,7 +1,5 @@ -- | Various formulas for working with bloomfilters. 
module Data.BloomFilter.Calc ( - falsePositiveProb, - filterSize, BloomSize (..), bloomSizeForPolicy, BloomPolicy (..), @@ -12,37 +10,6 @@ module Data.BloomFilter.Calc ( import Numeric --- $setup --- >>> import Numeric (showFFloat) - --- | Approximate probability of false positives --- \[ --- {\displaystyle \varepsilon =\left(1-\left[1-{\frac {1}{m}}\right]^{kn}\right)^{k}\approx \left(1-e^{-kn/m}\right)^{k}} --- \] --- --- >>> [ showFFloat (Just 5) (falsePositiveProb 10_000 100_000 k) "" | k <- [1..5] ] --- ["0.09516","0.03286","0.01741","0.01181","0.00943"] --- -falsePositiveProb :: - Double -- ^ /n/, number of elements - -> Double -- ^ /m/, size of bloom filter - -> Double -- ^ /k/, number of hash functions - -> Double -falsePositiveProb n m k = - -- (1 - (1 - recip m) ** (k * n)) ** k - negate (expm1 (negate (k * n / m))) ** k - --- | Filter size for given number of elements, false positive rate and --- number of hash functions. -filterSize :: - Double -- ^ /n/, number of elements - -> Double -- ^ /e/, false positive rate - -> Double -- ^ /k/, number of hash functions - -> Double -filterSize n e k = - -- recip (1 - (1 - e ** recip k) ** recip (k * n)) - negate k * n / log (1 - e ** recip k) - type FPR = Double type BitsPerEntry = Double type NumEntries = Int From 1cff8e7844f5ff598b1e43dbe54ece07a71aea4b Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Sat, 29 Mar 2025 10:19:23 +0000 Subject: [PATCH 13/43] bloomfilter: use new BloomSize type for filter construction functions This improves code clarity. Previously we would pass the number of bits and hashes separately. This makes it clearer what we're supplying by using a distinct type, and lets us directly pass the result of the size calc functions. 
--- bench/macro/lsm-tree-bench-bloomfilter.hs | 5 ++- bloomfilter/bench/bloomfilter-bench.hs | 3 +- bloomfilter/src/Data/BloomFilter.hs | 40 +++++++++---------- bloomfilter/src/Data/BloomFilter/Easy.hs | 30 +++++++------- bloomfilter/src/Data/BloomFilter/Mutable.hs | 15 +++---- src/Database/LSMTree/Internal/RunAcc.hs | 9 +++-- .../Database/LSMTree/Internal/BloomFilter.hs | 4 +- 7 files changed, 54 insertions(+), 52 deletions(-) diff --git a/bench/macro/lsm-tree-bench-bloomfilter.hs b/bench/macro/lsm-tree-bench-bloomfilter.hs index 300a625e8..a878a06ea 100644 --- a/bench/macro/lsm-tree-bench-bloomfilter.hs +++ b/bench/macro/lsm-tree-bench-bloomfilter.hs @@ -241,7 +241,10 @@ elemManyEnv filterSizes rng0 = stToIO $ do -- create the filters mbs <- sequence - [ MBloom.new (fromIntegralChecked numHashFuncs) (fromIntegralChecked numBits) + [ MBloom.new MBloom.BloomSize { + bloomNumBits = fromIntegralChecked numBits, + bloomNumHashes = fromIntegralChecked numHashFuncs + } | (_, _, numBits, numHashFuncs) <- filterSizes ] -- add elements foldM_ diff --git a/bloomfilter/bench/bloomfilter-bench.hs b/bloomfilter/bench/bloomfilter-bench.hs index 2686d9ed2..8b1285456 100644 --- a/bloomfilter/bench/bloomfilter-bench.hs +++ b/bloomfilter/bench/bloomfilter-bench.hs @@ -27,8 +27,7 @@ main = constructBloom :: Int -> Double -> StdGen -> B.Bloom Word64 constructBloom n fpr g0 = - let (bits, hashes) = B.suggestSizing n fpr in - B.unfold hashes bits nextElement (g0, 0) + B.unfold (B.suggestSizing n fpr) nextElement (g0, 0) where nextElement :: (StdGen, Int) -> Maybe (Word64, (StdGen, Int)) nextElement (!g, !i) diff --git a/bloomfilter/src/Data/BloomFilter.hs b/bloomfilter/src/Data/BloomFilter.hs index a554c4ca4..06faa6057 100644 --- a/bloomfilter/src/Data/BloomFilter.hs +++ b/bloomfilter/src/Data/BloomFilter.hs @@ -33,6 +33,7 @@ module Data.BloomFilter ( Bloom, MBloom, CheapHashes, + BloomSize (..), -- * Immutable Bloom filters @@ -59,7 +60,7 @@ import Control.Monad.ST (ST, runST) import 
Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, evalHashes, makeHashes) import Data.BloomFilter.Internal (Bloom (..), bloomInvariant) -import Data.BloomFilter.Mutable (MBloom, insert, new) +import Data.BloomFilter.Mutable (BloomSize (..), MBloom) import qualified Data.BloomFilter.Mutable as MB import Data.Word (Word64) @@ -73,24 +74,21 @@ import qualified Data.BloomFilter.BitVec64 as V -- Example: -- -- @ --- TODO ---import "Data.BloomFilter.Hash" (cheapHashes) --- ---filter = create (cheapHashes 3) 1024 $ \mf -> do --- insertMB mf \"foo\" --- insertMB mf \"bar\" +--filter = create (BloomSize 1024 3) $ \mf -> do +-- insert mf \"foo\" +-- insert mf \"bar\" -- @ -- -- Note that the result of the setup function is not used. -create :: Int -- ^ number of hash functions to use - -> Word64 -- ^ number of bits in filter - -> (forall s. (MBloom s a -> ST s ())) -- ^ setup function - -> Bloom a +create :: BloomSize + -> (forall s. (MBloom s a -> ST s ())) -- ^ setup function + -> Bloom a {-# INLINE create #-} -create hash numBits body = runST $ do - mb <- new hash numBits - body mb - unsafeFreeze mb +create bloomsize body = + runST $ do + mb <- MB.new bloomsize + body mb + unsafeFreeze mb -- | Create an immutable Bloom filter from a mutable one. The mutable -- filter may be modified afterwards. @@ -161,16 +159,15 @@ length = size -- @b@ is used as a new seed. unfold :: forall a b. Hashable a - => Int -- ^ number of hash functions to use - -> Word64 -- ^ number of bits in filter + => BloomSize -> (b -> Maybe (a, b)) -- ^ seeding function -> b -- ^ initial seed -> Bloom a {-# INLINE unfold #-} -unfold hs numBits f k = create hs numBits (loop k) +unfold bloomsize f k = create bloomsize (loop k) where loop :: forall s. 
b -> MBloom s a -> ST s () loop j mb = case f j of - Just (a, j') -> insert mb a >> loop j' mb + Just (a, j') -> MB.insert mb a >> loop j' mb _ -> return () -- | Create an immutable Bloom filter, populating it from a list of @@ -184,11 +181,10 @@ unfold hs numBits f k = create hs numBits (loop k) -- filt = fromList 3 1024 [\"foo\", \"bar\", \"quux\"] -- @ fromList :: Hashable a - => Int -- ^ number of hash functions to use - -> Word64 -- ^ number of bits in filter + => BloomSize -> [a] -- ^ values to populate with -> Bloom a -fromList hs numBits list = create hs numBits $ forM_ list . insert +fromList bloomsize list = create bloomsize $ forM_ list . MB.insert -- $overview -- diff --git a/bloomfilter/src/Data/BloomFilter/Easy.hs b/bloomfilter/src/Data/BloomFilter/Easy.hs index c535a304d..8126894f9 100644 --- a/bloomfilter/src/Data/BloomFilter/Easy.hs +++ b/bloomfilter/src/Data/BloomFilter/Easy.hs @@ -30,7 +30,6 @@ import Data.BloomFilter.Hash (Hashable) import Data.BloomFilter.Mutable (MBloom) import qualified Data.BloomFilter.Mutable as MB import qualified Data.ByteString as SB -import Data.Word (Word64) ------------------------------------------------------------------------------- -- Easy interface @@ -44,21 +43,18 @@ easyList :: Hashable a -> [a] -- ^ values to populate with -> Bloom a {-# SPECIALISE easyList :: Double -> [SB.ByteString] -> Bloom SB.ByteString #-} -easyList errRate xs = B.fromList numHashes numBits xs +easyList errRate xs = + B.fromList (suggestSizing capacity errRate) xs where capacity = length xs - (numBits, numHashes) - | capacity > 0 = suggestSizing capacity errRate - | otherwise = (1, 1) -- | Create a Bloom filter with the desired false positive rate, /ε/ -- and expected maximum size, /n/. 
easyNew :: Double -- ^ desired false positive rate (0 < /ε/ < 1) -> Int -- ^ expected maximum size, /n/ -> ST s (MBloom s a) -easyNew errRate capacity = MB.new numHashes numBits - where - (numBits, numHashes) = suggestSizing capacity errRate +easyNew errRate capacity = + MB.new (suggestSizing capacity errRate) ------------------------------------------------------------------------------- -- Size suggestions @@ -89,20 +85,22 @@ easyNew errRate capacity = MB.new numHashes numBits safeSuggestSizing :: Int -- ^ expected maximum capacity -> Double -- ^ desired false positive rate (0 < /e/ < 1) - -> Either String (Word64, Int) + -> Either String BloomSize safeSuggestSizing capacity errRate - | capacity <= 0 = Right (1, 1) - | errRate <= 0 || errRate >= 1 = Left "invalid error rate" - | otherwise = Right (fromIntegral bloomNumBits, bloomNumHashes) - where - BloomSize { bloomNumBits, bloomNumHashes } = - bloomSizeForPolicy (bloomPolicyForFPR errRate) capacity + | capacity <= 0 = Right BloomSize { + bloomNumBits = 1, + bloomNumHashes = 1 + } + | errRate <= 0 || + errRate >= 1 = Left "invalid error rate" + | otherwise = Right $ bloomSizeForPolicy (bloomPolicyForFPR errRate) + capacity -- | Behaves as 'safeSuggestSizing', but calls 'error' if given -- invalid or out-of-range inputs. suggestSizing :: Int -- ^ expected maximum capacity -> Double -- ^ desired false positive rate (0 < /e/ < 1) - -> (Word64, Int) + -> BloomSize suggestSizing cap errs = either fatal id (safeSuggestSizing cap errs) where fatal = error . 
("Data.BloomFilter.Util.suggestSizing: " ++) diff --git a/bloomfilter/src/Data/BloomFilter/Mutable.hs b/bloomfilter/src/Data/BloomFilter/Mutable.hs index fed2c09ec..e5cf7203b 100644 --- a/bloomfilter/src/Data/BloomFilter/Mutable.hs +++ b/bloomfilter/src/Data/BloomFilter/Mutable.hs @@ -31,6 +31,7 @@ module Data.BloomFilter.Mutable ( -- * Mutable Bloom filters -- ** Creation + BloomSize (..), new, -- ** Accessors @@ -43,12 +44,13 @@ module Data.BloomFilter.Mutable ( import Control.Monad (liftM) import Control.Monad.ST (ST) -import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, evalHashes, - makeHashes) import Data.Kind (Type) import Data.Word (Word64) import qualified Data.BloomFilter.BitVec64 as V +import Data.BloomFilter.Calc (BloomSize (..)) +import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, evalHashes, + makeHashes) import Prelude hiding (elem, length) @@ -68,13 +70,12 @@ instance Show (MBloom s a) where -- -- The size is ceiled at $2^48$. Tell us if you need bigger bloom filters. -- -new :: Int -- ^ number of hash functions to use - -> Word64 -- ^ number of bits in filter - -> ST s (MBloom s a) -new hash numBits = MBloom hash numBits' `liftM` V.new numBits' +new :: BloomSize -> ST s (MBloom s a) +new BloomSize {bloomNumBits = numBits, bloomNumHashes = hash} = + MBloom hash numBits' `liftM` V.new numBits' where numBits' | numBits == 0 = 1 | numBits >= 0xffff_ffff_ffff = 0x1_0000_0000_0000 - | otherwise = numBits + | otherwise = fromIntegral numBits -- | Insert a value into a mutable Bloom filter. Afterwards, a -- membership query for the same value is guaranteed to return @True@. 
diff --git a/src/Database/LSMTree/Internal/RunAcc.hs b/src/Database/LSMTree/Internal/RunAcc.hs index 5cce81cca..2d45f1a53 100644 --- a/src/Database/LSMTree/Internal/RunAcc.hs +++ b/src/Database/LSMTree/Internal/RunAcc.hs @@ -336,10 +336,13 @@ instance NFData RunBloomFilterAlloc where newMBloom :: NumEntries -> RunBloomFilterAlloc -> ST s (MBloom s a) newMBloom (NumEntries nentries) = \case RunAllocFixed !bitsPerEntry -> - let !nbits = fromIntegral bitsPerEntry * fromIntegral nentries + let nbits :: Int + !nbits = fromIntegral bitsPerEntry * nentries in MBloom.new - (fromIntegralChecked $ numHashFunctions nbits (fromIntegralChecked nentries)) - (fromIntegralChecked nbits) + Bloom.BloomSize { + bloomNumBits = nbits, + bloomNumHashes = fromIntegralChecked $ numHashFunctions (fromIntegral nbits) (fromIntegralChecked nentries) + } RunAllocRequestFPR !fpr -> Bloom.Easy.easyNew fpr nentries diff --git a/test/Test/Database/LSMTree/Internal/BloomFilter.hs b/test/Test/Database/LSMTree/Internal/BloomFilter.hs index f36ce3336..352196a46 100644 --- a/test/Test/Database/LSMTree/Internal/BloomFilter.hs +++ b/test/Test/Database/LSMTree/Internal/BloomFilter.hs @@ -53,7 +53,9 @@ roundtrip_prop (Positive (Small hfN)) (limitBits -> bits) ws = counterexample (show sbs) $ Right lhs === rhs where - lhs = BF.fromList hfN bits ws + sz = BF.BloomSize { bloomNumBits = fromIntegral bits, + bloomNumHashes = hfN } + lhs = BF.fromList sz ws sbs = SBS.toShort (LBS.toStrict (bloomFilterToLBS lhs)) rhs = bloomFilterFromSBS sbs From b6a4675acf683b8f0a35c91db06035d137e1f4ab Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Sat, 29 Mar 2025 11:49:06 +0000 Subject: [PATCH 14/43] bloomfilter: change length to size returning BloomSize and rename the (M)Bloom field names. Also change internal rep of numBits to use Int rather than Word64. We already use Int in BloomSize and we only support up to 2^48 bits anyway on 64bit machines. 
Switch to record rather than positional style for (M)Bloom so we can more easily change representation. Temporarily disable the bloom-query-fast mechanism. The implementation of BloomFilterQuery2 will need significant updates. --- bench/macro/lsm-tree-bench-lookups.hs | 9 ++-- bloomfilter/src/Data/BloomFilter.hs | 54 ++++++++++++++------ bloomfilter/src/Data/BloomFilter/Easy.hs | 2 +- bloomfilter/src/Data/BloomFilter/Internal.hs | 14 ++--- bloomfilter/src/Data/BloomFilter/Mutable.hs | 48 ++++++++++------- bloomfilter/tests/bloomfilter-tests.hs | 8 +-- lsm-tree.cabal | 10 ++-- src/Database/LSMTree/Internal/BloomFilter.hs | 16 ++++-- test/Test/Database/LSMTree/Internal/Merge.hs | 3 +- 9 files changed, 102 insertions(+), 62 deletions(-) diff --git a/bench/macro/lsm-tree-bench-lookups.hs b/bench/macro/lsm-tree-bench-lookups.hs index 5edebc588..b40065149 100644 --- a/bench/macro/lsm-tree-bench-lookups.hs +++ b/bench/macro/lsm-tree-bench-lookups.hs @@ -11,7 +11,6 @@ import Control.RefCount import Data.Bits ((.&.)) import Data.BloomFilter (Bloom) import qualified Data.BloomFilter as Bloom -import qualified Data.BloomFilter.Internal as Bloom import Data.Time import qualified Data.Vector as V import Data.Vector.Algorithms.Merge as Merge @@ -167,14 +166,16 @@ benchmarks !caching = withFS $ \hfs hbio -> do traceMarkerIO "Computing statistics for generated runs" let numEntries = V.map Run.size runs numPages = V.map Run.sizeInPages runs - nhashes = V.map Bloom.hashesN blooms + nhashes = V.map (Bloom.bloomNumHashes . 
Bloom.size) blooms bitsPerEntry = V.zipWith - (\b (NumEntries n) -> fromIntegral (Bloom.length b) / fromIntegral n :: Double) + (\b (NumEntries n) -> + fromIntegral (Bloom.bloomNumBits (Bloom.size b)) + / fromIntegral n :: Double) blooms numEntries stats = V.zip4 numEntries numPages nhashes bitsPerEntry putStrLn "Actual stats for generated runs:" - putStrLn "(numEntries, numPages, hashesN, bits per entry)" + putStrLn "(numEntries, numPages, numHashes, bits per entry)" mapM_ print stats _ <- putStr "Pausing. Drop caches now! When ready, press enter." >> getLine diff --git a/bloomfilter/src/Data/BloomFilter.hs b/bloomfilter/src/Data/BloomFilter.hs index 06faa6057..c6b9df226 100644 --- a/bloomfilter/src/Data/BloomFilter.hs +++ b/bloomfilter/src/Data/BloomFilter.hs @@ -48,14 +48,14 @@ module Data.BloomFilter ( fromList, -- ** Accessors - length, + size, elem, elemHashes, notElem, ) where import Control.Exception (assert) -import Control.Monad (forM_, liftM) +import Control.Monad (forM_) import Control.Monad.ST (ST, runST) import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, evalHashes, makeHashes) @@ -64,7 +64,7 @@ import Data.BloomFilter.Mutable (BloomSize (..), MBloom) import qualified Data.BloomFilter.Mutable as MB import Data.Word (Word64) -import Prelude hiding (elem, length, notElem) +import Prelude hiding (elem, notElem) import qualified Data.BloomFilter.BitVec64 as V @@ -93,24 +93,38 @@ create bloomsize body = -- | Create an immutable Bloom filter from a mutable one. The mutable -- filter may be modified afterwards. freeze :: MBloom s a -> ST s (Bloom a) -freeze mb = do - ba <- V.freeze (MB.bitArray mb) - let !bf = Bloom (MB.hashesN mb) (MB.size mb) ba +freeze MB.MBloom { numBits, numHashes, bitArray } = do + bitArray' <- V.freeze bitArray + let !bf = Bloom { + numHashes, + numBits, + bitArray = bitArray' + } assert (bloomInvariant bf) $ pure bf -- | Create an immutable Bloom filter from a mutable one. 
The mutable -- filter /must not/ be modified afterwards, or a runtime crash may -- occur. For a safer creation interface, use 'freeze' or 'create'. unsafeFreeze :: MBloom s a -> ST s (Bloom a) -unsafeFreeze mb = do - ba <- V.unsafeFreeze (MB.bitArray mb) - let !bf = Bloom (MB.hashesN mb) (MB.size mb) ba +unsafeFreeze MB.MBloom { numBits, numHashes, bitArray } = do + bitArray' <- V.unsafeFreeze bitArray + let !bf = Bloom { + numHashes, + numBits, + bitArray = bitArray' + } assert (bloomInvariant bf) $ pure bf -- | Copy an immutable Bloom filter to create a mutable one. There is -- no non-copying equivalent. thaw :: Bloom a -> ST s (MBloom s a) -thaw ub = MB.MBloom (hashesN ub) (size ub) `liftM` V.thaw (bitArray ub) +thaw Bloom { numBits, numHashes, bitArray } = do + bitArray' <- V.thaw bitArray + pure MB.MBloom { + numBits, + numHashes, + bitArray = bitArray' + } -- | Query an immutable Bloom filter for membership. If the value is -- present, return @True@. If the value is not present, there is @@ -120,18 +134,20 @@ elem elt ub = elemHashes (makeHashes elt) ub -- | Query an immutable Bloom filter for membership using already constructed 'Hashes' value. elemHashes :: CheapHashes a -> Bloom a -> Bool -elemHashes !ch !ub = go 0 where +elemHashes !ch Bloom { numBits, numHashes, bitArray } = + go 0 + where go :: Int -> Bool - go !i | i >= hashesN ub + go !i | i >= numHashes = True go !i = let idx' :: Word64 !idx' = evalHashes ch i in let idx :: Int - !idx = fromIntegral (idx' `V.unsafeRemWord64` size ub) in + !idx = fromIntegral (idx' `V.unsafeRemWord64` fromIntegral numBits) in -- While the idx' can cover the full Word64 range, -- after taking the remainder, it now must fit in -- and Int because it's less than the filter size. 
- if V.unsafeIndex (bitArray ub) idx + if V.unsafeIndex bitArray idx then go (i + 1) else False @@ -145,9 +161,13 @@ notElem elt ub = notElemHashes (makeHashes elt) ub notElemHashes :: CheapHashes a -> Bloom a -> Bool notElemHashes !ch !ub = not (elemHashes ch ub) --- | Return the size of an immutable Bloom filter, in bits. -length :: Bloom a -> Word64 -length = size +-- | Return the size of the Bloom filter. +size :: Bloom a -> BloomSize +size Bloom { numBits, numHashes } = + BloomSize { + bloomNumBits = numBits, + bloomNumHashes = numHashes + } -- | Build an immutable Bloom filter from a seed value. The seeding -- function populates the filter as follows. diff --git a/bloomfilter/src/Data/BloomFilter/Easy.hs b/bloomfilter/src/Data/BloomFilter/Easy.hs index 8126894f9..87343f383 100644 --- a/bloomfilter/src/Data/BloomFilter/Easy.hs +++ b/bloomfilter/src/Data/BloomFilter/Easy.hs @@ -5,7 +5,7 @@ module Data.BloomFilter.Easy ( easyList, B.elem, B.notElem, - B.length, + B.size, -- * Mutable bloom filter MBloom, diff --git a/bloomfilter/src/Data/BloomFilter/Internal.hs b/bloomfilter/src/Data/BloomFilter/Internal.hs index cd737ddbb..ccc726e36 100644 --- a/bloomfilter/src/Data/BloomFilter/Internal.hs +++ b/bloomfilter/src/Data/BloomFilter/Internal.hs @@ -11,18 +11,17 @@ import qualified Data.BloomFilter.BitVec64 as V import Data.Kind (Type) import Data.Primitive.ByteArray (sizeofByteArray) import qualified Data.Vector.Primitive as VP -import Data.Word (Word64) type Bloom :: Type -> Type data Bloom a = Bloom { - hashesN :: {-# UNPACK #-} !Int - , size :: {-# UNPACK #-} !Word64 -- ^ size is non-zero - , bitArray :: {-# UNPACK #-} !V.BitVec64 + numBits :: {-# UNPACK #-} !Int -- ^ non-zero + , numHashes :: {-# UNPACK #-} !Int + , bitArray :: {-# UNPACK #-} !V.BitVec64 } type role Bloom nominal bloomInvariant :: Bloom a -> Bool -bloomInvariant (Bloom _ s (V.BV64 (VP.Vector off len ba))) = +bloomInvariant Bloom { numBits = s, bitArray = V.BV64 (VP.Vector off len ba) } = s > 0 
&& s <= 2^(48 :: Int) && off >= 0 @@ -35,7 +34,8 @@ instance Eq (Bloom a) where -- We support arbitrary sized bitvectors, -- therefore an equality is a bit involved: -- we need to be careful when comparing the last bits of bitArray. - Bloom k n (V.BV64 v) == Bloom k' n' (V.BV64 v') = + (==) Bloom { numBits = n, numHashes = k, bitArray = V.BV64 v } + Bloom { numBits = n', numHashes = k', bitArray = V.BV64 v' } = k == k' && n == n' && VP.take w v == VP.take w v' && -- compare full words @@ -50,7 +50,7 @@ instance Eq (Bloom a) where x' = VP.unsafeIndex v' w instance Show (Bloom a) where - show mb = "Bloom { " ++ show (size mb) ++ " bits } " + show mb = "Bloom { " ++ show (numBits mb) ++ " bits } " instance NFData (Bloom a) where rnf !_ = () diff --git a/bloomfilter/src/Data/BloomFilter/Mutable.hs b/bloomfilter/src/Data/BloomFilter/Mutable.hs index e5cf7203b..756fef120 100644 --- a/bloomfilter/src/Data/BloomFilter/Mutable.hs +++ b/bloomfilter/src/Data/BloomFilter/Mutable.hs @@ -35,47 +35,50 @@ module Data.BloomFilter.Mutable ( new, -- ** Accessors - length, + size, elem, -- ** Mutation insert, ) where -import Control.Monad (liftM) import Control.Monad.ST (ST) import Data.Kind (Type) -import Data.Word (Word64) import qualified Data.BloomFilter.BitVec64 as V import Data.BloomFilter.Calc (BloomSize (..)) import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, evalHashes, makeHashes) -import Prelude hiding (elem, length) +import Prelude hiding (elem) type MBloom :: Type -> Type -> Type -- | A mutable Bloom filter, for use within the 'ST' monad. 
data MBloom s a = MBloom { - hashesN :: {-# UNPACK #-} !Int - , size :: {-# UNPACK #-} !Word64 -- ^ size is non-zero - , bitArray :: {-# UNPACK #-} !(V.MBitVec64 s) + numBits :: {-# UNPACK #-} !Int -- ^ non-zero + , numHashes :: {-# UNPACK #-} !Int + , bitArray :: {-# UNPACK #-} !(V.MBitVec64 s) } type role MBloom nominal nominal instance Show (MBloom s a) where - show mb = "MBloom { " ++ show (size mb) ++ " bits } " + show mb = "MBloom { " ++ show (numBits mb) ++ " bits } " -- | Create a new mutable Bloom filter. -- -- The size is ceiled at $2^48$. Tell us if you need bigger bloom filters. -- new :: BloomSize -> ST s (MBloom s a) -new BloomSize {bloomNumBits = numBits, bloomNumHashes = hash} = - MBloom hash numBits' `liftM` V.new numBits' +new BloomSize { bloomNumBits = numBits, bloomNumHashes } = do + bitArray <- V.new (fromIntegral numBits') + pure MBloom { + numBits = numBits', + numHashes = bloomNumHashes, + bitArray + } where numBits' | numBits == 0 = 1 | numBits >= 0xffff_ffff_ffff = 0x1_0000_0000_0000 - | otherwise = fromIntegral numBits + | otherwise = numBits -- | Insert a value into a mutable Bloom filter. Afterwards, a -- membership query for the same value is guaranteed to return @True@. @@ -83,10 +86,11 @@ insert :: Hashable a => MBloom s a -> a -> ST s () insert !mb !x = insertHashes mb (makeHashes x) insertHashes :: MBloom s a -> CheapHashes a -> ST s () -insertHashes (MBloom k m v) !h = go 0 +insertHashes MBloom { numBits = m, numHashes = k, bitArray = v } !h = + go 0 where go !i | i >= k = return () - | otherwise = let !idx = evalHashes h i `rem` m + | otherwise = let !idx = evalHashes h i `rem` fromIntegral m in V.unsafeWrite v idx True >> go (i + 1) -- | Query a mutable Bloom filter for membership. If the value is @@ -96,19 +100,25 @@ elem :: Hashable a => a -> MBloom s a -> ST s Bool elem elt mb = elemHashes (makeHashes elt) mb elemHashes :: forall s a. 
CheapHashes a -> MBloom s a -> ST s Bool -elemHashes !ch (MBloom k m v) = go 0 where +elemHashes !ch MBloom { numBits = m, numHashes = k, bitArray = v } = + go 0 + where go :: Int -> ST s Bool go !i | i >= k = return True | otherwise = do let !idx' = evalHashes ch i - let !idx = idx' `rem` m + let !idx = idx' `rem` fromIntegral m b <- V.unsafeRead v idx if b then go (i + 1) - else return False --- | Return the size of a mutable Bloom filter, in bits. -length :: MBloom s a -> Word64 -length = size + else return False +-- | Return the size of the Bloom filter. +size :: MBloom s a -> BloomSize +size MBloom { numBits, numHashes } = + BloomSize { + bloomNumBits = numBits, + bloomNumHashes = numHashes + } -- $overview -- diff --git a/bloomfilter/tests/bloomfilter-tests.hs b/bloomfilter/tests/bloomfilter-tests.hs index 7aef15b74..6bba3d2f1 100644 --- a/bloomfilter/tests/bloomfilter-tests.hs +++ b/bloomfilter/tests/bloomfilter-tests.hs @@ -47,12 +47,12 @@ tests = testGroup "bloomfilter" ] , testGroup "equality" [ testProperty "doesn't care about leftover bits a" $ - BI.Bloom 1 48 (BV64.BV64 (VP.singleton 0xffff_0000_1234_5678)) === - BI.Bloom 1 48 (BV64.BV64 (VP.singleton 0xeeee_0000_1234_5678)) + BI.Bloom 48 1 (BV64.BV64 (VP.singleton 0xffff_0000_1234_5678)) === + BI.Bloom 48 1 (BV64.BV64 (VP.singleton 0xeeee_0000_1234_5678)) , testProperty "doesn't care about leftover bits b" $ - BI.Bloom 1 49 (BV64.BV64 (VP.singleton 0xffff_0000_1234_5678)) =/= - BI.Bloom 1 49 (BV64.BV64 (VP.singleton 0xeeee_0000_1234_5678)) + BI.Bloom 49 1 (BV64.BV64 (VP.singleton 0xffff_0000_1234_5678)) =/= + BI.Bloom 49 1 (BV64.BV64 (VP.singleton 0xeeee_0000_1234_5678)) ] ] diff --git a/lsm-tree.cabal b/lsm-tree.cabal index a8a2e57b1..643542fbb 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -268,7 +268,9 @@ flag bloom-query-fast common bloom-query-fast if (flag(bloom-query-fast) && impl(ghc >=9.4)) - cpp-options: -DBLOOM_QUERY_FAST + +--TODO: temporarily disabled: +--cpp-options: 
-DBLOOM_QUERY_FAST library import: language, warnings, wno-x-partial, bloom-query-fast @@ -359,10 +361,10 @@ library if (flag(bloom-query-fast) && impl(ghc >=9.4)) -- The bulk bloom filter query uses some fancy stuff - exposed-modules: - Database.LSMTree.Internal.BloomFilterQuery2 - Database.LSMTree.Internal.StrictArray + exposed-modules: Database.LSMTree.Internal.StrictArray + --TODO: temporarily disabled + -- Database.LSMTree.Internal.BloomFilterQuery2 build-depends: data-elevator ^>=0.1.0.2 || ^>=0.2 -- this exists due windows diff --git a/src/Database/LSMTree/Internal/BloomFilter.hs b/src/Database/LSMTree/Internal/BloomFilter.hs index 471fa9f7c..e704af87b 100644 --- a/src/Database/LSMTree/Internal/BloomFilter.hs +++ b/src/Database/LSMTree/Internal/BloomFilter.hs @@ -31,14 +31,16 @@ bloomFilterVersion = 1 bloomFilterToLBS :: BF.Bloom a -> LBS.ByteString bloomFilterToLBS b@(BF.Bloom _ _ bv) = - header b <> LBS.fromStrict (bitVec bv) + header <> LBS.fromStrict (bitVec bv) where - header (BF.Bloom hashesN len _) = + header = -- creates a single 16 byte chunk B.toLazyByteStringWith (B.safeStrategy 16 B.smallChunkSize) mempty $ B.word32Host bloomFilterVersion - <> B.word32Host (fromIntegral hashesN) - <> B.word64Host len + <> B.word32Host (fromIntegral bloomNumHashes) + <> B.word64Host (fromIntegral bloomNumBits) + where + BF.BloomSize { bloomNumBits, bloomNumHashes } = BF.size b bitVec (BV64.BV64 (VP.Vector off len ba)) = byteArrayToByteString (mul8 off) (mul8 len) ba @@ -83,7 +85,11 @@ bloomFilterFromSBS (SBS ba') = do let vec64 :: VP.Vector Word64 vec64 = mkPrimVector 2 len64 ba - let bloom = BF.Bloom (fromIntegral hsn) len (BV64.BV64 vec64) + let bloom = BF.Bloom { + numBits = fromIntegral len, + numHashes = fromIntegral hsn, + bitArray = BV64.BV64 vec64 + } assert (BF.bloomInvariant bloom) $ return bloom where ba :: ByteArray diff --git a/test/Test/Database/LSMTree/Internal/Merge.hs b/test/Test/Database/LSMTree/Internal/Merge.hs index be7c87365..1d92c8dc5 100644 
--- a/test/Test/Database/LSMTree/Internal/Merge.hs +++ b/test/Test/Database/LSMTree/Internal/Merge.hs @@ -103,7 +103,8 @@ prop_MergeDistributes fs hbio mergeType stepSize (SmallList rds) = do (lhsSize === rhsSize) .&&. -- we can't just test bloom filter equality, their sizes may differ. counterexample "runFilter" - (Bloom.length lhsFilter >= Bloom.length rhsFilter) + ( Bloom.bloomNumBits (Bloom.size lhsFilter) + >= Bloom.bloomNumBits (Bloom.size rhsFilter)) .&&. -- the index is equal, but only because range finder precision is -- always 0 for the numbers of entries we are dealing with. counterexample "runIndex" From 14784e9cd7630c746e92ee6f8dae6c46907b45c1 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Sat, 29 Mar 2025 15:39:57 +0000 Subject: [PATCH 15/43] bloomfilter: add (de)serialise functions, for better abstraction At the moment, the bloomfilter sub-library does not itself support serialisation, and this is handled by the lsm-tree lib directly. It does so by being overly familiar with the internal representation of the bloomfilter lib. We still want the file format to be determined by the lsm-tree library, but we would like a better abstraction boundary between the bloomfilter and lsm-tree libraries. So we introduce functions to convert the bloom filters to/from a lower level representation as a BloomSize and a (Mutable)ByteArray for the bits. 
--- bloomfilter/src/Data/BloomFilter.hs | 29 ++++++++++++++++++-- bloomfilter/src/Data/BloomFilter/BitVec64.hs | 21 +++++++++++++- bloomfilter/src/Data/BloomFilter/Mutable.hs | 12 ++++++++ 3 files changed, 58 insertions(+), 4 deletions(-) diff --git a/bloomfilter/src/Data/BloomFilter.hs b/bloomfilter/src/Data/BloomFilter.hs index c6b9df226..803f0bf4b 100644 --- a/bloomfilter/src/Data/BloomFilter.hs +++ b/bloomfilter/src/Data/BloomFilter.hs @@ -46,28 +46,33 @@ module Data.BloomFilter ( create, unfold, fromList, + deserialise, -- ** Accessors size, elem, elemHashes, notElem, + serialise, ) where import Control.Exception (assert) import Control.Monad (forM_) +import Control.Monad.Primitive (PrimMonad, PrimState, RealWorld, + stToPrim) import Control.Monad.ST (ST, runST) +import Data.Primitive.ByteArray (ByteArray, MutableByteArray) +import Data.Word (Word64) + +import qualified Data.BloomFilter.BitVec64 as V import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, evalHashes, makeHashes) import Data.BloomFilter.Internal (Bloom (..), bloomInvariant) import Data.BloomFilter.Mutable (BloomSize (..), MBloom) import qualified Data.BloomFilter.Mutable as MB -import Data.Word (Word64) import Prelude hiding (elem, notElem) -import qualified Data.BloomFilter.BitVec64 as V - -- | Create an immutable Bloom filter, using the given setup function -- which executes in the 'ST' monad. -- @@ -206,6 +211,24 @@ fromList :: Hashable a -> Bloom a fromList bloomsize list = create bloomsize $ forM_ list . 
MB.insert +serialise :: Bloom a -> (BloomSize, ByteArray, Int, Int) +serialise b@Bloom{bitArray} = + (size b, ba, off, len) + where + (ba, off, len) = V.serialise bitArray + +{-# SPECIALISE deserialise :: BloomSize + -> (MutableByteArray RealWorld -> Int -> Int -> IO ()) + -> IO (Bloom a) #-} +deserialise :: PrimMonad m + => BloomSize + -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) + -> m (Bloom a) +deserialise bloomsize fill = do + mbloom <- stToPrim $ MB.new bloomsize + MB.deserialise mbloom fill + stToPrim $ unsafeFreeze mbloom + -- $overview -- -- Each of the functions for creating Bloom filters accepts two parameters: diff --git a/bloomfilter/src/Data/BloomFilter/BitVec64.hs b/bloomfilter/src/Data/BloomFilter/BitVec64.hs index 1160c9831..73bb0e153 100644 --- a/bloomfilter/src/Data/BloomFilter/BitVec64.hs +++ b/bloomfilter/src/Data/BloomFilter/BitVec64.hs @@ -14,12 +14,14 @@ module Data.BloomFilter.BitVec64 ( unsafeFreeze, thaw, unsafeRemWord64, + serialise, + deserialise, ) where import Control.Monad.ST (ST) import Data.Bits import Data.Primitive.ByteArray (ByteArray (ByteArray), - newPinnedByteArray, setByteArray) + MutableByteArray, newPinnedByteArray, setByteArray) import qualified Data.Vector.Primitive as VP import qualified Data.Vector.Primitive.Mutable as VPM import Data.Word (Word64, Word8) @@ -80,6 +82,23 @@ new s !numWords = w2i (roundUpTo64 s) !numBytes = unsafeShiftL numWords 3 -- * 8 +serialise :: BitVec64 -> (ByteArray, Int, Int) +serialise = asByteArray + where + asByteArray (BV64 (VP.Vector off len ba)) = + (ba, off * 8, len * 8) + +-- | Do an inplace overwrite of the byte array representing the bit block. 
+deserialise :: MBitVec64 s + -> (MutableByteArray s -> Int -> Int -> m ()) + -> m () +deserialise bitArray fill = + let (ba, off, len) = asMutableByteArray bitArray + in fill ba off len + where + asMutableByteArray (MBV64 (VP.MVector off len mba)) = + (mba, off * 8, len * 8) + unsafeWrite :: MBitVec64 s -> Word64 -> Bool -> ST s () unsafeWrite (MBV64 mbv) i x = do VPM.unsafeModify mbv (\w -> if x then setBit w (w2i k) else clearBit w (w2i k)) (w2i j) diff --git a/bloomfilter/src/Data/BloomFilter/Mutable.hs b/bloomfilter/src/Data/BloomFilter/Mutable.hs index 756fef120..933848c20 100644 --- a/bloomfilter/src/Data/BloomFilter/Mutable.hs +++ b/bloomfilter/src/Data/BloomFilter/Mutable.hs @@ -40,10 +40,13 @@ module Data.BloomFilter.Mutable ( -- ** Mutation insert, + deserialise, ) where +import Control.Monad.Primitive (PrimState) import Control.Monad.ST (ST) import Data.Kind (Type) +import Data.Primitive.ByteArray (MutableByteArray) import qualified Data.BloomFilter.BitVec64 as V import Data.BloomFilter.Calc (BloomSize (..)) @@ -80,6 +83,15 @@ new BloomSize { bloomNumBits = numBits, bloomNumHashes } = do | numBits >= 0xffff_ffff_ffff = 0x1_0000_0000_0000 | otherwise = numBits +-- | Modify the filter's bit array. The callback is expected to read (exactly) +-- the given number of bytes into the given byte array buffer. +-- +deserialise :: MBloom (PrimState m) a + -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) + -> m () +deserialise MBloom {bitArray} fill = + V.deserialise bitArray fill + -- | Insert a value into a mutable Bloom filter. Afterwards, a -- membership query for the same value is guaranteed to return @True@. 
insert :: Hashable a => MBloom s a -> a -> ST s () From 3c834766ca30821ad8cd8371793db8f6ac4cae17 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Sat, 29 Mar 2025 15:44:58 +0000 Subject: [PATCH 16/43] convert bloomFilterToLBS to use new Bloom.serialise --- src/Database/LSMTree/Internal/BloomFilter.hs | 22 ++++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/Database/LSMTree/Internal/BloomFilter.hs b/src/Database/LSMTree/Internal/BloomFilter.hs index e704af87b..98271c057 100644 --- a/src/Database/LSMTree/Internal/BloomFilter.hs +++ b/src/Database/LSMTree/Internal/BloomFilter.hs @@ -13,8 +13,8 @@ import qualified Data.BloomFilter.Internal as BF import qualified Data.ByteString.Builder.Extra as B import qualified Data.ByteString.Lazy as LBS import Data.ByteString.Short (ShortByteString (SBS)) -import qualified Data.Primitive as P -import Data.Primitive.ByteArray (ByteArray (ByteArray)) +import qualified Data.Primitive.ByteArray as P +import qualified Data.Primitive.PrimArray as P import qualified Data.Vector.Primitive as VP import Data.Word (Word32, Word64, byteSwap32) import Database.LSMTree.Internal.BitMath (ceilDiv64, mul8) @@ -30,20 +30,20 @@ bloomFilterVersion :: Word32 bloomFilterVersion = 1 bloomFilterToLBS :: BF.Bloom a -> LBS.ByteString -bloomFilterToLBS b@(BF.Bloom _ _ bv) = - header <> LBS.fromStrict (bitVec bv) +bloomFilterToLBS bf = + let (size, ba, off, len) = BF.serialise bf + in header size <> byteArrayToLBS ba off len where - header = + header BF.BloomSize { bloomNumBits, bloomNumHashes } = -- creates a single 16 byte chunk B.toLazyByteStringWith (B.safeStrategy 16 B.smallChunkSize) mempty $ B.word32Host bloomFilterVersion <> B.word32Host (fromIntegral bloomNumHashes) <> B.word64Host (fromIntegral bloomNumBits) - where - BF.BloomSize { bloomNumBits, bloomNumHashes } = BF.size b - bitVec (BV64.BV64 (VP.Vector off len ba)) = - byteArrayToByteString (mul8 off) (mul8 len) ba + byteArrayToLBS :: P.ByteArray -> Int -> 
Int -> LBS.ByteString + byteArrayToLBS ba off len = + LBS.fromStrict (byteArrayToByteString off len ba) -- deserialising ----------------------------------------------------------- @@ -92,8 +92,8 @@ bloomFilterFromSBS (SBS ba') = do } assert (BF.bloomInvariant bloom) $ return bloom where - ba :: ByteArray - ba = ByteArray ba' + ba :: P.ByteArray + ba = P.ByteArray ba' word32pa :: P.PrimArray Word32 word32pa = P.PrimArray ba' From 305bff5bf8cae37045d543e26f4a4540dd331712 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Mon, 31 Mar 2025 12:21:12 +0100 Subject: [PATCH 17/43] Switch FsPath to FsErrorPath in FileCorruptedError exception type This provides more detail, and makes it easier to convert exceptions from the fs-api into this exception type (since it contains a FsErrorPath). --- src/Database/LSMTree/Internal/CRC32C.hs | 52 ++++++++++++------- src/Database/LSMTree/Internal/Run.hs | 11 ++-- src/Database/LSMTree/Internal/Snapshot.hs | 3 +- .../LSMTree/Internal/Snapshot/Codec.hs | 8 ++- 4 files changed, 45 insertions(+), 29 deletions(-) diff --git a/src/Database/LSMTree/Internal/CRC32C.hs b/src/Database/LSMTree/Internal/CRC32C.hs index 5bd62ee94..5a995efeb 100644 --- a/src/Database/LSMTree/Internal/CRC32C.hs +++ b/src/Database/LSMTree/Internal/CRC32C.hs @@ -282,24 +282,30 @@ newtype ChecksumsFileName = ChecksumsFileName {unChecksumsFileName :: BSC.ByteSt {-# SPECIALISE getChecksum :: - FsPath + HasFS IO h + -> FsPath -> ChecksumsFile -> ChecksumsFileName -> IO CRC32C #-} getChecksum :: MonadThrow m - => FsPath + => HasFS m h + -> FsPath -> ChecksumsFile -> ChecksumsFileName -> m CRC32C -getChecksum fsPath checksumsFile checksumsFileName = +getChecksum hfs fsPath checksumsFile checksumsFileName = case Map.lookup checksumsFileName checksumsFile of Just checksum -> pure checksum Nothing -> - throwIO . 
ErrFileFormatInvalid fsPath FormatChecksumsFile $ - "could not find checksum for " <> show (unChecksumsFileName checksumsFileName) + throwIO $ + ErrFileFormatInvalid + (mkFsErrorPath hfs fsPath) + FormatChecksumsFile + ("could not find checksum for " <> + show (unChecksumsFileName checksumsFileName)) {-# SPECIALISE readChecksumsFile :: @@ -314,7 +320,7 @@ readChecksumsFile :: -> m ChecksumsFile readChecksumsFile fs path = do str <- withFile fs path ReadMode (\h -> hGetAll fs h) - expectValidFile path FormatChecksumsFile (parseChecksumsFile (BSL.toStrict str)) + expectValidFile fs path FormatChecksumsFile (parseChecksumsFile (BSL.toStrict str)) {-# SPECIALISE writeChecksumsFile :: HasFS IO h -> FsPath -> ChecksumsFile -> IO () #-} writeChecksumsFile :: MonadThrow m @@ -414,24 +420,30 @@ checkCRC fs hbio dropCache expected fp = withFile fs fp ReadMode $ \h -> do hAdviseAll hbio h AdviceSequential !checksum <- hGetAllCRC32C' fs h defaultChunkSize initialCRC32C when dropCache $ hDropCacheAll hbio h - expectChecksum fp expected checksum + expectChecksum fs fp expected checksum {-# SPECIALISE expectChecksum :: - FsPath + HasFS IO h + -> FsPath -> CRC32C -> CRC32C -> IO () #-} expectChecksum :: MonadThrow m - => FsPath + => HasFS m h + -> FsPath -> CRC32C -> CRC32C -> m () -expectChecksum fp expected checksum = +expectChecksum hfs fp expected checksum = when (expected /= checksum) $ - throwIO $ ErrFileChecksumMismatch fp (unCRC32C expected) (unCRC32C checksum) + throwIO $ + ErrFileChecksumMismatch + (mkFsErrorPath hfs fp) + (unCRC32C expected) + (unCRC32C checksum) {------------------------------------------------------------------------------- @@ -451,7 +463,7 @@ data FileCorruptedError = -- | The file fails to parse. ErrFileFormatInvalid -- | File. - !FsPath + !FsErrorPath -- | File format. !FileFormat -- | Error message. @@ -459,7 +471,7 @@ data FileCorruptedError | -- | The file CRC32 checksum is invalid. ErrFileChecksumMismatch -- | File. 
- !FsPath + !FsErrorPath -- | Expected checksum. !Word32 -- | Actual checksum. @@ -469,19 +481,21 @@ data FileCorruptedError {-# SPECIALISE expectValidFile :: - FsPath + HasFS IO h + -> FsPath -> FileFormat -> Either String a -> IO a #-} expectValidFile :: - (MonadThrow m) - => FsPath + MonadThrow m + => HasFS f h + -> FsPath -> FileFormat -> Either String a -> m a -expectValidFile _file _format (Right x) = +expectValidFile _hfs _file _format (Right x) = pure x -expectValidFile file format (Left msg) = - throwIO $ ErrFileFormatInvalid file format msg +expectValidFile hfs file format (Left msg) = + throwIO $ ErrFileFormatInvalid (mkFsErrorPath hfs file) format msg diff --git a/src/Database/LSMTree/Internal/Run.hs b/src/Database/LSMTree/Internal/Run.hs index d04b0990c..fafe1c07c 100644 --- a/src/Database/LSMTree/Internal/Run.hs +++ b/src/Database/LSMTree/Internal/Run.hs @@ -294,7 +294,8 @@ openFromDisk :: -- TODO: make exception safe openFromDisk fs hbio runRunDataCaching indexType runRunFsPaths = do expectedChecksums <- - CRC.expectValidFile (runChecksumsPath runRunFsPaths) CRC.FormatChecksumsFile . fromChecksumsFile + CRC.expectValidFile fs (runChecksumsPath runRunFsPaths) CRC.FormatChecksumsFile + . fromChecksumsFile =<< CRC.readChecksumsFile fs (runChecksumsPath runRunFsPaths) -- verify checksums of files we don't read yet @@ -304,10 +305,12 @@ openFromDisk fs hbio runRunDataCaching indexType runRunFsPaths = do -- read and try parsing files runFilter <- - CRC.expectValidFile (forRunFilterRaw paths) CRC.FormatBloomFilterFile . bloomFilterFromSBS + CRC.expectValidFile fs (forRunFilterRaw paths) CRC.FormatBloomFilterFile + . bloomFilterFromSBS =<< readCRC (forRunFilterRaw expectedChecksums) (forRunFilterRaw paths) (runNumEntries, runIndex) <- - CRC.expectValidFile (forRunIndexRaw paths) CRC.FormatIndexFile . Index.fromSBS indexType + CRC.expectValidFile fs (forRunIndexRaw paths) CRC.FormatIndexFile + . 
Index.fromSBS indexType =<< readCRC (forRunIndexRaw expectedChecksums) (forRunIndexRaw paths) runKOpsFile <- FS.hOpen fs (runKOpsPath runRunFsPaths) FS.ReadMode @@ -336,5 +339,5 @@ openFromDisk fs hbio runRunDataCaching indexType runRunFsPaths = do (sbs, !checksum) <- CRC.hGetExactlyCRC32C_SBS fs h (fromIntegral n) CRC.initialCRC32C -- drop the file from the OS page cache FS.hAdviseAll hbio h FS.AdviceDontNeed - CRC.expectChecksum fp expected checksum + CRC.expectChecksum fs fp expected checksum return sbs diff --git a/src/Database/LSMTree/Internal/Snapshot.hs b/src/Database/LSMTree/Internal/Snapshot.hs index 1d1ee8786..0c806c964 100644 --- a/src/Database/LSMTree/Internal/Snapshot.hs +++ b/src/Database/LSMTree/Internal/Snapshot.hs @@ -517,7 +517,8 @@ openWriteBuffer reg resolve hfs hbio uc activeDir snapWriteBufferPaths = do -- TODO: This reads the blobfile twice: once to check the CRC and once more -- to copy it from the snapshot directory to the active directory. (expectedChecksumForKOps, expectedChecksumForBlob) <- - CRC.expectValidFile (writeBufferChecksumsPath snapWriteBufferPaths) CRC.FormatWriteBufferFile . fromChecksumsFileForWriteBufferFiles + CRC.expectValidFile hfs (writeBufferChecksumsPath snapWriteBufferPaths) CRC.FormatWriteBufferFile + . 
fromChecksumsFileForWriteBufferFiles =<< CRC.readChecksumsFile hfs (writeBufferChecksumsPath snapWriteBufferPaths) checkCRC hfs hbio False (unForKOps expectedChecksumForKOps) (writeBufferKOpsPath snapWriteBufferPaths) checkCRC hfs hbio False (unForBlob expectedChecksumForBlob) (writeBufferBlobPath snapWriteBufferPaths) diff --git a/src/Database/LSMTree/Internal/Snapshot/Codec.hs b/src/Database/LSMTree/Internal/Snapshot/Codec.hs index eef4b6b23..732506e46 100644 --- a/src/Database/LSMTree/Internal/Snapshot/Codec.hs +++ b/src/Database/LSMTree/Internal/Snapshot/Codec.hs @@ -22,7 +22,6 @@ import Codec.CBOR.Decoding import Codec.CBOR.Encoding import Codec.CBOR.Read import Codec.CBOR.Write -import Control.Monad (when) import Control.Monad.Class.MonadThrow (Exception (displayException), MonadThrow (..)) import Data.Bifunctor (Bifunctor (..)) @@ -130,16 +129,15 @@ readFileSnapshotMetaData hfs contentPath checksumPath = do checksumFile <- readChecksumsFile hfs checksumPath let checksumFileName = ChecksumsFileName (BSC.pack "metadata") - expectedChecksum <- getChecksum checksumPath checksumFile checksumFileName + expectedChecksum <- getChecksum hfs checksumPath checksumFile checksumFileName (lbs, actualChecksum) <- FS.withFile hfs contentPath FS.ReadMode $ \h -> do n <- FS.hGetSize hfs h FS.hGetExactlyCRC32C hfs h n initialCRC32C - when (expectedChecksum /= actualChecksum) . throwIO $ - ErrFileChecksumMismatch contentPath (unCRC32C expectedChecksum) (unCRC32C actualChecksum) + expectChecksum hfs contentPath expectedChecksum actualChecksum - expectValidFile contentPath FormatSnapshotMetaData (decodeSnapshotMetaData lbs) + expectValidFile hfs contentPath FormatSnapshotMetaData (decodeSnapshotMetaData lbs) decodeSnapshotMetaData :: ByteString -> Either String SnapshotMetaData decodeSnapshotMetaData lbs = bimap displayException (getVersioned . 
snd) (deserialiseFromBytes decode lbs) From 1927d00e796307faefadf7e79f3f46652f19eb24 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Tue, 1 Apr 2025 00:20:32 +0100 Subject: [PATCH 18/43] bloomfilter: fix showing counterexamples in prop_verifyFPR --- test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs b/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs index 386ba548c..7da0e5a0d 100644 --- a/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs +++ b/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs @@ -89,7 +89,7 @@ prop_verifyFPR p alloc (NumEntries numEntries) (Seed seed) = ub = expectedFPR + 0.03 in assert (fprInvariant True measuredFPR) $ -- measured FPR is in the range [0,1] assert (fprInvariant True expectedFPR) $ -- expected FPR is in the range [0,1] - counterexample (printf "expected $f <= %f <= %f" lb measuredFPR ub) $ + counterexample (printf "expected %f <= %f <= %f" lb measuredFPR ub) $ lb <= measuredFPR .&&. measuredFPR <= ub {------------------------------------------------------------------------------- From 4aa43494fc2d21f5de50d2285326ec965d97eb21 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Sat, 5 Apr 2025 00:16:42 +0100 Subject: [PATCH 19/43] convert bloomFilterFromSBS to use new Bloom.deserialise Note that we now need the bloom bit array to always be pinned, because we can do I/O directly into that buffer. 
--- .hlint.yaml | 1 + bloomfilter/src/Data/BloomFilter/BitVec64.hs | 5 +- src/Database/LSMTree/Internal/BloomFilter.hs | 128 ++++++++++-------- src/Database/LSMTree/Internal/Run.hs | 11 +- .../Database/LSMTree/Internal/BloomFilter.hs | 74 ++++++---- 5 files changed, 128 insertions(+), 91 deletions(-) diff --git a/.hlint.yaml b/.hlint.yaml index 176ac6ab4..85e091a9f 100644 --- a/.hlint.yaml +++ b/.hlint.yaml @@ -36,6 +36,7 @@ - ignore: {name: "Redundant =="} - ignore: {name: "Hoist not"} - ignore: {name: "Use /="} +- ignore: {name: "Use unless"} # Specify additional command line arguments # diff --git a/bloomfilter/src/Data/BloomFilter/BitVec64.hs b/bloomfilter/src/Data/BloomFilter/BitVec64.hs index 73bb0e153..be35f4b22 100644 --- a/bloomfilter/src/Data/BloomFilter/BitVec64.hs +++ b/bloomfilter/src/Data/BloomFilter/BitVec64.hs @@ -71,13 +71,10 @@ newtype MBitVec64 s = MBV64 (VP.MVector s Word64) -- TODO: remove this workaround once a solution exists, e.g. a new primop that -- allows checking for implicit pinning. 
new :: Word64 -> ST s (MBitVec64 s) -new s - | numWords >= 128 = do +new s = do mba <- newPinnedByteArray numBytes setByteArray mba 0 numBytes (0 :: Word8) return (MBV64 (VP.MVector 0 numWords mba)) - | otherwise = - MBV64 <$> VPM.new numWords where !numWords = w2i (roundUpTo64 s) !numBytes = unsafeShiftL numWords 3 -- * 8 diff --git a/src/Database/LSMTree/Internal/BloomFilter.hs b/src/Database/LSMTree/Internal/BloomFilter.hs index 98271c057..bd5cb408e 100644 --- a/src/Database/LSMTree/Internal/BloomFilter.hs +++ b/src/Database/LSMTree/Internal/BloomFilter.hs @@ -2,24 +2,23 @@ module Database.LSMTree.Internal.BloomFilter ( bloomFilterToLBS, - bloomFilterFromSBS, + bloomFilterFromFile, ) where -import Control.Exception (assert) -import Control.Monad (when) -import qualified Data.BloomFilter as BF -import qualified Data.BloomFilter.BitVec64 as BV64 -import qualified Data.BloomFilter.Internal as BF +import Control.Monad (void, when) +import Control.Monad.Class.MonadThrow +import Control.Monad.Primitive (PrimMonad) +import qualified Data.ByteString as BS import qualified Data.ByteString.Builder.Extra as B import qualified Data.ByteString.Lazy as LBS -import Data.ByteString.Short (ShortByteString (SBS)) import qualified Data.Primitive.ByteArray as P -import qualified Data.Primitive.PrimArray as P -import qualified Data.Vector.Primitive as VP import Data.Word (Word32, Word64, byteSwap32) -import Database.LSMTree.Internal.BitMath (ceilDiv64, mul8) +import System.FS.API + +import qualified Data.BloomFilter as BF import Database.LSMTree.Internal.ByteString (byteArrayToByteString) -import Database.LSMTree.Internal.Vector +import Database.LSMTree.Internal.CRC32C (FileCorruptedError (..), + FileFormat (..)) -- serialising ----------------------------------------------------------- @@ -48,55 +47,74 @@ bloomFilterToLBS bf = -- deserialising ----------------------------------------------------------- --- | Read 'BF.Bloom' from a 'ShortByteString'. 
--- --- The input must be 64 bit aligned and exactly contain the serialised bloom --- filter. In successful case the data portion of bloom filter is /not/ copied --- (the short bytestring has only 16 bytes of extra data in the header). +{-# SPECIALISE bloomFilterFromFile :: + HasFS IO h + -> FsPath + -> Handle h + -> IO (BF.Bloom a) #-} +-- | Read a 'BF.Bloom' from a file. -- -bloomFilterFromSBS :: ShortByteString -> Either String (BF.Bloom a) -bloomFilterFromSBS (SBS ba') = do - when (P.sizeofByteArray ba < 16) $ Left "Doesn't contain a header" - - let ver = P.indexPrimArray word32pa 0 - hsn = P.indexPrimArray word32pa 1 - len = P.indexPrimArray word64pa 1 -- length in bits - - when (ver /= bloomFilterVersion) $ Left $ - if byteSwap32 ver == bloomFilterVersion +bloomFilterFromFile :: + (PrimMonad m, MonadCatch m) + => HasFS m h + -> FsPath -- ^ File path just for error reporting + -> Handle h -- ^ The open file, in read mode + -> m (BF.Bloom a) +bloomFilterFromFile hfs fp h = do + header <- rethrowEOFError "Doesn't contain a header" $ + hGetByteArrayExactly hfs h 16 + + let !version = P.indexByteArray header 0 :: Word32 + !nhashes = P.indexByteArray header 1 :: Word32 + !nbits = P.indexByteArray header 1 :: Word64 + + when (version /= bloomFilterVersion) $ throwFormatError $ + if byteSwap32 version == bloomFilterVersion then "Different byte order" else "Unsupported version" - when (len <= 0) $ Left "Length is zero" + when (nbits <= 0) $ throwFormatError "Length is zero" -- limit to 2^48 bits - when (len >= 0x1_0000_0000_0000) $ Left "Too large bloomfilter" - - -- we need to round the size of vector up - let len64 = fromIntegral (ceilDiv64 len) - -- make sure the bit vector exactly fits into the byte array - -- (smaller bit vector could work, but wastes memory and should not happen) - let bytesUsed = mul8 (2 + len64) - when (bytesUsed > P.sizeofByteArray ba) $ - Left "Byte array is too small for components" - when (bytesUsed < P.sizeofByteArray ba) $ - Left "Byte 
array is too large for components" - - let vec64 :: VP.Vector Word64 - vec64 = mkPrimVector 2 len64 ba - - let bloom = BF.Bloom { - numBits = fromIntegral len, - numHashes = fromIntegral hsn, - bitArray = BV64.BV64 vec64 - } - assert (BF.bloomInvariant bloom) $ return bloom + when (nbits >= 0x1_0000_0000_0000) $ throwFormatError "Too large bloomfilter" + --TODO: get max size from bloomfilter lib + + -- read the filter data from the file directly into the bloom filter + bloom <- + BF.deserialise + BF.BloomSize { + BF.bloomNumBits = fromIntegral nbits, + BF.bloomNumHashes = fromIntegral nhashes + } + (\buf off len -> + rethrowEOFError "bloom filter file too short" $ + void $ hGetBufExactly hfs + h buf (BufferOffset off) (fromIntegral len)) + + -- check we're now at EOF + trailing <- hGetSome hfs h 1 + when (not (BS.null trailing)) $ + throwFormatError "Byte array is too large for components" + return bloom where - ba :: P.ByteArray - ba = P.ByteArray ba' - - word32pa :: P.PrimArray Word32 - word32pa = P.PrimArray ba' + throwFormatError = throwIO + . 
ErrFileFormatInvalid + (mkFsErrorPath hfs fp) FormatBloomFilterFile + rethrowEOFError msg = + handleJust + (\e -> if isFsErrorType FsReachedEOF e then Just e else Nothing) + (\e -> throwIO $ + ErrFileFormatInvalid + (fsErrorPath e) FormatBloomFilterFile msg) + +hGetByteArrayExactly :: + (PrimMonad m, MonadThrow m) + => HasFS m h + -> Handle h + -> Int + -> m P.ByteArray +hGetByteArrayExactly hfs h len = do + buf <- P.newByteArray 16 + _ <- hGetBufExactly hfs h buf 0 (fromIntegral len) + P.unsafeFreezeByteArray buf - word64pa :: P.PrimArray Word64 - word64pa = P.PrimArray ba' diff --git a/src/Database/LSMTree/Internal/Run.hs b/src/Database/LSMTree/Internal/Run.hs index fafe1c07c..c076058df 100644 --- a/src/Database/LSMTree/Internal/Run.hs +++ b/src/Database/LSMTree/Internal/Run.hs @@ -43,7 +43,7 @@ import Database.LSMTree.Internal.BlobFile import Database.LSMTree.Internal.BlobRef hiding (mkRawBlobRef, mkWeakBlobRef) import qualified Database.LSMTree.Internal.BlobRef as BlobRef -import Database.LSMTree.Internal.BloomFilter (bloomFilterFromSBS) +import Database.LSMTree.Internal.BloomFilter (bloomFilterFromFile) import qualified Database.LSMTree.Internal.CRC32C as CRC import Database.LSMTree.Internal.Entry (NumEntries (..)) import Database.LSMTree.Internal.Index (Index, IndexType (..)) @@ -304,10 +304,11 @@ openFromDisk fs hbio runRunDataCaching indexType runRunFsPaths = do checkCRC runRunDataCaching (forRunBlobRaw expectedChecksums) (forRunBlobRaw paths) -- read and try parsing files - runFilter <- - CRC.expectValidFile fs (forRunFilterRaw paths) CRC.FormatBloomFilterFile - . bloomFilterFromSBS - =<< readCRC (forRunFilterRaw expectedChecksums) (forRunFilterRaw paths) + let filterPath = forRunFilterRaw paths + checkCRC CacheRunData (forRunFilterRaw expectedChecksums) filterPath + runFilter <- FS.withFile fs filterPath FS.ReadMode $ + bloomFilterFromFile fs filterPath + (runNumEntries, runIndex) <- CRC.expectValidFile fs (forRunIndexRaw paths) CRC.FormatIndexFile . 
Index.fromSBS indexType diff --git a/test/Test/Database/LSMTree/Internal/BloomFilter.hs b/test/Test/Database/LSMTree/Internal/BloomFilter.hs index 352196a46..e23cc5a42 100644 --- a/test/Test/Database/LSMTree/Internal/BloomFilter.hs +++ b/test/Test/Database/LSMTree/Internal/BloomFilter.hs @@ -3,16 +3,25 @@ module Test.Database.LSMTree.Internal.BloomFilter (tests) where import Control.DeepSeq (deepseq) -import Data.Bits (unsafeShiftL, unsafeShiftR, (.&.)) +import Control.Exception (displayException) +import Control.Monad (void) +import qualified Control.Monad.IOSim as IOSim +import Data.Bits ((.&.)) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Builder as BS.Builder +import qualified Data.ByteString.Builder.Extra as BS.Builder import qualified Data.ByteString.Lazy as LBS -import qualified Data.ByteString.Short as SBS -import Data.Primitive.ByteArray (ByteArray (..), byteArrayFromList) import qualified Data.Set as Set import qualified Data.Vector as V import qualified Data.Vector.Primitive as VP import Data.Word (Word32, Word64) +import qualified System.FS.API as FS +import qualified System.FS.API.Strict as FS +import qualified System.FS.Sim.MockFS as MockFS +import qualified System.FS.Sim.STM as FSSim import Test.QuickCheck.Gen (genDouble) +import Test.QuickCheck.Instances () import Test.Tasty (TestTree, testGroup) import Test.Tasty.QuickCheck hiding ((.&.)) @@ -34,7 +43,7 @@ tests :: TestTree tests = testGroup "Database.LSMTree.Internal.BloomFilter" [ testProperty "roundtrip" roundtrip_prop -- a specific case: 300 bits is just under 5x 64 bit words - , testProperty "roundtrip-3-300" $ roundtrip_prop (Positive (Small 3)) 300 + , testProperty "roundtrip-3-300" $ roundtrip_prop (Positive (Small 3)) (Positive 300) , testProperty "total-deserialisation" $ withMaxSuccess 10000 $ prop_total_deserialisation , testProperty "total-deserialisation-whitebox" $ withMaxSuccess 10000 $ @@ -48,44 +57,55 @@ tests = testGroup 
"Database.LSMTree.Internal.BloomFilter" #endif ] -roundtrip_prop :: Positive (Small Int) -> Word64 -> [Word64] -> Property -roundtrip_prop (Positive (Small hfN)) (limitBits -> bits) ws = - counterexample (show sbs) $ - Right lhs === rhs +roundtrip_prop :: Positive (Small Int) -> Positive Int -> [Word64] -> Property +roundtrip_prop (Positive (Small hfN)) (Positive bits) ws = + counterexample (show bs) $ + case bloomFilterFromBS bs of + Left err -> label (displayException err) $ property True + Right rhs -> lhs === rhs where - sz = BF.BloomSize { bloomNumBits = fromIntegral bits, + sz = BF.BloomSize { bloomNumBits = limitBits bits, bloomNumHashes = hfN } lhs = BF.fromList sz ws - sbs = SBS.toShort (LBS.toStrict (bloomFilterToLBS lhs)) - rhs = bloomFilterFromSBS sbs + bs = LBS.toStrict (bloomFilterToLBS lhs) -limitBits :: Word64 -> Word64 +limitBits :: Int -> Int limitBits b = b .&. 0xffffff -prop_total_deserialisation :: [Word32] -> Property -prop_total_deserialisation word32s = - case bloomFilterFromSBS (SBS.SBS ba) of - Left err -> label err $ property True +prop_total_deserialisation :: BS.ByteString -> Property +prop_total_deserialisation bs = + case bloomFilterFromBS bs of + Left err -> label (displayException err) $ property True Right bf -> label "parsed successfully" $ property $ -- Just forcing the filter is not enough (e.g. the bit vector might -- point outside of the byte array). bf `deepseq` BF.bloomInvariant bf - where - !(ByteArray ba) = byteArrayFromList word32s + +-- | Write the bytestring to a file in the mock file system and then use +-- 'bloomFilterFromFile'. 
+bloomFilterFromBS :: BS.ByteString -> Either IOSim.Failure (BF.Bloom a) +bloomFilterFromBS bs = + IOSim.runSim $ do + hfs <- FSSim.simHasFS' MockFS.empty + let file = FS.mkFsPath ["filter"] + -- write the bytestring + FS.withFile hfs file (FS.WriteMode FS.MustBeNew) $ \h -> do + void $ FS.hPutAllStrict hfs h bs + -- deserialise from file + FS.withFile hfs file FS.ReadMode $ \h -> + bloomFilterFromFile hfs file h -- Length is in Word64s. A large length would require significant amount of -- memory, so we make it 'Small'. prop_total_deserialisation_whitebox :: Word32 -> Small Word32 -> Property -prop_total_deserialisation_whitebox hsn (Small len64) = - forAll (vector (fromIntegral len64 * 2)) $ \word32s -> - prop_total_deserialisation (prefix <> word32s) +prop_total_deserialisation_whitebox hsn (Small nword64s) = + forAll (vector (fromIntegral nword64s * 8)) $ \bytes -> + prop_total_deserialisation (prefix <> BS.pack bytes) where - prefix = - [ 1 {- version -} - , hsn - , unsafeShiftL len64 6 -- len64 * 64 (lower 32 bits) - , unsafeShiftR len64 (32 - 6) -- len64 * 64 (upper 32 bits) - ] + prefix = LBS.toStrict $ BS.Builder.toLazyByteString $ + BS.Builder.word32Host 1 {- version -} + <> BS.Builder.word32Host hsn + <> BS.Builder.word64Host (fromIntegral nword64s) newtype FPR = FPR Double deriving stock Show From cfcffa014b2a7717ed57d2bb7981b9be3cc37ee3 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Wed, 9 Apr 2025 10:22:15 +0100 Subject: [PATCH 20/43] bloomfilter: Move most Data.BloomFilter modules under Data.BloomFilter.Classic in preparation for adding blocked bloom filters. Though we'll do a bit of module consolodation first. 
--- bench/macro/lsm-tree-bench-bloomfilter.hs | 2 +- .../Database/LSMTree/Internal/BloomFilter.hs | 2 +- bloomfilter/bench/bloomfilter-bench.hs | 2 +- bloomfilter/examples/spell.hs | 2 +- bloomfilter/src/Data/BloomFilter.hs | 278 +----------------- bloomfilter/src/Data/BloomFilter/Classic.hs | 277 +++++++++++++++++ .../BloomFilter/{ => Classic}/BitVec64.hs | 2 +- .../Data/BloomFilter/{ => Classic}/Calc.hs | 2 +- .../Data/BloomFilter/{ => Classic}/Easy.hs | 14 +- .../BloomFilter/{ => Classic}/Internal.hs | 4 +- .../Data/BloomFilter/{ => Classic}/Mutable.hs | 12 +- bloomfilter/tests/bloomfilter-tests.hs | 10 +- lsm-tree.cabal | 11 +- src/Database/LSMTree/Internal/RunAcc.hs | 4 +- .../Database/LSMTree/Internal/BloomFilter.hs | 4 +- .../LSMTree/Internal/RunBloomFilterAlloc.hs | 2 +- 16 files changed, 317 insertions(+), 311 deletions(-) create mode 100644 bloomfilter/src/Data/BloomFilter/Classic.hs rename bloomfilter/src/Data/BloomFilter/{ => Classic}/BitVec64.hs (99%) rename bloomfilter/src/Data/BloomFilter/{ => Classic}/Calc.hs (98%) rename bloomfilter/src/Data/BloomFilter/{ => Classic}/Easy.hs (92%) rename bloomfilter/src/Data/BloomFilter/{ => Classic}/Internal.hs (94%) rename bloomfilter/src/Data/BloomFilter/{ => Classic}/Mutable.hs (92%) diff --git a/bench/macro/lsm-tree-bench-bloomfilter.hs b/bench/macro/lsm-tree-bench-bloomfilter.hs index a878a06ea..c45fe3278 100644 --- a/bench/macro/lsm-tree-bench-bloomfilter.hs +++ b/bench/macro/lsm-tree-bench-bloomfilter.hs @@ -11,8 +11,8 @@ import Control.Monad.ST.Unsafe import Data.Bits ((.&.)) import Data.BloomFilter (Bloom) import qualified Data.BloomFilter as Bloom +import qualified Data.BloomFilter.Classic.Mutable as MBloom import qualified Data.BloomFilter.Hash as Bloom -import qualified Data.BloomFilter.Mutable as MBloom import Data.Time import Data.Vector (Vector) import qualified Data.Vector as V diff --git a/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs 
b/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs index a854fe41a..84fe79b92 100644 --- a/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs +++ b/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs @@ -11,7 +11,7 @@ module Bench.Database.LSMTree.Internal.BloomFilter ( import Criterion.Main import Data.BloomFilter (Bloom) import qualified Data.BloomFilter as Bloom -import qualified Data.BloomFilter.Easy as Bloom.Easy +import qualified Data.BloomFilter.Classic.Easy as Bloom.Easy import Data.BloomFilter.Hash (Hashable) import qualified Data.Foldable as Fold import Data.Map.Strict (Map) diff --git a/bloomfilter/bench/bloomfilter-bench.hs b/bloomfilter/bench/bloomfilter-bench.hs index 8b1285456..1dc2fb915 100644 --- a/bloomfilter/bench/bloomfilter-bench.hs +++ b/bloomfilter/bench/bloomfilter-bench.hs @@ -1,7 +1,7 @@ module Main where import qualified Data.BloomFilter as B -import qualified Data.BloomFilter.Easy as B +import qualified Data.BloomFilter.Classic.Easy as B import Data.BloomFilter.Hash (Hashable (..), hash64) import Data.Word (Word64) diff --git a/bloomfilter/examples/spell.hs b/bloomfilter/examples/spell.hs index 6f31a32a1..64f018838 100644 --- a/bloomfilter/examples/spell.hs +++ b/bloomfilter/examples/spell.hs @@ -6,7 +6,7 @@ import Control.Monad (forM_, when) import Data.Char (isLetter, toLower) import System.Environment (getArgs) -import Data.BloomFilter.Easy (easyList, notElem) +import Data.BloomFilter.Classic.Easy (easyList, notElem) import Prelude hiding (notElem) main :: IO () diff --git a/bloomfilter/src/Data/BloomFilter.hs b/bloomfilter/src/Data/BloomFilter.hs index 803f0bf4b..064a3349b 100644 --- a/bloomfilter/src/Data/BloomFilter.hs +++ b/bloomfilter/src/Data/BloomFilter.hs @@ -1,277 +1,5 @@ --- | --- --- A fast, space efficient Bloom filter implementation. A Bloom --- filter is a set-like data structure that provides a probabilistic --- membership test. --- --- * Queries do not give false negatives. 
When an element is added to --- a filter, a subsequent membership test will definitely return --- 'True'. --- --- * False positives /are/ possible. If an element has not been added --- to a filter, a membership test /may/ nevertheless indicate that --- the element is present. --- --- This module provides low-level control. For an easier to use --- interface, see the "Data.BloomFilter.Easy" module. - module Data.BloomFilter ( - -- * Overview - -- $overview - - -- ** Ease of use - -- $ease - - -- ** Performance - -- $performance - - -- ** Differences from bloomfilter package - -- $differences - - -- * Types - Hash, - Bloom, - MBloom, - CheapHashes, - BloomSize (..), - - -- * Immutable Bloom filters - - -- ** Conversion - freeze, - thaw, - unsafeFreeze, - - -- ** Creation - create, - unfold, - fromList, - deserialise, - - -- ** Accessors - size, - elem, - elemHashes, - notElem, - serialise, -) where - -import Control.Exception (assert) -import Control.Monad (forM_) -import Control.Monad.Primitive (PrimMonad, PrimState, RealWorld, - stToPrim) -import Control.Monad.ST (ST, runST) -import Data.Primitive.ByteArray (ByteArray, MutableByteArray) -import Data.Word (Word64) - -import qualified Data.BloomFilter.BitVec64 as V -import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, evalHashes, - makeHashes) -import Data.BloomFilter.Internal (Bloom (..), bloomInvariant) -import Data.BloomFilter.Mutable (BloomSize (..), MBloom) -import qualified Data.BloomFilter.Mutable as MB - -import Prelude hiding (elem, notElem) - --- | Create an immutable Bloom filter, using the given setup function --- which executes in the 'ST' monad. --- --- Example: --- --- @ ---filter = create (BloomSize 1024 3) $ \mf -> do --- insert mf \"foo\" --- insert mf \"bar\" --- @ --- --- Note that the result of the setup function is not used. -create :: BloomSize - -> (forall s. 
(MBloom s a -> ST s ())) -- ^ setup function - -> Bloom a -{-# INLINE create #-} -create bloomsize body = - runST $ do - mb <- MB.new bloomsize - body mb - unsafeFreeze mb - --- | Create an immutable Bloom filter from a mutable one. The mutable --- filter may be modified afterwards. -freeze :: MBloom s a -> ST s (Bloom a) -freeze MB.MBloom { numBits, numHashes, bitArray } = do - bitArray' <- V.freeze bitArray - let !bf = Bloom { - numHashes, - numBits, - bitArray = bitArray' - } - assert (bloomInvariant bf) $ pure bf - --- | Create an immutable Bloom filter from a mutable one. The mutable --- filter /must not/ be modified afterwards, or a runtime crash may --- occur. For a safer creation interface, use 'freeze' or 'create'. -unsafeFreeze :: MBloom s a -> ST s (Bloom a) -unsafeFreeze MB.MBloom { numBits, numHashes, bitArray } = do - bitArray' <- V.unsafeFreeze bitArray - let !bf = Bloom { - numHashes, - numBits, - bitArray = bitArray' - } - assert (bloomInvariant bf) $ pure bf - --- | Copy an immutable Bloom filter to create a mutable one. There is --- no non-copying equivalent. -thaw :: Bloom a -> ST s (MBloom s a) -thaw Bloom { numBits, numHashes, bitArray } = do - bitArray' <- V.thaw bitArray - pure MB.MBloom { - numBits, - numHashes, - bitArray = bitArray' - } - --- | Query an immutable Bloom filter for membership. If the value is --- present, return @True@. If the value is not present, there is --- /still/ some possibility that @True@ will be returned. -elem :: Hashable a => a -> Bloom a -> Bool -elem elt ub = elemHashes (makeHashes elt) ub - --- | Query an immutable Bloom filter for membership using already constructed 'Hashes' value. 
-elemHashes :: CheapHashes a -> Bloom a -> Bool -elemHashes !ch Bloom { numBits, numHashes, bitArray } = - go 0 - where - go :: Int -> Bool - go !i | i >= numHashes - = True - go !i = let idx' :: Word64 - !idx' = evalHashes ch i in - let idx :: Int - !idx = fromIntegral (idx' `V.unsafeRemWord64` fromIntegral numBits) in - -- While the idx' can cover the full Word64 range, - -- after taking the remainder, it now must fit in - -- and Int because it's less than the filter size. - if V.unsafeIndex bitArray idx - then go (i + 1) - else False - --- | Query an immutable Bloom filter for non-membership. If the value --- /is/ present, return @False@. If the value is not present, there --- is /still/ some possibility that @False@ will be returned. -notElem :: Hashable a => a -> Bloom a -> Bool -notElem elt ub = notElemHashes (makeHashes elt) ub - --- | Query an immutable Bloom filter for non-membership using already constructed 'Hashes' value. -notElemHashes :: CheapHashes a -> Bloom a -> Bool -notElemHashes !ch !ub = not (elemHashes ch ub) - --- | Return the size of the Bloom filter. -size :: Bloom a -> BloomSize -size Bloom { numBits, numHashes } = - BloomSize { - bloomNumBits = numBits, - bloomNumHashes = numHashes - } - --- | Build an immutable Bloom filter from a seed value. The seeding --- function populates the filter as follows. --- --- * If it returns 'Nothing', it is finished producing values to --- insert into the filter. --- --- * If it returns @'Just' (a,b)@, @a@ is added to the filter and --- @b@ is used as a new seed. -unfold :: forall a b. - Hashable a - => BloomSize - -> (b -> Maybe (a, b)) -- ^ seeding function - -> b -- ^ initial seed - -> Bloom a -{-# INLINE unfold #-} -unfold bloomsize f k = create bloomsize (loop k) - where loop :: forall s. b -> MBloom s a -> ST s () - loop j mb = case f j of - Just (a, j') -> MB.insert mb a >> loop j' mb - _ -> return () - --- | Create an immutable Bloom filter, populating it from a list of --- values. 
--- --- Here is an example that uses the @cheapHashes@ function from the --- "Data.BloomFilter.Hash" module to create a hash function that --- returns three hashes. --- --- @ --- filt = fromList 3 1024 [\"foo\", \"bar\", \"quux\"] --- @ -fromList :: Hashable a - => BloomSize - -> [a] -- ^ values to populate with - -> Bloom a -fromList bloomsize list = create bloomsize $ forM_ list . MB.insert - -serialise :: Bloom a -> (BloomSize, ByteArray, Int, Int) -serialise b@Bloom{bitArray} = - (size b, ba, off, len) - where - (ba, off, len) = V.serialise bitArray - -{-# SPECIALISE deserialise :: BloomSize - -> (MutableByteArray RealWorld -> Int -> Int -> IO ()) - -> IO (Bloom a) #-} -deserialise :: PrimMonad m - => BloomSize - -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) - -> m (Bloom a) -deserialise bloomsize fill = do - mbloom <- stToPrim $ MB.new bloomsize - MB.deserialise mbloom fill - stToPrim $ unsafeFreeze mbloom - --- $overview --- --- Each of the functions for creating Bloom filters accepts two parameters: --- --- * The number of bits that should be used for the filter. Note that --- a filter is fixed in size; it cannot be resized after creation. --- --- * A number of hash functions, /k/, to be used for the filter. --- --- By choosing these parameters with care, it is possible to tune for --- a particular false positive rate. --- The 'Data.BloomFilter.Easy.suggestSizing' function in --- the "Data.BloomFilter.Easy" module calculates useful estimates for --- these parameters. - --- $ease --- --- This module provides immutable interfaces for working with a --- query-only Bloom filter, and for converting to and from mutable --- Bloom filters. --- --- For a higher-level interface that is easy to use, see the --- "Data.BloomFilter.Easy" module. - --- $performance --- --- The implementation has been carefully tuned for high performance --- and low space consumption. 
+ module Data.BloomFilter.Classic + ) where --- $differences --- --- This package is (almost entirely rewritten) fork of --- [bloomfilter](https://hackage.haskell.org/package/bloomfilter) package. --- --- The main differences are --- --- * This packages support bloomfilters of arbitrary sizes --- (not limited to powers of two). Also sizes over 2^32 are supported. --- --- * The 'Bloom' and 'MBloom' types are parametrised over 'Hashes' variable, --- instead of having a @a -> ['Hash']@ typed field. --- This separation allows clean de/serialization of Bloom filters in this --- package, as the hashing scheme is a static. --- --- * [XXH3 hash](https://xxhash.com/) is used instead of Jenkins' --- lookup3. +import Data.BloomFilter.Classic diff --git a/bloomfilter/src/Data/BloomFilter/Classic.hs b/bloomfilter/src/Data/BloomFilter/Classic.hs new file mode 100644 index 000000000..517a08782 --- /dev/null +++ b/bloomfilter/src/Data/BloomFilter/Classic.hs @@ -0,0 +1,277 @@ +-- | +-- +-- A fast, space efficient Bloom filter implementation. A Bloom +-- filter is a set-like data structure that provides a probabilistic +-- membership test. +-- +-- * Queries do not give false negatives. When an element is added to +-- a filter, a subsequent membership test will definitely return +-- 'True'. +-- +-- * False positives /are/ possible. If an element has not been added +-- to a filter, a membership test /may/ nevertheless indicate that +-- the element is present. +-- +-- This module provides low-level control. For an easier to use +-- interface, see the "Data.BloomFilter.Classic.Easy" module. 
+ +module Data.BloomFilter.Classic ( + -- * Overview + -- $overview + + -- ** Ease of use + -- $ease + + -- ** Performance + -- $performance + + -- ** Differences from bloomfilter package + -- $differences + + -- * Types + Hash, + Bloom, + MBloom, + CheapHashes, + BloomSize (..), + + -- * Immutable Bloom filters + + -- ** Conversion + freeze, + thaw, + unsafeFreeze, + + -- ** Creation + create, + unfold, + fromList, + deserialise, + + -- ** Accessors + size, + elem, + elemHashes, + notElem, + serialise, +) where + +import Control.Exception (assert) +import Control.Monad (forM_) +import Control.Monad.Primitive (PrimMonad, PrimState, RealWorld, + stToPrim) +import Control.Monad.ST (ST, runST) +import Data.Primitive.ByteArray (ByteArray, MutableByteArray) +import Data.Word (Word64) + +import qualified Data.BloomFilter.Classic.BitVec64 as V +import Data.BloomFilter.Classic.Internal (Bloom (..), bloomInvariant) +import Data.BloomFilter.Classic.Mutable (BloomSize (..), MBloom) +import qualified Data.BloomFilter.Classic.Mutable as MB +import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, evalHashes, + makeHashes) + +import Prelude hiding (elem, notElem) + +-- | Create an immutable Bloom filter, using the given setup function +-- which executes in the 'ST' monad. +-- +-- Example: +-- +-- @ +--filter = create (BloomSize 1024 3) $ \mf -> do +-- insert mf \"foo\" +-- insert mf \"bar\" +-- @ +-- +-- Note that the result of the setup function is not used. +create :: BloomSize + -> (forall s. (MBloom s a -> ST s ())) -- ^ setup function + -> Bloom a +{-# INLINE create #-} +create bloomsize body = + runST $ do + mb <- MB.new bloomsize + body mb + unsafeFreeze mb + +-- | Create an immutable Bloom filter from a mutable one. The mutable +-- filter may be modified afterwards. 
+freeze :: MBloom s a -> ST s (Bloom a) +freeze MB.MBloom { numBits, numHashes, bitArray } = do + bitArray' <- V.freeze bitArray + let !bf = Bloom { + numHashes, + numBits, + bitArray = bitArray' + } + assert (bloomInvariant bf) $ pure bf + +-- | Create an immutable Bloom filter from a mutable one. The mutable +-- filter /must not/ be modified afterwards, or a runtime crash may +-- occur. For a safer creation interface, use 'freeze' or 'create'. +unsafeFreeze :: MBloom s a -> ST s (Bloom a) +unsafeFreeze MB.MBloom { numBits, numHashes, bitArray } = do + bitArray' <- V.unsafeFreeze bitArray + let !bf = Bloom { + numHashes, + numBits, + bitArray = bitArray' + } + assert (bloomInvariant bf) $ pure bf + +-- | Copy an immutable Bloom filter to create a mutable one. There is +-- no non-copying equivalent. +thaw :: Bloom a -> ST s (MBloom s a) +thaw Bloom { numBits, numHashes, bitArray } = do + bitArray' <- V.thaw bitArray + pure MB.MBloom { + numBits, + numHashes, + bitArray = bitArray' + } + +-- | Query an immutable Bloom filter for membership. If the value is +-- present, return @True@. If the value is not present, there is +-- /still/ some possibility that @True@ will be returned. +elem :: Hashable a => a -> Bloom a -> Bool +elem elt ub = elemHashes (makeHashes elt) ub + +-- | Query an immutable Bloom filter for membership using already constructed 'Hashes' value. +elemHashes :: CheapHashes a -> Bloom a -> Bool +elemHashes !ch Bloom { numBits, numHashes, bitArray } = + go 0 + where + go :: Int -> Bool + go !i | i >= numHashes + = True + go !i = let idx' :: Word64 + !idx' = evalHashes ch i in + let idx :: Int + !idx = fromIntegral (idx' `V.unsafeRemWord64` fromIntegral numBits) in + -- While the idx' can cover the full Word64 range, + -- after taking the remainder, it now must fit in + -- and Int because it's less than the filter size. + if V.unsafeIndex bitArray idx + then go (i + 1) + else False + +-- | Query an immutable Bloom filter for non-membership. 
If the value +-- /is/ present, return @False@. If the value is not present, there +-- is /still/ some possibility that @False@ will be returned. +notElem :: Hashable a => a -> Bloom a -> Bool +notElem elt ub = notElemHashes (makeHashes elt) ub + +-- | Query an immutable Bloom filter for non-membership using already constructed 'Hashes' value. +notElemHashes :: CheapHashes a -> Bloom a -> Bool +notElemHashes !ch !ub = not (elemHashes ch ub) + +-- | Return the size of the Bloom filter. +size :: Bloom a -> BloomSize +size Bloom { numBits, numHashes } = + BloomSize { + bloomNumBits = numBits, + bloomNumHashes = numHashes + } + +-- | Build an immutable Bloom filter from a seed value. The seeding +-- function populates the filter as follows. +-- +-- * If it returns 'Nothing', it is finished producing values to +-- insert into the filter. +-- +-- * If it returns @'Just' (a,b)@, @a@ is added to the filter and +-- @b@ is used as a new seed. +unfold :: forall a b. + Hashable a + => BloomSize + -> (b -> Maybe (a, b)) -- ^ seeding function + -> b -- ^ initial seed + -> Bloom a +{-# INLINE unfold #-} +unfold bloomsize f k = create bloomsize (loop k) + where loop :: forall s. b -> MBloom s a -> ST s () + loop j mb = case f j of + Just (a, j') -> MB.insert mb a >> loop j' mb + _ -> return () + +-- | Create an immutable Bloom filter, populating it from a list of +-- values. +-- +-- Here is an example that uses the @cheapHashes@ function from the +-- "Data.BloomFilter.Classic.Hash" module to create a hash function that +-- returns three hashes. +-- +-- @ +-- filt = fromList 3 1024 [\"foo\", \"bar\", \"quux\"] +-- @ +fromList :: Hashable a + => BloomSize + -> [a] -- ^ values to populate with + -> Bloom a +fromList bloomsize list = create bloomsize $ forM_ list . 
MB.insert + +serialise :: Bloom a -> (BloomSize, ByteArray, Int, Int) +serialise b@Bloom{bitArray} = + (size b, ba, off, len) + where + (ba, off, len) = V.serialise bitArray + +{-# SPECIALISE deserialise :: BloomSize + -> (MutableByteArray RealWorld -> Int -> Int -> IO ()) + -> IO (Bloom a) #-} +deserialise :: PrimMonad m + => BloomSize + -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) + -> m (Bloom a) +deserialise bloomsize fill = do + mbloom <- stToPrim $ MB.new bloomsize + MB.deserialise mbloom fill + stToPrim $ unsafeFreeze mbloom + +-- $overview +-- +-- Each of the functions for creating Bloom filters accepts two parameters: +-- +-- * The number of bits that should be used for the filter. Note that +-- a filter is fixed in size; it cannot be resized after creation. +-- +-- * A number of hash functions, /k/, to be used for the filter. +-- +-- By choosing these parameters with care, it is possible to tune for +-- a particular false positive rate. +-- The 'Data.BloomFilter.Classic.Easy.suggestSizing' function in +-- the "Data.BloomFilter.Classic.Easy" module calculates useful estimates for +-- these parameters. + +-- $ease +-- +-- This module provides immutable interfaces for working with a +-- query-only Bloom filter, and for converting to and from mutable +-- Bloom filters. +-- +-- For a higher-level interface that is easy to use, see the +-- "Data.BloomFilter.Classic.Easy" module. + +-- $performance +-- +-- The implementation has been carefully tuned for high performance +-- and low space consumption. + +-- $differences +-- +-- This package is (almost entirely rewritten) fork of +-- [bloomfilter](https://hackage.haskell.org/package/bloomfilter) package. +-- +-- The main differences are +-- +-- * This packages support bloomfilters of arbitrary sizes +-- (not limited to powers of two). Also sizes over 2^32 are supported. 
+-- +-- * The 'Bloom' and 'MBloom' types are parametrised over 'Hashes' variable, +-- instead of having a @a -> ['Hash']@ typed field. +-- This separation allows clean de/serialization of Bloom filters in this +-- package, as the hashing scheme is a static. +-- +-- * [XXH3 hash](https://xxhash.com/) is used instead of Jenkins' +-- lookup3. diff --git a/bloomfilter/src/Data/BloomFilter/BitVec64.hs b/bloomfilter/src/Data/BloomFilter/Classic/BitVec64.hs similarity index 99% rename from bloomfilter/src/Data/BloomFilter/BitVec64.hs rename to bloomfilter/src/Data/BloomFilter/Classic/BitVec64.hs index be35f4b22..79929231e 100644 --- a/bloomfilter/src/Data/BloomFilter/BitVec64.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic/BitVec64.hs @@ -2,7 +2,7 @@ {-# LANGUAGE MagicHash #-} {-# LANGUAGE UnboxedTuples #-} -- | Minimal bit vector implementation. -module Data.BloomFilter.BitVec64 ( +module Data.BloomFilter.Classic.BitVec64 ( BitVec64 (..), unsafeIndex, prefetchIndex, diff --git a/bloomfilter/src/Data/BloomFilter/Calc.hs b/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs similarity index 98% rename from bloomfilter/src/Data/BloomFilter/Calc.hs rename to bloomfilter/src/Data/BloomFilter/Classic/Calc.hs index eca507331..8f1bcd8e4 100644 --- a/bloomfilter/src/Data/BloomFilter/Calc.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs @@ -1,5 +1,5 @@ -- | Various formulas for working with bloomfilters. -module Data.BloomFilter.Calc ( +module Data.BloomFilter.Classic.Calc ( BloomSize (..), bloomSizeForPolicy, BloomPolicy (..), diff --git a/bloomfilter/src/Data/BloomFilter/Easy.hs b/bloomfilter/src/Data/BloomFilter/Classic/Easy.hs similarity index 92% rename from bloomfilter/src/Data/BloomFilter/Easy.hs rename to bloomfilter/src/Data/BloomFilter/Classic/Easy.hs index 87343f383..45d2f0bc8 100644 --- a/bloomfilter/src/Data/BloomFilter/Easy.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic/Easy.hs @@ -1,5 +1,5 @@ -- | An easy-to-use Bloom filter interface. 
-module Data.BloomFilter.Easy ( +module Data.BloomFilter.Classic.Easy ( -- * Easy creation and querying Bloom, easyList, @@ -23,12 +23,12 @@ module Data.BloomFilter.Easy ( ) where import Control.Monad.ST (ST) -import Data.BloomFilter (Bloom) -import qualified Data.BloomFilter as B -import Data.BloomFilter.Calc +import Data.BloomFilter.Classic (Bloom) +import qualified Data.BloomFilter.Classic as B +import Data.BloomFilter.Classic.Calc +import Data.BloomFilter.Classic.Mutable (MBloom) +import qualified Data.BloomFilter.Classic.Mutable as MB import Data.BloomFilter.Hash (Hashable) -import Data.BloomFilter.Mutable (MBloom) -import qualified Data.BloomFilter.Mutable as MB import qualified Data.ByteString as SB ------------------------------------------------------------------------------- @@ -37,7 +37,7 @@ import qualified Data.ByteString as SB -- | Create a Bloom filter with the desired false positive rate and -- members. The hash functions used are computed by the @cheapHashes@ --- function from the 'Data.BloomFilter.Hash' module. +-- function from the 'Data.BloomFilter.Classic.Hash' module. easyList :: Hashable a => Double -- ^ desired false positive rate (0 < /ε/ < 1) -> [a] -- ^ values to populate with diff --git a/bloomfilter/src/Data/BloomFilter/Internal.hs b/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs similarity index 94% rename from bloomfilter/src/Data/BloomFilter/Internal.hs rename to bloomfilter/src/Data/BloomFilter/Classic/Internal.hs index ccc726e36..bfdd42020 100644 --- a/bloomfilter/src/Data/BloomFilter/Internal.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs @@ -1,13 +1,13 @@ {-# OPTIONS_HADDOCK not-home #-} -- | This module exports 'Bloom'' definition. 
-module Data.BloomFilter.Internal ( +module Data.BloomFilter.Classic.Internal ( Bloom(..), bloomInvariant, ) where import Control.DeepSeq (NFData (..)) import Data.Bits -import qualified Data.BloomFilter.BitVec64 as V +import qualified Data.BloomFilter.Classic.BitVec64 as V import Data.Kind (Type) import Data.Primitive.ByteArray (sizeofByteArray) import qualified Data.Vector.Primitive as VP diff --git a/bloomfilter/src/Data/BloomFilter/Mutable.hs b/bloomfilter/src/Data/BloomFilter/Classic/Mutable.hs similarity index 92% rename from bloomfilter/src/Data/BloomFilter/Mutable.hs rename to bloomfilter/src/Data/BloomFilter/Classic/Mutable.hs index 933848c20..d5fc779f0 100644 --- a/bloomfilter/src/Data/BloomFilter/Mutable.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic/Mutable.hs @@ -12,9 +12,9 @@ -- the element is present. -- -- This module provides low-level control. For an easier to use --- interface, see the "Data.BloomFilter.Easy" module. +-- interface, see the "Data.BloomFilter.Classic.Easy" module. -module Data.BloomFilter.Mutable ( +module Data.BloomFilter.Classic.Mutable ( -- * Overview -- $overview @@ -48,8 +48,8 @@ import Control.Monad.ST (ST) import Data.Kind (Type) import Data.Primitive.ByteArray (MutableByteArray) -import qualified Data.BloomFilter.BitVec64 as V -import Data.BloomFilter.Calc (BloomSize (..)) +import qualified Data.BloomFilter.Classic.BitVec64 as V +import Data.BloomFilter.Classic.Calc (BloomSize (..)) import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, evalHashes, makeHashes) @@ -143,8 +143,8 @@ size MBloom { numBits, numHashes } = -- -- By choosing these parameters with care, it is possible to tune for -- a particular false positive rate. --- The 'Data.BloomFilter.Easy.suggestSizing' function in --- the "Data.BloomFilter.Easy" module calculates useful estimates for +-- The 'Data.BloomFilter.Classic.Easy.suggestSizing' function in +-- the "Data.BloomFilter.Classic.Easy" module calculates useful estimates for -- these parameters. 
-- $ease diff --git a/bloomfilter/tests/bloomfilter-tests.hs b/bloomfilter/tests/bloomfilter-tests.hs index 6bba3d2f1..674e60160 100644 --- a/bloomfilter/tests/bloomfilter-tests.hs +++ b/bloomfilter/tests/bloomfilter-tests.hs @@ -1,11 +1,11 @@ module Main (main) where -import qualified Data.BloomFilter as B -import qualified Data.BloomFilter.BitVec64 as BV64 -import qualified Data.BloomFilter.Calc as B -import qualified Data.BloomFilter.Easy as B +import qualified Data.BloomFilter.Classic as B +import qualified Data.BloomFilter.Classic.BitVec64 as BV64 +import qualified Data.BloomFilter.Classic.Calc as B +import qualified Data.BloomFilter.Classic.Easy as B +import qualified Data.BloomFilter.Classic.Internal as BI import Data.BloomFilter.Hash (Hashable (..), hash64) -import qualified Data.BloomFilter.Internal as BI import Data.ByteString (ByteString) import qualified Data.ByteString as BS diff --git a/lsm-tree.cabal b/lsm-tree.cabal index 643542fbb..3307f9369 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -421,12 +421,13 @@ library bloomfilter exposed-modules: Data.BloomFilter - Data.BloomFilter.BitVec64 - Data.BloomFilter.Calc - Data.BloomFilter.Easy + Data.BloomFilter.Classic + Data.BloomFilter.Classic.BitVec64 + Data.BloomFilter.Classic.Calc + Data.BloomFilter.Classic.Easy + Data.BloomFilter.Classic.Internal + Data.BloomFilter.Classic.Mutable Data.BloomFilter.Hash - Data.BloomFilter.Internal - Data.BloomFilter.Mutable ghc-options: -O2 -Wall diff --git a/src/Database/LSMTree/Internal/RunAcc.hs b/src/Database/LSMTree/Internal/RunAcc.hs index 2d45f1a53..5e047f35b 100644 --- a/src/Database/LSMTree/Internal/RunAcc.hs +++ b/src/Database/LSMTree/Internal/RunAcc.hs @@ -40,8 +40,8 @@ import Control.Exception (assert) import Control.Monad.ST.Strict import Data.BloomFilter (Bloom, MBloom) import qualified Data.BloomFilter as Bloom -import qualified Data.BloomFilter.Easy as Bloom.Easy -import qualified Data.BloomFilter.Mutable as MBloom +import qualified 
Data.BloomFilter.Classic.Easy as Bloom.Easy +import qualified Data.BloomFilter.Classic.Mutable as MBloom import Data.Primitive.PrimVar (PrimVar, modifyPrimVar, newPrimVar, readPrimVar) import Data.Word (Word64) diff --git a/test/Test/Database/LSMTree/Internal/BloomFilter.hs b/test/Test/Database/LSMTree/Internal/BloomFilter.hs index e23cc5a42..aa71c3ef7 100644 --- a/test/Test/Database/LSMTree/Internal/BloomFilter.hs +++ b/test/Test/Database/LSMTree/Internal/BloomFilter.hs @@ -26,8 +26,8 @@ import Test.Tasty (TestTree, testGroup) import Test.Tasty.QuickCheck hiding ((.&.)) import qualified Data.BloomFilter as BF -import qualified Data.BloomFilter.Easy as BF -import qualified Data.BloomFilter.Internal as BF (bloomInvariant) +import qualified Data.BloomFilter.Classic.Easy as BF +import qualified Data.BloomFilter.Classic.Internal as BF (bloomInvariant) import Database.LSMTree.Internal.BloomFilter import qualified Database.LSMTree.Internal.BloomFilterQuery1 as Bloom1 import Database.LSMTree.Internal.Serialise (SerialisedKey, diff --git a/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs b/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs index 7da0e5a0d..643e58f57 100644 --- a/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs +++ b/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs @@ -26,8 +26,8 @@ import Control.Exception (assert) import Control.Monad.ST import Data.BloomFilter (Bloom) import qualified Data.BloomFilter as Bloom +import qualified Data.BloomFilter.Classic.Mutable as MBloom import Data.BloomFilter.Hash (Hashable) -import qualified Data.BloomFilter.Mutable as MBloom import Data.Foldable (Foldable (..)) import Data.Proxy (Proxy (..)) import Data.Set (Set) From 8074c4be9e2c18776372bd5f136cdd3fba343915 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Wed, 9 Apr 2025 14:59:47 +0100 Subject: [PATCH 21/43] bloomfilter: improve naming in Calc functions in preparation for removing the Easy module. 
By making the normal stuff easy, we don't need a special Easy version of the API. We have made the normal stuff easy by allowing the size to be specified via either (sizeForFPR fpr n) or (sizeForBits b n). Whereas originally the low level API needed to be told the number of bits and number of hashes, which is indeed tricky. And then the Easy API wrapped that. --- bench/macro/lsm-tree-bench-bloomfilter.hs | 38 +++-- bench/macro/lsm-tree-bench-lookups.hs | 4 +- .../Database/LSMTree/Internal/BloomFilter.hs | 22 ++- bloomfilter/bench/bloomfilter-bench.hs | 3 +- bloomfilter/examples/spell.hs | 9 +- bloomfilter/src/Data/BloomFilter/Classic.hs | 136 +++++++++++------- .../src/Data/BloomFilter/Classic/Calc.hs | 86 ++++++----- .../src/Data/BloomFilter/Classic/Easy.hs | 123 ---------------- .../src/Data/BloomFilter/Classic/Mutable.hs | 16 +-- bloomfilter/tests/bloomfilter-tests.hs | 34 ++--- lsm-tree.cabal | 1 - .../Database/LSMTree/Extras/NoThunks.hs | 2 +- src/Database/LSMTree/Internal/BloomFilter.hs | 10 +- src/Database/LSMTree/Internal/RunAcc.hs | 55 ++----- .../Database/LSMTree/Internal/BloomFilter.hs | 12 +- test/Test/Database/LSMTree/Internal/Merge.hs | 4 +- .../LSMTree/Internal/RunBloomFilterAlloc.hs | 6 +- 17 files changed, 216 insertions(+), 345 deletions(-) delete mode 100644 bloomfilter/src/Data/BloomFilter/Classic/Easy.hs diff --git a/bench/macro/lsm-tree-bench-bloomfilter.hs b/bench/macro/lsm-tree-bench-bloomfilter.hs index c45fe3278..778126ba1 100644 --- a/bench/macro/lsm-tree-bench-bloomfilter.hs +++ b/bench/macro/lsm-tree-bench-bloomfilter.hs @@ -9,9 +9,8 @@ import Control.Monad import Control.Monad.ST import Control.Monad.ST.Unsafe import Data.Bits ((.&.)) -import Data.BloomFilter (Bloom) +import Data.BloomFilter (Bloom, BloomSize) import qualified Data.BloomFilter as Bloom -import qualified Data.BloomFilter.Classic.Mutable as MBloom import qualified Data.BloomFilter.Hash as Bloom import Data.Time import Data.Vector (Vector) @@ -28,7 +27,6 @@ import 
Text.Printf (printf) import Database.LSMTree.Extras.Orphans () import Database.LSMTree.Internal.Assertions (fromIntegralChecked) import qualified Database.LSMTree.Internal.BloomFilterQuery1 as Bloom1 -import Database.LSMTree.Internal.RunAcc (numHashFunctions) import Database.LSMTree.Internal.Serialise (SerialisedKey, serialiseKey) @@ -60,7 +58,7 @@ benchmarkNumLookups = 25_000_000 benchmarkBatchSize :: Int benchmarkBatchSize = 256 -benchmarkNumBitsPerEntry :: Integer +benchmarkNumBitsPerEntry :: RequestedBitsPerEntry benchmarkNumBitsPerEntry = 10 benchmarks :: IO () @@ -76,7 +74,7 @@ benchmarks = do let filterSizes = lsmStyleBloomFilters benchmarkSizeBase benchmarkNumBitsPerEntry putStrLn "Bloom filter stats:" - putStrLn "(numEntries, sizeFactor, numBits, numHashFuncs)" + putStrLn "(numEntries, sizeFactor, BloomSize { sizeBits, sizeHashes })" mapM_ print filterSizes putStrLn $ "total number of entries:\t " ++ show (totalNumEntries filterSizes) putStrLn $ "total filter size in bytes:\t " ++ show (totalNumBytes filterSizes) @@ -180,10 +178,10 @@ benchmark name description action n (subtractTime, subtractAlloc) expectedAlloc putStrLn "" return (timeNet, allocNet) --- | (numEntries, sizeFactor, numBits, numHashFuncs) -type BloomFilterSizeInfo = (Integer, Integer, Integer, Integer) +-- | (numEntries, sizeFactor, (BloomSize numBits numHashFuncs)) +type BloomFilterSizeInfo = (Integer, Integer, BloomSize) type SizeBase = Int -type RequestedBitsPerEntry = Integer +type RequestedBitsPerEntry = Double -- | Calculate the sizes of a realistic LSM style set of Bloom filters, one -- for each LSM run. 
This uses base 4, with 4 disk levels, using tiering @@ -194,28 +192,29 @@ type RequestedBitsPerEntry = Integer -- lsmStyleBloomFilters :: SizeBase -> RequestedBitsPerEntry -> [BloomFilterSizeInfo] lsmStyleBloomFilters l1 requestedBitsPerEntry = - [ (numEntries, sizeFactor, nbits, nhashes) + [ (numEntries, sizeFactor, bsize) | (numEntries, sizeFactor) <- replicate 8 (2^(l1+0), 1) -- 8 runs at level 1 (tiering) ++ replicate 8 (2^(l1+2), 4) -- 8 runs at level 2 (tiering) ++ replicate 8 (2^(l1+4),16) -- 8 runs at level 3 (tiering) ++ [(2^(l1+8),256)] -- 1 run at level 4 (leveling) - , let nbits = numEntries * requestedBitsPerEntry - nhashes = numHashFunctions nbits numEntries + , let bsize = Bloom.sizeForBits requestedBitsPerEntry (fromIntegral numEntries) ] totalNumEntries, totalNumBytes :: [BloomFilterSizeInfo] -> Integer totalNumEntries filterSizes = - sum [ numEntries | (numEntries, _, _, _) <- filterSizes ] + sum [ numEntries | (numEntries, _, _) <- filterSizes ] totalNumBytes filterSizes = - sum [ nbits | (_,_,nbits,_) <- filterSizes ] `div` 8 + sum [ toInteger (Bloom.sizeBits bsize) + | (_,_,bsize) <- filterSizes ] + `div` 8 totalNumEntriesSanityCheck :: SizeBase -> [BloomFilterSizeInfo] -> Bool totalNumEntriesSanityCheck l1 filterSizes = totalNumEntries filterSizes == - sum [ 2^l1 * sizeFactor | (_, sizeFactor, _, _) <- filterSizes ] + sum [ 2^l1 * sizeFactor | (_, sizeFactor, _) <- filterSizes ] -- | Input environment for benchmarking 'Bloom.elemMany'. 
@@ -240,12 +239,7 @@ elemManyEnv :: [BloomFilterSizeInfo] elemManyEnv filterSizes rng0 = stToIO $ do -- create the filters - mbs <- sequence - [ MBloom.new MBloom.BloomSize { - bloomNumBits = fromIntegralChecked numBits, - bloomNumHashes = fromIntegralChecked numHashFuncs - } - | (_, _, numBits, numHashFuncs) <- filterSizes ] + mbs <- sequence [ Bloom.new bsize | (_, _, bsize) <- filterSizes ] -- add elements foldM_ (\rng (i, mb) -> do @@ -254,13 +248,13 @@ elemManyEnv filterSizes rng0 = -- insert n elements into filter b let k :: Word256 (!k, !rng') = uniform rng - MBloom.insert mb (serialiseKey k) + Bloom.insert mb (serialiseKey k) return rng' ) rng0 (zip [0 .. totalNumEntries filterSizes - 1] (cycle [ mb' - | (mb, (_, sizeFactor, _, _)) <- zip mbs filterSizes + | (mb, (_, sizeFactor, _)) <- zip mbs filterSizes , mb' <- replicate (fromIntegralChecked sizeFactor) mb ])) V.fromList <$> mapM Bloom.unsafeFreeze mbs diff --git a/bench/macro/lsm-tree-bench-lookups.hs b/bench/macro/lsm-tree-bench-lookups.hs index b40065149..0ad4c30ed 100644 --- a/bench/macro/lsm-tree-bench-lookups.hs +++ b/bench/macro/lsm-tree-bench-lookups.hs @@ -166,10 +166,10 @@ benchmarks !caching = withFS $ \hfs hbio -> do traceMarkerIO "Computing statistics for generated runs" let numEntries = V.map Run.size runs numPages = V.map Run.sizeInPages runs - nhashes = V.map (Bloom.bloomNumHashes . Bloom.size) blooms + nhashes = V.map (Bloom.sizeHashes . 
Bloom.size) blooms bitsPerEntry = V.zipWith (\b (NumEntries n) -> - fromIntegral (Bloom.bloomNumBits (Bloom.size b)) + fromIntegral (Bloom.sizeBits (Bloom.size b)) / fromIntegral n :: Double) blooms numEntries diff --git a/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs b/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs index 84fe79b92..c3087a783 100644 --- a/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs +++ b/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs @@ -9,9 +9,9 @@ module Bench.Database.LSMTree.Internal.BloomFilter ( ) where import Criterion.Main +import qualified Data.Bifoldable as BiFold import Data.BloomFilter (Bloom) import qualified Data.BloomFilter as Bloom -import qualified Data.BloomFilter.Classic.Easy as Bloom.Easy import Data.BloomFilter.Hash (Hashable) import qualified Data.Foldable as Fold import Data.Map.Strict (Map) @@ -38,8 +38,11 @@ benchmarks = bgroup "Bench.Database.LSMTree.Internal.BloomFilter" [ ] , env (constructionEnv 2_500_000) $ \ m -> bgroup "construction" [ - bench "easyList 0.1" $ whnf (constructBloom Bloom.Easy.easyList 0.1) m - , bench "easyList 0.9" $ whnf (constructBloom Bloom.Easy.easyList 0.9) m + bench "easyList 0.1" $ + whnf (constructBloom 0.1) m + + , bench "easyList 0.9" $ + whnf (constructBloom 0.9) m ] ] @@ -57,7 +60,9 @@ elemEnv fpr nbloom nelemsPositive nelemsNegative = do $ uniformWithoutReplacement @UTxOKey stdgen (nbloom + nelemsNegative) ys2 = sampleUniformWithReplacement @UTxOKey stdgen' nelemsPositive xs zs <- generate $ shuffle (ys1 ++ ys2) - pure (Bloom.Easy.easyList fpr (fmap serialiseKey xs), fmap serialiseKey zs) + pure ( Bloom.fromList (Bloom.policyForFPR fpr) (fmap serialiseKey xs) + , fmap serialiseKey zs + ) -- | Used for benchmarking 'Bloom.elem'. elems :: Hashable a => Bloom a -> [a] -> () @@ -74,8 +79,11 @@ constructionEnv n = do -- | Used for benchmarking the construction of bloom filters from write buffers. 
constructBloom :: - (Double -> [SerialisedKey] -> Bloom SerialisedKey) - -> Double + Double -> Map SerialisedKey SerialisedKey -> Bloom SerialisedKey -constructBloom mkBloom fpr m = mkBloom fpr (Map.keys m) +constructBloom fpr m = + -- For faster construction, avoid going via lists and use Bloom.create, + -- traversing the map inserting the keys + Bloom.create (Bloom.sizeForFPR fpr (Map.size m)) $ \b -> + BiFold.bifoldMap (\k -> Bloom.insert b k) (\_v -> pure ()) m diff --git a/bloomfilter/bench/bloomfilter-bench.hs b/bloomfilter/bench/bloomfilter-bench.hs index 1dc2fb915..a1572c24c 100644 --- a/bloomfilter/bench/bloomfilter-bench.hs +++ b/bloomfilter/bench/bloomfilter-bench.hs @@ -1,7 +1,6 @@ module Main where import qualified Data.BloomFilter as B -import qualified Data.BloomFilter.Classic.Easy as B import Data.BloomFilter.Hash (Hashable (..), hash64) import Data.Word (Word64) @@ -27,7 +26,7 @@ main = constructBloom :: Int -> Double -> StdGen -> B.Bloom Word64 constructBloom n fpr g0 = - B.unfold (B.suggestSizing n fpr) nextElement (g0, 0) + B.unfold (B.sizeForFPR fpr n) nextElement (g0, 0) where nextElement :: (StdGen, Int) -> Maybe (Word64, (StdGen, Int)) nextElement (!g, !i) diff --git a/bloomfilter/examples/spell.hs b/bloomfilter/examples/spell.hs index 64f018838..ae2407a6d 100644 --- a/bloomfilter/examples/spell.hs +++ b/bloomfilter/examples/spell.hs @@ -1,19 +1,16 @@ {-# LANGUAGE BangPatterns #-} module Main (main) where -import Control.Exception (IOException, catch) import Control.Monad (forM_, when) -import Data.Char (isLetter, toLower) import System.Environment (getArgs) -import Data.BloomFilter.Classic.Easy (easyList, notElem) -import Prelude hiding (notElem) +import qualified Data.BloomFilter as B main :: IO () main = do files <- getArgs dictionary <- readFile "/usr/share/dict/words" - let !bloom = easyList 0.01 (words dictionary) + let !bloom = B.fromList (B.policyForFPR 0.01) (words dictionary) forM_ files $ \file -> do ws <- words <$> readFile file 
- forM_ ws $ \w -> when (w `notElem` bloom) $ putStrLn w + forM_ ws $ \w -> when (w `B.notElem` bloom) $ putStrLn w diff --git a/bloomfilter/src/Data/BloomFilter/Classic.hs b/bloomfilter/src/Data/BloomFilter/Classic.hs index 517a08782..fc920b433 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic.hs @@ -19,28 +19,18 @@ module Data.BloomFilter.Classic ( -- * Overview -- $overview - -- ** Ease of use - -- $ease - - -- ** Performance - -- $performance + -- ** Example: a spell checker + -- $example -- ** Differences from bloomfilter package -- $differences -- * Types Hash, - Bloom, - MBloom, CheapHashes, - BloomSize (..), -- * Immutable Bloom filters - - -- ** Conversion - freeze, - thaw, - unsafeFreeze, + Bloom, -- ** Creation create, @@ -48,16 +38,38 @@ module Data.BloomFilter.Classic ( fromList, deserialise, + -- ** Sizes + NumEntries, + BloomSize (..), + FPR, + sizeForFPR, + BitsPerEntry, + sizeForBits, + sizeForPolicy, + BloomPolicy (..), + policyFPR, + policyForFPR, + policyForBits, + -- ** Accessors size, elem, elemHashes, notElem, serialise, + + -- * Mutable Bloom filters + MBloom, + new, + insert, + + -- ** Conversion + freeze, + thaw, + unsafeFreeze, ) where import Control.Exception (assert) -import Control.Monad (forM_) import Control.Monad.Primitive (PrimMonad, PrimState, RealWorld, stToPrim) import Control.Monad.ST (ST, runST) @@ -65,8 +77,9 @@ import Data.Primitive.ByteArray (ByteArray, MutableByteArray) import Data.Word (Word64) import qualified Data.BloomFilter.Classic.BitVec64 as V +import Data.BloomFilter.Classic.Calc import Data.BloomFilter.Classic.Internal (Bloom (..), bloomInvariant) -import Data.BloomFilter.Classic.Mutable (BloomSize (..), MBloom) +import Data.BloomFilter.Classic.Mutable (MBloom (..), insert, new) import qualified Data.BloomFilter.Classic.Mutable as MB import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, evalHashes, makeHashes) @@ -79,7 +92,7 @@ import Prelude hiding (elem, 
notElem) -- Example: -- -- @ ---filter = create (BloomSize 1024 3) $ \mf -> do +--filter = create (sizeForBits 16 2) $ \mf -> do -- insert mf \"foo\" -- insert mf \"bar\" -- @ @@ -91,14 +104,14 @@ create :: BloomSize {-# INLINE create #-} create bloomsize body = runST $ do - mb <- MB.new bloomsize + mb <- new bloomsize body mb unsafeFreeze mb -- | Create an immutable Bloom filter from a mutable one. The mutable -- filter may be modified afterwards. freeze :: MBloom s a -> ST s (Bloom a) -freeze MB.MBloom { numBits, numHashes, bitArray } = do +freeze MBloom { numBits, numHashes, bitArray } = do bitArray' <- V.freeze bitArray let !bf = Bloom { numHashes, @@ -111,7 +124,7 @@ freeze MB.MBloom { numBits, numHashes, bitArray } = do -- filter /must not/ be modified afterwards, or a runtime crash may -- occur. For a safer creation interface, use 'freeze' or 'create'. unsafeFreeze :: MBloom s a -> ST s (Bloom a) -unsafeFreeze MB.MBloom { numBits, numHashes, bitArray } = do +unsafeFreeze MBloom { numBits, numHashes, bitArray } = do bitArray' <- V.unsafeFreeze bitArray let !bf = Bloom { numHashes, @@ -125,7 +138,7 @@ unsafeFreeze MB.MBloom { numBits, numHashes, bitArray } = do thaw :: Bloom a -> ST s (MBloom s a) thaw Bloom { numBits, numHashes, bitArray } = do bitArray' <- V.thaw bitArray - pure MB.MBloom { + pure MBloom { numBits, numHashes, bitArray = bitArray' @@ -170,8 +183,8 @@ notElemHashes !ch !ub = not (elemHashes ch ub) size :: Bloom a -> BloomSize size Bloom { numBits, numHashes } = BloomSize { - bloomNumBits = numBits, - bloomNumHashes = numHashes + sizeBits = numBits, + sizeHashes = numHashes } -- | Build an immutable Bloom filter from a seed value. The seeding @@ -192,24 +205,24 @@ unfold :: forall a b. unfold bloomsize f k = create bloomsize (loop k) where loop :: forall s. 
b -> MBloom s a -> ST s () loop j mb = case f j of - Just (a, j') -> MB.insert mb a >> loop j' mb + Just (a, j') -> insert mb a >> loop j' mb _ -> return () --- | Create an immutable Bloom filter, populating it from a list of --- values. +-- | Create a Bloom filter, populating it from a sequence of values. -- --- Here is an example that uses the @cheapHashes@ function from the --- "Data.BloomFilter.Classic.Hash" module to create a hash function that --- returns three hashes. +-- For example -- -- @ --- filt = fromList 3 1024 [\"foo\", \"bar\", \"quux\"] +-- filt = fromList (policyForBits 10) [\"foo\", \"bar\", \"quux\"] -- @ -fromList :: Hashable a - => BloomSize - -> [a] -- ^ values to populate with +fromList :: (Foldable t, Hashable a) + => BloomPolicy + -> t a -- ^ values to populate with -> Bloom a -fromList bloomsize list = create bloomsize $ forM_ list . MB.insert +fromList policy xs = + create bsize (\b -> mapM_ (insert b) xs) + where + bsize = sizeForPolicy policy (length xs) serialise :: Bloom a -> (BloomSize, ByteArray, Int, Int) serialise b@Bloom{bitArray} = @@ -225,38 +238,55 @@ deserialise :: PrimMonad m -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) -> m (Bloom a) deserialise bloomsize fill = do - mbloom <- stToPrim $ MB.new bloomsize + mbloom <- stToPrim $ new bloomsize MB.deserialise mbloom fill stToPrim $ unsafeFreeze mbloom -- $overview -- --- Each of the functions for creating Bloom filters accepts two parameters: +-- Each of the functions for creating Bloom filters accepts a 'BloomSize'. The +-- size determines the number of bits that should be used for the filter. Note +-- that a filter is fixed in size; it cannot be resized after creation. -- --- * The number of bits that should be used for the filter. Note that --- a filter is fixed in size; it cannot be resized after creation. 
+-- The size can be specified by asking for a target false positive rate (FPR) +-- or a number of bits per element, and the number of elements in the filter. +-- For example: -- --- * A number of hash functions, /k/, to be used for the filter. +-- * @'sizeForFPR' 1e-3 10_000@ for a Bloom filter sized for 10,000 elements +-- with a false positive rate of 1 in 1000 +-- * @'sizeForBits' 10 10_000@ for a Bloom filter sized for 10,000 elements +-- with 10 bits per element -- --- By choosing these parameters with care, it is possible to tune for --- a particular false positive rate. --- The 'Data.BloomFilter.Classic.Easy.suggestSizing' function in --- the "Data.BloomFilter.Classic.Easy" module calculates useful estimates for --- these parameters. - --- $ease +-- Depending on the application it may be more important to target a fixed +-- amount of memory to use, or target a specific FPR. +-- +-- As a very rough guide for filter sizes, here are a range of FPRs and bits +-- per element: -- --- This module provides immutable interfaces for working with a --- query-only Bloom filter, and for converting to and from mutable --- Bloom filters. +-- * FPR of 1e-1 requires approximately 4.8 bits per element +-- * FPR of 1e-2 requires approximately 9.6 bits per element +-- * FPR of 1e-3 requires approximately 14.4 bits per element +-- * FPR of 1e-4 requires approximately 19.2 bits per element +-- * FPR of 1e-5 requires approximately 24.0 bits per element -- --- For a higher-level interface that is easy to use, see the --- "Data.BloomFilter.Classic.Easy" module. --- $performance +-- $example +-- +-- This example reads a dictionary file containing one word per line, +-- constructs a Bloom filter with a 1% false positive rate, and +-- spellchecks its standard input. Like the Unix @spell@ command, it +-- prints each word that it does not recognize. -- --- The implementation has been carefully tuned for high performance --- and low space consumption.
+-- @ +-- import Data.Maybe (mapMaybe) +-- import qualified Data.BloomFilter as B +-- +-- main = do +-- filt \<- B.fromList (B.policyForFPR 0.01) . words \<$> readFile "\/usr\/share\/dict\/words" +-- let check word | B.elem word filt = Nothing +-- | otherwise = Just word +-- interact (unlines . mapMaybe check . lines) +-- @ -- $differences -- diff --git a/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs b/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs index 8f1bcd8e4..c5a3b1221 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs @@ -1,11 +1,16 @@ -- | Various formulas for working with bloomfilters. module Data.BloomFilter.Classic.Calc ( + NumEntries, BloomSize (..), - bloomSizeForPolicy, + FPR, + sizeForFPR, + BitsPerEntry, + sizeForBits, + sizeForPolicy, BloomPolicy (..), - bloomPolicyFPR, - bloomPolicyForFPR, - bloomPolicyForBitsPerEntry, + policyFPR, + policyForFPR, + policyForBits, ) where import Numeric @@ -19,34 +24,35 @@ type NumEntries = Int -- -- We can decide a policy based on: -- --- 1. a target false positive rate (FPR) using 'bloomPolicyForFPR' --- 2. a number of bits per entry using 'bloomPolicyForBitsPerEntry' +-- 1. a target false positive rate (FPR) using 'policyForFPR' +-- 2. a number of bits per entry using 'policyForBits' -- -- A policy can be turned into a 'BloomSize' given a target 'NumEntries' using --- 'bloomSizeForPolicy'. +-- 'sizeForPolicy'. -- -- Either way we define the policy, we can inspect the result to see: -- --- 1. The bits per entry 'bloomPolicyBitsPerEntry'. This will determine the +-- 1. The bits per entry 'policyBits'. This will determine the -- size of the bloom filter in bits. In general the bits per entry can be -- fractional. The final bloom filter size in will be rounded to a whole -- number of bits. --- 2. the number of hashes 'bloomPolicyNumHashes'. +-- 2. The number of hashes 'policyHashes'. +-- 3. The expected FPR for the policy using 'policyFPR'.
-- data BloomPolicy = BloomPolicy { - bloomPolicyBitsPerEntry :: !Double, - bloomPolicyNumHashes :: !Int + policyBits :: !Double, + policyHashes :: !Int } deriving Show -bloomPolicyForFPR :: FPR -> BloomPolicy -bloomPolicyForFPR fpr | fpr <= 0 || fpr >= 1 = +policyForFPR :: FPR -> BloomPolicy +policyForFPR fpr | fpr <= 0 || fpr >= 1 = error "bloomPolicyForFPR: fpr out of range (0,1)" -bloomPolicyForFPR fpr = +policyForFPR fpr = BloomPolicy { - bloomPolicyBitsPerEntry = c, - bloomPolicyNumHashes = k + policyBits = c, + policyHashes = k } where -- There's a simper fomula to compute the number of bits, but it assumes @@ -54,28 +60,28 @@ bloomPolicyForFPR fpr = -- so we have to use a more precise approximation, using the actual value -- of k. k :: Int; k' :: Double - k = max 1 (round ((-recip_log2) * log_fpr)) + k = min 64 (max 1 (round ((-recip_log2) * log_fpr))) k' = fromIntegral k c = negate k' / log1mexp (log_fpr / k') log_fpr = log fpr -bloomPolicyForBitsPerEntry :: BitsPerEntry -> BloomPolicy -bloomPolicyForBitsPerEntry c | c < 1 || c > 64 = - error "bloomPolicyForBitsPerEntry: out of ragnge [1,64]" +policyForBits :: BitsPerEntry -> BloomPolicy +policyForBits c | c < 1 || c > 64 = + error "policyForBits: out of ragnge [1,64]" -bloomPolicyForBitsPerEntry c = +policyForBits c = BloomPolicy { - bloomPolicyBitsPerEntry = c, - bloomPolicyNumHashes = k + policyBits = c, + policyHashes = k } where k = max 1 (round (c * log2)) -bloomPolicyFPR :: BloomPolicy -> FPR -bloomPolicyFPR BloomPolicy { - bloomPolicyBitsPerEntry = c, - bloomPolicyNumHashes = k - } = +policyFPR :: BloomPolicy -> FPR +policyFPR BloomPolicy { + policyBits = c, + policyHashes = k + } = negate (expm1 (negate (k' / c))) ** k' where k' = fromIntegral k @@ -85,21 +91,27 @@ bloomPolicyFPR BloomPolicy { data BloomSize = BloomSize { -- | The requested number of bits in filter. -- The actual size will be rounded up to the nearest 512. 
- bloomNumBits :: !Int, + sizeBits :: !Int, -- | The number of hash functions to use. - bloomNumHashes :: !Int + sizeHashes :: !Int } deriving Show -bloomSizeForPolicy :: BloomPolicy -> NumEntries -> BloomSize -bloomSizeForPolicy BloomPolicy { - bloomPolicyBitsPerEntry = c, - bloomPolicyNumHashes = k - } n = +sizeForFPR :: FPR -> NumEntries -> BloomSize +sizeForFPR = sizeForPolicy . policyForFPR + +sizeForBits :: BitsPerEntry -> NumEntries -> BloomSize +sizeForBits = sizeForPolicy . policyForBits + +sizeForPolicy :: BloomPolicy -> NumEntries -> BloomSize +sizeForPolicy BloomPolicy { + policyBits = c, + policyHashes = k + } n = BloomSize { - bloomNumBits = max 0 (ceiling (fromIntegral n * c)), - bloomNumHashes = max 1 k + sizeBits = max 1 (ceiling (fromIntegral n * c)), + sizeHashes = max 1 k } log2, recip_log2 :: Double diff --git a/bloomfilter/src/Data/BloomFilter/Classic/Easy.hs b/bloomfilter/src/Data/BloomFilter/Classic/Easy.hs deleted file mode 100644 index 45d2f0bc8..000000000 --- a/bloomfilter/src/Data/BloomFilter/Classic/Easy.hs +++ /dev/null @@ -1,123 +0,0 @@ --- | An easy-to-use Bloom filter interface. 
-module Data.BloomFilter.Classic.Easy ( - -- * Easy creation and querying - Bloom, - easyList, - B.elem, - B.notElem, - B.size, - - -- * Mutable bloom filter - MBloom, - easyNew, - MB.new, - MB.insert, - B.freeze, - - -- ** Example: a spell checker - -- $example - - -- * Useful defaults for creation - safeSuggestSizing, - suggestSizing, -) where - -import Control.Monad.ST (ST) -import Data.BloomFilter.Classic (Bloom) -import qualified Data.BloomFilter.Classic as B -import Data.BloomFilter.Classic.Calc -import Data.BloomFilter.Classic.Mutable (MBloom) -import qualified Data.BloomFilter.Classic.Mutable as MB -import Data.BloomFilter.Hash (Hashable) -import qualified Data.ByteString as SB - -------------------------------------------------------------------------------- --- Easy interface -------------------------------------------------------------------------------- - --- | Create a Bloom filter with the desired false positive rate and --- members. The hash functions used are computed by the @cheapHashes@ --- function from the 'Data.BloomFilter.Classic.Hash' module. -easyList :: Hashable a - => Double -- ^ desired false positive rate (0 < /ε/ < 1) - -> [a] -- ^ values to populate with - -> Bloom a -{-# SPECIALISE easyList :: Double -> [SB.ByteString] -> Bloom SB.ByteString #-} -easyList errRate xs = - B.fromList (suggestSizing capacity errRate) xs - where - capacity = length xs - --- | Create a Bloom filter with the desired false positive rate, /ε/ --- and expected maximum size, /n/. 
-easyNew :: Double -- ^ desired false positive rate (0 < /ε/ < 1) - -> Int -- ^ expected maximum size, /n/ - -> ST s (MBloom s a) -easyNew errRate capacity = - MB.new (suggestSizing capacity errRate) - -------------------------------------------------------------------------------- --- Size suggestions -------------------------------------------------------------------------------- - --- | Suggest a good combination of filter size and number of hash --- functions for a Bloom filter, based on its expected maximum --- capacity and a desired false positive rate. --- --- The false positive rate is the rate at which queries against the --- filter should return 'True' when an element is not actually --- present. It should be a fraction between 0 and 1, so a 1% false --- positive rate is represented by 0.01. --- --- This function will suggest to use a bloom filter of prime size. --- These theoretically behave the best. --- Also it won't suggest to use over 63 hash functions, --- because CheapHashes work only up to 63 functions. --- --- Note that while creating bloom filters with extremely small (or --- even negative) capacity is allowed for convenience, it is often --- not very useful. --- This function will always suggest to use at least 61 bits. --- --- >>> safeSuggestSizing 10000 0.01 --- Right (99317,7) --- -safeSuggestSizing :: - Int -- ^ expected maximum capacity - -> Double -- ^ desired false positive rate (0 < /e/ < 1) - -> Either String BloomSize -safeSuggestSizing capacity errRate - | capacity <= 0 = Right BloomSize { - bloomNumBits = 1, - bloomNumHashes = 1 - } - | errRate <= 0 || - errRate >= 1 = Left "invalid error rate" - | otherwise = Right $ bloomSizeForPolicy (bloomPolicyForFPR errRate) - capacity - --- | Behaves as 'safeSuggestSizing', but calls 'error' if given --- invalid or out-of-range inputs. 
-suggestSizing :: Int -- ^ expected maximum capacity - -> Double -- ^ desired false positive rate (0 < /e/ < 1) - -> BloomSize -suggestSizing cap errs = either fatal id (safeSuggestSizing cap errs) - where fatal = error . ("Data.BloomFilter.Util.suggestSizing: " ++) - --- $example --- --- This example reads a dictionary file containing one word per line, --- constructs a Bloom filter with a 1% false positive rate, and --- spellchecks its standard input. Like the Unix @spell@ command, it --- prints each word that it does not recognize. --- --- @ --- import Data.Maybe (mapMaybe) --- import qualified Data.BloomFilter.Easy as B --- --- main = do --- filt \<- B.'easyList' 0.01 . words \<$> readFile "\/usr\/share\/dict\/words" --- let check word | B.'B.elem' word filt = Nothing --- | otherwise = Just word --- interact (unlines . mapMaybe check . lines) --- @ diff --git a/bloomfilter/src/Data/BloomFilter/Classic/Mutable.hs b/bloomfilter/src/Data/BloomFilter/Classic/Mutable.hs index d5fc779f0..e8e05a0c1 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic/Mutable.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic/Mutable.hs @@ -72,16 +72,14 @@ instance Show (MBloom s a) where -- The size is ceiled at $2^48$. Tell us if you need bigger bloom filters. -- new :: BloomSize -> ST s (MBloom s a) -new BloomSize { bloomNumBits = numBits, bloomNumHashes } = do - bitArray <- V.new (fromIntegral numBits') +new BloomSize { sizeBits, sizeHashes = numHashes } = do + let !numBits = max 1 (min 0x1_0000_0000_0000 sizeBits) + bitArray <- V.new (fromIntegral numBits) pure MBloom { - numBits = numBits', - numHashes = bloomNumHashes, + numBits, + numHashes, bitArray } - where numBits' | numBits == 0 = 1 - | numBits >= 0xffff_ffff_ffff = 0x1_0000_0000_0000 - | otherwise = numBits -- | Modify the filter's bit array. The callback is expected to read (exactly) -- the given number of bytes into the given byte array buffer. 
@@ -128,8 +126,8 @@ elemHashes !ch MBloom { numBits = m, numHashes = k, bitArray = v } = size :: MBloom s a -> BloomSize size MBloom { numBits, numHashes } = BloomSize { - bloomNumBits = numBits, - bloomNumHashes = numHashes + sizeBits = numBits, + sizeHashes = numHashes } -- $overview diff --git a/bloomfilter/tests/bloomfilter-tests.hs b/bloomfilter/tests/bloomfilter-tests.hs index 674e60160..94bd4c3c7 100644 --- a/bloomfilter/tests/bloomfilter-tests.hs +++ b/bloomfilter/tests/bloomfilter-tests.hs @@ -2,8 +2,6 @@ module Main (main) where import qualified Data.BloomFilter.Classic as B import qualified Data.BloomFilter.Classic.BitVec64 as BV64 -import qualified Data.BloomFilter.Classic.Calc as B -import qualified Data.BloomFilter.Classic.Easy as B import qualified Data.BloomFilter.Classic.Internal as BI import Data.BloomFilter.Hash (Hashable (..), hash64) @@ -29,7 +27,7 @@ tests = testGroup "bloomfilter" , testProperty "prop_calc_size_fpr_fpr" prop_calc_size_fpr_fpr , testProperty "prop_calc_size_fpr_bits" prop_calc_size_fpr_bits ] - , testGroup "easyList" + , testGroup "fromList" [ testProperty "()" $ prop_pai () , testProperty "Char" $ prop_pai (undefined :: Char) , testProperty "Word32" $ prop_pai (undefined :: Word32) @@ -61,7 +59,7 @@ tests = testGroup "bloomfilter" ------------------------------------------------------------------------------- prop_pai :: (Hashable a) => a -> a -> [a] -> FPR -> Property -prop_pai _ x xs (FPR q) = let bf = B.easyList q (x:xs) in +prop_pai _ x xs (FPR q) = let bf = B.fromList (B.policyForFPR q) (x:xs) in B.elem x bf .&&. 
not (B.notElem x bf) ------------------------------------------------------------------------------- @@ -70,34 +68,32 @@ prop_pai _ x xs (FPR q) = let bf = B.easyList q (x:xs) in prop_calc_policy_fpr :: FPR -> Property prop_calc_policy_fpr (FPR fpr) = - let policy = B.bloomPolicyForFPR fpr - in B.bloomPolicyFPR policy ~~~ fpr + let policy = B.policyForFPR fpr + in B.policyFPR policy ~~~ fpr prop_calc_size_hashes_bits :: BitsPerEntry -> NumEntries -> Property prop_calc_size_hashes_bits (BitsPerEntry c) (NumEntries numEntries) = - let policy = B.bloomPolicyForBitsPerEntry c - bsize = B.bloomSizeForPolicy policy numEntries - in numHashFunctions (fromIntegral (B.bloomNumBits bsize)) + let bsize = B.sizeForBits c numEntries + in numHashFunctions (fromIntegral (B.sizeBits bsize)) (fromIntegral numEntries) - === fromIntegral (B.bloomNumHashes bsize) + === fromIntegral (B.sizeHashes bsize) prop_calc_size_fpr_fpr :: FPR -> NumEntries -> Property prop_calc_size_fpr_fpr (FPR fpr) (NumEntries numEntries) = - let policy = B.bloomPolicyForFPR fpr - bsize = B.bloomSizeForPolicy policy numEntries - in falsePositiveRate (fromIntegral (B.bloomNumBits bsize)) + let bsize = B.sizeForFPR fpr numEntries + in falsePositiveRate (fromIntegral (B.sizeBits bsize)) (fromIntegral numEntries) - (fromIntegral (B.bloomNumHashes bsize)) + (fromIntegral (B.sizeHashes bsize)) ~~~ fpr prop_calc_size_fpr_bits :: BitsPerEntry -> NumEntries -> Property prop_calc_size_fpr_bits (BitsPerEntry c) (NumEntries numEntries) = - let policy = B.bloomPolicyForBitsPerEntry c - bsize = B.bloomSizeForPolicy policy numEntries - in falsePositiveRate (fromIntegral (B.bloomNumBits bsize)) + let policy = B.policyForBits c + bsize = B.sizeForPolicy policy numEntries + in falsePositiveRate (fromIntegral (B.sizeBits bsize)) (fromIntegral numEntries) - (fromIntegral (B.bloomNumHashes bsize)) - ~~~ B.bloomPolicyFPR policy + (fromIntegral (B.sizeHashes bsize)) + ~~~ B.policyFPR policy -- reference implementations used for 
sanity checks diff --git a/lsm-tree.cabal b/lsm-tree.cabal index 3307f9369..55cb43c7a 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -424,7 +424,6 @@ library bloomfilter Data.BloomFilter.Classic Data.BloomFilter.Classic.BitVec64 Data.BloomFilter.Classic.Calc - Data.BloomFilter.Classic.Easy Data.BloomFilter.Classic.Internal Data.BloomFilter.Classic.Mutable Data.BloomFilter.Hash diff --git a/src-extras/Database/LSMTree/Extras/NoThunks.hs b/src-extras/Database/LSMTree/Extras/NoThunks.hs index 0715fd142..5ac6ddbf4 100644 --- a/src-extras/Database/LSMTree/Extras/NoThunks.hs +++ b/src-extras/Database/LSMTree/Extras/NoThunks.hs @@ -21,7 +21,7 @@ import Control.Monad.ST.Unsafe (unsafeIOToST, unsafeSTToIO) import Control.RefCount import Control.Tracer import Data.Bit -import Data.BloomFilter +import Data.BloomFilter (Bloom, MBloom) import Data.Map.Strict import Data.Primitive import Data.Primitive.PrimVar diff --git a/src/Database/LSMTree/Internal/BloomFilter.hs b/src/Database/LSMTree/Internal/BloomFilter.hs index bd5cb408e..2a24ca569 100644 --- a/src/Database/LSMTree/Internal/BloomFilter.hs +++ b/src/Database/LSMTree/Internal/BloomFilter.hs @@ -33,12 +33,12 @@ bloomFilterToLBS bf = let (size, ba, off, len) = BF.serialise bf in header size <> byteArrayToLBS ba off len where - header BF.BloomSize { bloomNumBits, bloomNumHashes } = + header BF.BloomSize { sizeBits, sizeHashes } = -- creates a single 16 byte chunk B.toLazyByteStringWith (B.safeStrategy 16 B.smallChunkSize) mempty $ B.word32Host bloomFilterVersion - <> B.word32Host (fromIntegral bloomNumHashes) - <> B.word64Host (fromIntegral bloomNumBits) + <> B.word32Host (fromIntegral sizeHashes) + <> B.word64Host (fromIntegral sizeBits) byteArrayToLBS :: P.ByteArray -> Int -> Int -> LBS.ByteString byteArrayToLBS ba off len = @@ -83,8 +83,8 @@ bloomFilterFromFile hfs fp h = do bloom <- BF.deserialise BF.BloomSize { - BF.bloomNumBits = fromIntegral nbits, - BF.bloomNumHashes = fromIntegral nhashes + BF.sizeBits = 
fromIntegral nbits, + BF.sizeHashes = fromIntegral nhashes } (\buf off len -> rethrowEOFError "bloom filter file too short" $ diff --git a/src/Database/LSMTree/Internal/RunAcc.hs b/src/Database/LSMTree/Internal/RunAcc.hs index 5e047f35b..6d1b4ae67 100644 --- a/src/Database/LSMTree/Internal/RunAcc.hs +++ b/src/Database/LSMTree/Internal/RunAcc.hs @@ -31,8 +31,6 @@ module Database.LSMTree.Internal.RunAcc ( , RunBloomFilterAlloc (..) -- ** Exposed for testing , newMBloom - , numHashFunctions - , falsePositiveRate ) where import Control.DeepSeq (NFData (..)) @@ -40,12 +38,10 @@ import Control.Exception (assert) import Control.Monad.ST.Strict import Data.BloomFilter (Bloom, MBloom) import qualified Data.BloomFilter as Bloom -import qualified Data.BloomFilter.Classic.Easy as Bloom.Easy import qualified Data.BloomFilter.Classic.Mutable as MBloom import Data.Primitive.PrimVar (PrimVar, modifyPrimVar, newPrimVar, readPrimVar) import Data.Word (Word64) -import Database.LSMTree.Internal.Assertions (fromIntegralChecked) import Database.LSMTree.Internal.BlobRef (BlobSpan (..)) import Database.LSMTree.Internal.Chunk (Chunk) import Database.LSMTree.Internal.Entry (Entry (..), NumEntries (..)) @@ -325,7 +321,7 @@ selectPagesAndChunks mpagemchunkPre page chunks = -- | See 'Database.LSMTree.Internal.Config.BloomFilterAlloc' data RunBloomFilterAlloc = -- | Bits per element in a filter - RunAllocFixed !Word64 + RunAllocFixed !Word64 --TODO: this could be Double too | RunAllocRequestFPR !Double deriving stock (Show, Eq) @@ -334,44 +330,11 @@ instance NFData RunBloomFilterAlloc where rnf (RunAllocRequestFPR a) = rnf a newMBloom :: NumEntries -> RunBloomFilterAlloc -> ST s (MBloom s a) -newMBloom (NumEntries nentries) = \case - RunAllocFixed !bitsPerEntry -> - let nbits :: Int - !nbits = fromIntegral bitsPerEntry * nentries - in MBloom.new - Bloom.BloomSize { - bloomNumBits = nbits, - bloomNumHashes = fromIntegralChecked $ numHashFunctions (fromIntegral nbits) (fromIntegralChecked 
nentries) - } - RunAllocRequestFPR !fpr -> - Bloom.Easy.easyNew fpr nentries - --- | Computes the optimal number of hash functions that minimises the false --- positive rate for a bloom filter. --- --- See Niv Dayan, Manos Athanassoulis, Stratos Idreos, --- /Optimal Bloom Filters and Adaptive Merging for LSM-Trees/, --- Footnote 2, page 6. -numHashFunctions :: - Integer -- ^ Number of bits assigned to the bloom filter. - -> Integer -- ^ Number of entries inserted into the bloom filter. - -> Integer -numHashFunctions nbits nentries = truncate @Double $ max 1 $ - (fromIntegral nbits / fromIntegral nentries) * log 2 - --- | False positive rate --- --- Assumes that the bloom filter uses 'numHashFunctions' hash functions. --- --- See Niv Dayan, Manos Athanassoulis, Stratos Idreos, --- /Optimal Bloom Filters and Adaptive Merging for LSM-Trees/, --- Equation 2. -falsePositiveRate :: - Floating a - => a -- ^ entries - -> a -- ^ bits - -> a -falsePositiveRate entries bits = exp ((-(bits / entries)) * sq (log 2)) - -sq :: Num a => a -> a -sq x = x * x +newMBloom (NumEntries nentries) alloc = + MBloom.new (Bloom.sizeForPolicy (policy alloc) nentries) + where + --TODO: it'd be possible to turn the RunBloomFilterAlloc into a BloomPolicy + -- without the NumEntries, and cache the policy, avoiding recalculating the + -- policy every time. 
+ policy (RunAllocFixed bitsPerEntry) = Bloom.policyForBits (fromIntegral bitsPerEntry) + policy (RunAllocRequestFPR fpr) = Bloom.policyForFPR fpr diff --git a/test/Test/Database/LSMTree/Internal/BloomFilter.hs b/test/Test/Database/LSMTree/Internal/BloomFilter.hs index aa71c3ef7..c98624b93 100644 --- a/test/Test/Database/LSMTree/Internal/BloomFilter.hs +++ b/test/Test/Database/LSMTree/Internal/BloomFilter.hs @@ -26,7 +26,6 @@ import Test.Tasty (TestTree, testGroup) import Test.Tasty.QuickCheck hiding ((.&.)) import qualified Data.BloomFilter as BF -import qualified Data.BloomFilter.Classic.Easy as BF import qualified Data.BloomFilter.Classic.Internal as BF (bloomInvariant) import Database.LSMTree.Internal.BloomFilter import qualified Database.LSMTree.Internal.BloomFilterQuery1 as Bloom1 @@ -64,9 +63,9 @@ roundtrip_prop (Positive (Small hfN)) (Positive bits) ws = Left err -> label (displayException err) $ property True Right rhs -> lhs === rhs where - sz = BF.BloomSize { bloomNumBits = limitBits bits, - bloomNumHashes = hfN } - lhs = BF.fromList sz ws + sz = BF.BloomSize { sizeBits = limitBits bits, + sizeHashes = hfN } + lhs = BF.create sz (\b -> mapM_ (BF.insert b) ws) bs = LBS.toStrict (bloomFilterToLBS lhs) limitBits :: Int -> Int @@ -121,7 +120,7 @@ prop_bloomQueries1 :: FPR -> Property prop_bloomQueries1 (FPR fpr) filters keys = let filters' :: [BF.Bloom SerialisedKey] - filters' = map (BF.easyList fpr . map (\(Small k) -> serialiseKey k)) + filters' = map (BF.fromList (BF.policyForFPR fpr) . map (\(Small k) -> serialiseKey k)) filters keys' :: [SerialisedKey] @@ -166,7 +165,8 @@ prop_bloomQueries2 :: FPR -> Property prop_bloomQueries2 (FPR fpr) filters keys = let filters' :: [BF.Bloom SerialisedKey] - filters' = map (BF.easyList fpr . map (\(Small k) -> serialiseKey k)) filters + filters' = map (BF.fromList (BF.policyForFPR fpr) . 
+ map (\(Small k) -> serialiseKey k)) filters keys' :: [SerialisedKey] keys' = map (\(Small k) -> serialiseKey k) keys diff --git a/test/Test/Database/LSMTree/Internal/Merge.hs b/test/Test/Database/LSMTree/Internal/Merge.hs index 1d92c8dc5..22c83e07b 100644 --- a/test/Test/Database/LSMTree/Internal/Merge.hs +++ b/test/Test/Database/LSMTree/Internal/Merge.hs @@ -103,8 +103,8 @@ prop_MergeDistributes fs hbio mergeType stepSize (SmallList rds) = do (lhsSize === rhsSize) .&&. -- we can't just test bloom filter equality, their sizes may differ. counterexample "runFilter" - ( Bloom.bloomNumBits (Bloom.size lhsFilter) - >= Bloom.bloomNumBits (Bloom.size rhsFilter)) + ( Bloom.sizeBits (Bloom.size lhsFilter) + >= Bloom.sizeBits (Bloom.size rhsFilter)) .&&. -- the index is equal, but only because range finder precision is -- always 0 for the numbers of entries we are dealing with. counterexample "runIndex" diff --git a/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs b/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs index 643e58f57..3f1f87312 100644 --- a/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs +++ b/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs @@ -36,7 +36,7 @@ import Data.Word (Word64) import Database.LSMTree.Extras.Random import qualified Database.LSMTree.Internal.Entry as LSMT import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..), - falsePositiveRate, newMBloom) + newMBloom) import System.Random import Test.QuickCheck import Test.QuickCheck.Gen @@ -80,9 +80,7 @@ prop_verifyFPR p alloc (NumEntries numEntries) (Seed seed) = let stdgen = mkStdGen seed measuredFPR = measureApproximateFPR p (mkBloomFromAlloc alloc) numEntries stdgen expectedFPR = case alloc of - RunAllocFixed bits -> - falsePositiveRate (fromIntegral numEntries) - (fromIntegral bits * fromIntegral numEntries) + RunAllocFixed bits -> Bloom.policyFPR (Bloom.policyForBits (fromIntegral bits)) RunAllocRequestFPR requestedFPR -> requestedFPR -- 
error margins lb = expectedFPR - 0.1 From 3c5feae2b1999ae4212272d54f750682c16a467e Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Wed, 9 Apr 2025 23:38:45 +0100 Subject: [PATCH 22/43] bloomfilter: allow 0 bits in policyForBits It's ok to allow zero, and the zero case was being covered by tests for RunAcc that use newMBloom. The previous patch changed newMBloom from using its own custom code for determining the bits and hashes to using the Calc module ones, including policyForBits. So we could either adjust the RunAcc test to avoid zero, or just allow zero. We allow it. In the conversion from policy to size we enforce a minimum size of 1 bit. So we can allow less than 1 bits per key. --- bloomfilter/src/Data/BloomFilter/Classic/Calc.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs b/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs index c5a3b1221..2a9079004 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs @@ -66,8 +66,8 @@ policyForFPR fpr = log_fpr = log fpr policyForBits :: BitsPerEntry -> BloomPolicy -policyForBits c | c < 1 || c > 64 = - error "policyForBits: out of ragnge [1,64]" +policyForBits c | c < 0 || c > 64 = + error "policyForBits: out of ragnge [0,64]" policyForBits c = BloomPolicy { From 02bf170eeb5616cef987e9cc1f51f17ddcb9dd1c Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Thu, 10 Apr 2025 08:18:14 +0100 Subject: [PATCH 23/43] bloomfilter: remove last uses of internal modules --- .hlint.yaml | 1 + bloomfilter/src/Data/BloomFilter/Classic.hs | 97 +------- .../src/Data/BloomFilter/Classic/BitArray.hs | 130 ++++++++++ .../src/Data/BloomFilter/Classic/BitVec64.hs | 137 ----------- .../src/Data/BloomFilter/Classic/Internal.hs | 228 +++++++++++++++--- .../src/Data/BloomFilter/Classic/Mutable.hs | 157 ------------ bloomfilter/tests/bloomfilter-tests.hs | 12 - lsm-tree.cabal | 13 +- 
src/Database/LSMTree/Internal/RunAcc.hs | 31 ++- .../Database/LSMTree/Internal/BloomFilter.hs | 6 +- .../LSMTree/Internal/RunBloomFilterAlloc.hs | 29 +-- 11 files changed, 368 insertions(+), 473 deletions(-) create mode 100644 bloomfilter/src/Data/BloomFilter/Classic/BitArray.hs delete mode 100644 bloomfilter/src/Data/BloomFilter/Classic/BitVec64.hs delete mode 100644 bloomfilter/src/Data/BloomFilter/Classic/Mutable.hs diff --git a/.hlint.yaml b/.hlint.yaml index 85e091a9f..1ef3d09a9 100644 --- a/.hlint.yaml +++ b/.hlint.yaml @@ -37,6 +37,7 @@ - ignore: {name: "Hoist not"} - ignore: {name: "Use /="} - ignore: {name: "Use unless"} +- ignore: {name: "Use notElem"} # Specify additional command line arguments # diff --git a/bloomfilter/src/Data/BloomFilter/Classic.hs b/bloomfilter/src/Data/BloomFilter/Classic.hs index fc920b433..b95ee7340 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic.hs @@ -27,6 +27,7 @@ module Data.BloomFilter.Classic ( -- * Types Hash, + Hashable, CheapHashes, -- * Immutable Bloom filters @@ -54,14 +55,15 @@ module Data.BloomFilter.Classic ( -- ** Accessors size, elem, - elemHashes, notElem, + elemHashes, serialise, -- * Mutable Bloom filters MBloom, new, insert, + insertHashes, -- ** Conversion freeze, @@ -69,20 +71,15 @@ module Data.BloomFilter.Classic ( unsafeFreeze, ) where -import Control.Exception (assert) import Control.Monad.Primitive (PrimMonad, PrimState, RealWorld, stToPrim) import Control.Monad.ST (ST, runST) -import Data.Primitive.ByteArray (ByteArray, MutableByteArray) -import Data.Word (Word64) +import Data.Primitive.ByteArray (MutableByteArray) -import qualified Data.BloomFilter.Classic.BitVec64 as V import Data.BloomFilter.Classic.Calc -import Data.BloomFilter.Classic.Internal (Bloom (..), bloomInvariant) -import Data.BloomFilter.Classic.Mutable (MBloom (..), insert, new) -import qualified Data.BloomFilter.Classic.Mutable as MB -import Data.BloomFilter.Hash (CheapHashes, Hash, 
Hashable, evalHashes, - makeHashes) +import Data.BloomFilter.Classic.Internal hiding (deserialise) +import qualified Data.BloomFilter.Classic.Internal as Internal +import Data.BloomFilter.Hash import Prelude hiding (elem, notElem) @@ -108,41 +105,10 @@ create bloomsize body = body mb unsafeFreeze mb --- | Create an immutable Bloom filter from a mutable one. The mutable --- filter may be modified afterwards. -freeze :: MBloom s a -> ST s (Bloom a) -freeze MBloom { numBits, numHashes, bitArray } = do - bitArray' <- V.freeze bitArray - let !bf = Bloom { - numHashes, - numBits, - bitArray = bitArray' - } - assert (bloomInvariant bf) $ pure bf - --- | Create an immutable Bloom filter from a mutable one. The mutable --- filter /must not/ be modified afterwards, or a runtime crash may --- occur. For a safer creation interface, use 'freeze' or 'create'. -unsafeFreeze :: MBloom s a -> ST s (Bloom a) -unsafeFreeze MBloom { numBits, numHashes, bitArray } = do - bitArray' <- V.unsafeFreeze bitArray - let !bf = Bloom { - numHashes, - numBits, - bitArray = bitArray' - } - assert (bloomInvariant bf) $ pure bf - --- | Copy an immutable Bloom filter to create a mutable one. There is --- no non-copying equivalent. -thaw :: Bloom a -> ST s (MBloom s a) -thaw Bloom { numBits, numHashes, bitArray } = do - bitArray' <- V.thaw bitArray - pure MBloom { - numBits, - numHashes, - bitArray = bitArray' - } +-- | Insert a value into a mutable Bloom filter. Afterwards, a +-- membership query for the same value is guaranteed to return @True@. +insert :: Hashable a => MBloom s a -> a -> ST s () +insert !mb !x = insertHashes mb (makeHashes x) -- | Query an immutable Bloom filter for membership. If the value is -- present, return @True@. 
If the value is not present, there is @@ -150,42 +116,11 @@ thaw Bloom { numBits, numHashes, bitArray } = do elem :: Hashable a => a -> Bloom a -> Bool elem elt ub = elemHashes (makeHashes elt) ub --- | Query an immutable Bloom filter for membership using already constructed 'Hashes' value. -elemHashes :: CheapHashes a -> Bloom a -> Bool -elemHashes !ch Bloom { numBits, numHashes, bitArray } = - go 0 - where - go :: Int -> Bool - go !i | i >= numHashes - = True - go !i = let idx' :: Word64 - !idx' = evalHashes ch i in - let idx :: Int - !idx = fromIntegral (idx' `V.unsafeRemWord64` fromIntegral numBits) in - -- While the idx' can cover the full Word64 range, - -- after taking the remainder, it now must fit in - -- and Int because it's less than the filter size. - if V.unsafeIndex bitArray idx - then go (i + 1) - else False - -- | Query an immutable Bloom filter for non-membership. If the value -- /is/ present, return @False@. If the value is not present, there -- is /still/ some possibility that @False@ will be returned. notElem :: Hashable a => a -> Bloom a -> Bool -notElem elt ub = notElemHashes (makeHashes elt) ub - --- | Query an immutable Bloom filter for non-membership using already constructed 'Hashes' value. -notElemHashes :: CheapHashes a -> Bloom a -> Bool -notElemHashes !ch !ub = not (elemHashes ch ub) - --- | Return the size of the Bloom filter. -size :: Bloom a -> BloomSize -size Bloom { numBits, numHashes } = - BloomSize { - sizeBits = numBits, - sizeHashes = numHashes - } +notElem = \elt ub -> not (elt `elem` ub) -- | Build an immutable Bloom filter from a seed value. The seeding -- function populates the filter as follows. 
@@ -224,12 +159,6 @@ fromList policy xs = where bsize = sizeForPolicy policy (length xs) -serialise :: Bloom a -> (BloomSize, ByteArray, Int, Int) -serialise b@Bloom{bitArray} = - (size b, ba, off, len) - where - (ba, off, len) = V.serialise bitArray - {-# SPECIALISE deserialise :: BloomSize -> (MutableByteArray RealWorld -> Int -> Int -> IO ()) -> IO (Bloom a) #-} @@ -239,7 +168,7 @@ deserialise :: PrimMonad m -> m (Bloom a) deserialise bloomsize fill = do mbloom <- stToPrim $ new bloomsize - MB.deserialise mbloom fill + Internal.deserialise mbloom fill stToPrim $ unsafeFreeze mbloom -- $overview diff --git a/bloomfilter/src/Data/BloomFilter/Classic/BitArray.hs b/bloomfilter/src/Data/BloomFilter/Classic/BitArray.hs new file mode 100644 index 000000000..5e53b3fe6 --- /dev/null +++ b/bloomfilter/src/Data/BloomFilter/Classic/BitArray.hs @@ -0,0 +1,130 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +-- | Minimal bit array implementation. +module Data.BloomFilter.Classic.BitArray ( + BitArray (..), + unsafeIndex, + prefetchIndex, + MBitArray (..), + new, + unsafeSet, + freeze, + unsafeFreeze, + thaw, + serialise, + deserialise, +) where + +import Control.Exception (assert) +import Control.Monad.Primitive (PrimMonad, PrimState) +import Control.Monad.ST (ST) +import Data.Bits +import Data.Primitive.ByteArray +import Data.Primitive.PrimArray +import Data.Word (Word64, Word8) + +import GHC.Exts (Int (I#), prefetchByteArray0#) +import GHC.ST (ST (ST)) + +-- | Bit vector backed up by an array of Word64 +-- +-- This vector's offset and length are multiples of 64 +newtype BitArray = BitArray (PrimArray Word64) + deriving (Eq, Show) + +{-# INLINE unsafeIndex #-} +unsafeIndex :: BitArray -> Int -> Bool +unsafeIndex (BitArray arr) !i = + assert (j >= 0 && j < sizeofPrimArray arr) $ + unsafeTestBit (indexPrimArray arr j) k + where + !j = unsafeShiftR i 6 -- `div` 64, bit index to Word64 index. + !k = i .&. 
63 -- `mod` 64, bit within Word64 + +{-# INLINE prefetchIndex #-} +prefetchIndex :: BitArray -> Int -> ST s () +prefetchIndex (BitArray (PrimArray ba#)) !i = + let !(I# bi#) = i `unsafeShiftR` 3 in + ST (\s -> case prefetchByteArray0# ba# bi# s of + s' -> (# s', () #)) + -- We only need to shiftR 3 here, not 6, because we're going from a bit + -- offset to a byte offset for prefetch. Whereas in unsafeIndex, we go from + -- a bit offset to a Word64 offset, so an extra shiftR 3, for 6 total. + +newtype MBitArray s = MBitArray (MutablePrimArray s Word64) + +-- | Will create an explicitly pinned byte array. +-- This is done because pinned byte arrays allow for more efficient +-- serialisation, but the definition of 'isByteArrayPinned' changed in GHC 9.6, +-- see . +-- +-- TODO: remove this workaround once a solution exists, e.g. a new primop that +-- allows checking for implicit pinning. +new :: Int -> ST s (MBitArray s) +new s = do + mba@(MutableByteArray mba#) <- newPinnedByteArray numBytes + setByteArray mba 0 numBytes (0 :: Word8) + return (MBitArray (MutablePrimArray mba#)) + where + !numWords = roundUpTo64 s + !numBytes = unsafeShiftL numWords 3 -- * 8 + + -- this may overflow, but so be it (2^64 bits is a lot) + roundUpTo64 :: Int -> Int + roundUpTo64 i = unsafeShiftR (i + 63) 6 + +serialise :: BitArray -> (ByteArray, Int, Int) +serialise bitArray = + let ba = asByteArray bitArray + in (ba, 0, sizeofByteArray ba) + where + asByteArray (BitArray (PrimArray ba#)) = ByteArray ba# + +-- | Do an inplace overwrite of the byte array representing the bit block.
+deserialise :: PrimMonad m + => MBitArray (PrimState m) + -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) + -> m () +deserialise bitArray fill = do + let mba = asMutableByteArray bitArray + len <- getSizeofMutableByteArray mba + fill mba 0 len + where + asMutableByteArray (MBitArray (MutablePrimArray mba#)) = + MutableByteArray mba# + +unsafeSet :: MBitArray s -> Int -> ST s () +unsafeSet (MBitArray arr) i = do +#ifdef NO_IGNORE_ASSERTS + sz <- getSizeofMutablePrimArray arr + assert (j >= 0 && j < sz) $ return () +#endif + w <- readPrimArray arr j + writePrimArray arr j (unsafeSetBit w k) + where + !j = unsafeShiftR i 6 -- `div` 64 + !k = i .&. 63 -- `mod` 64 + +freeze :: MBitArray s -> ST s BitArray +freeze (MBitArray arr) = do + len <- getSizeofMutablePrimArray arr + BitArray <$> freezePrimArray arr 0 len + +unsafeFreeze :: MBitArray s -> ST s BitArray +unsafeFreeze (MBitArray arr) = + BitArray <$> unsafeFreezePrimArray arr + +thaw :: BitArray -> ST s (MBitArray s) +thaw (BitArray arr) = + MBitArray <$> thawPrimArray arr 0 (sizeofPrimArray arr) + +{-# INLINE unsafeTestBit #-} +-- like testBit but using unsafeShiftL instead of shiftL +unsafeTestBit :: Word64 -> Int -> Bool +unsafeTestBit w k = w .&. (1 `unsafeShiftL` k) /= 0 + +{-# INLINE unsafeSetBit #-} +-- like setBit but using unsafeShiftL instead of shiftL +unsafeSetBit :: Word64 -> Int -> Word64 +unsafeSetBit w k = w .|. (1 `unsafeShiftL` k) diff --git a/bloomfilter/src/Data/BloomFilter/Classic/BitVec64.hs b/bloomfilter/src/Data/BloomFilter/Classic/BitVec64.hs deleted file mode 100644 index 79929231e..000000000 --- a/bloomfilter/src/Data/BloomFilter/Classic/BitVec64.hs +++ /dev/null @@ -1,137 +0,0 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE UnboxedTuples #-} --- | Minimal bit vector implementation. 
-module Data.BloomFilter.Classic.BitVec64 ( - BitVec64 (..), - unsafeIndex, - prefetchIndex, - MBitVec64 (..), - new, - unsafeWrite, - unsafeRead, - freeze, - unsafeFreeze, - thaw, - unsafeRemWord64, - serialise, - deserialise, -) where - -import Control.Monad.ST (ST) -import Data.Bits -import Data.Primitive.ByteArray (ByteArray (ByteArray), - MutableByteArray, newPinnedByteArray, setByteArray) -import qualified Data.Vector.Primitive as VP -import qualified Data.Vector.Primitive.Mutable as VPM -import Data.Word (Word64, Word8) - -import GHC.Exts (Int (I#), prefetchByteArray0#, uncheckedIShiftRL#, - (+#)) -import qualified GHC.Exts -import GHC.ST (ST (ST)) -import GHC.Word (Word64 (W64#)) - - --- | Bit vector backed up by an array of Word64 --- --- This vector's offset and length are multiples of 64 -newtype BitVec64 = BV64 (VP.Vector Word64) - deriving (Eq, Show) - -{-# INLINE unsafeIndex #-} -unsafeIndex :: BitVec64 -> Int -> Bool -unsafeIndex (BV64 bv) i = - unsafeTestBit (VP.unsafeIndex bv j) k - where - !j = unsafeShiftR i 6 -- `div` 64, bit index to Word64 index. - !k = i .&. 63 -- `mod` 64, bit within Word64 - -{-# INLINE unsafeTestBit #-} --- like testBit but using unsafeShiftL instead of shiftL -unsafeTestBit :: Word64 -> Int -> Bool -unsafeTestBit w k = w .&. (1 `unsafeShiftL` k) /= 0 - -{-# INLINE prefetchIndex #-} -prefetchIndex :: BitVec64 -> Int -> ST s () -prefetchIndex (BV64 (VP.Vector (I# off#) _ (ByteArray ba#))) (I# i#) = - ST (\s -> case prefetchByteArray0# ba# (off# +# uncheckedIShiftRL# i# 3#) s of - s' -> (# s', () #)) - -- We only need to shiftR 3 here, not 6, because we're going from a bit - -- offset to a byte offset for prefetch. Whereas in unsafeIndex, we go from - -- a bit offset to a Word64 offset, so an extra shiftR 3, for 6 total. - -newtype MBitVec64 s = MBV64 (VP.MVector s Word64) - --- | Will create an explicitly pinned byte array if it is larger than 1 kB. 
--- This is done because pinned byte arrays allow for more efficient --- serialisation, but the definition of 'isByteArrayPinned' changed in GHC 9.6, --- see . --- --- TODO: remove this workaround once a solution exists, e.g. a new primop that --- allows checking for implicit pinning. -new :: Word64 -> ST s (MBitVec64 s) -new s = do - mba <- newPinnedByteArray numBytes - setByteArray mba 0 numBytes (0 :: Word8) - return (MBV64 (VP.MVector 0 numWords mba)) - where - !numWords = w2i (roundUpTo64 s) - !numBytes = unsafeShiftL numWords 3 -- * 8 - -serialise :: BitVec64 -> (ByteArray, Int, Int) -serialise = asByteArray - where - asByteArray (BV64 (VP.Vector off len ba)) = - (ba, off * 8, len * 8) - --- | Do an inplace overwrite of the byte array representing the bit block. -deserialise :: MBitVec64 s - -> (MutableByteArray s -> Int -> Int -> m ()) - -> m () -deserialise bitArray fill = - let (ba, off, len) = asMutableByteArray bitArray - in fill ba off len - where - asMutableByteArray (MBV64 (VP.MVector off len mba)) = - (mba, off * 8, len * 8) - -unsafeWrite :: MBitVec64 s -> Word64 -> Bool -> ST s () -unsafeWrite (MBV64 mbv) i x = do - VPM.unsafeModify mbv (\w -> if x then setBit w (w2i k) else clearBit w (w2i k)) (w2i j) - where - !j = unsafeShiftR i 6 -- `div` 64 - !k = i .&. 63 -- `mod` 64 - -unsafeRead :: MBitVec64 s -> Word64 -> ST s Bool -unsafeRead (MBV64 mbv) i = do - !w <- VPM.unsafeRead mbv (w2i j) - return $! testBit w (w2i k) - where - !j = unsafeShiftR i 6 -- `div` 64 - !k = i .&. 63 -- `mod` 64 - -freeze :: MBitVec64 s -> ST s BitVec64 -freeze (MBV64 mbv) = BV64 <$> VP.freeze mbv - -unsafeFreeze :: MBitVec64 s -> ST s BitVec64 -unsafeFreeze (MBV64 mbv) = BV64 <$> VP.unsafeFreeze mbv - -thaw :: BitVec64 -> ST s (MBitVec64 s) -thaw (BV64 bv) = MBV64 <$> VP.thaw bv - --- this may overflow, but so be it (1^64 bits is a lot) -roundUpTo64 :: Word64 -> Word64 -roundUpTo64 i = unsafeShiftR (i + 63) 6 - --- | Like 'rem' but does not check for division by 0. 
-unsafeRemWord64 :: Word64 -> Word64 -> Word64 -#if MIN_VERSION_base(4,17,0) -unsafeRemWord64 (W64# x#) (W64# y#) = W64# (x# `GHC.Exts.remWord64#` y#) -#else -unsafeRemWord64 (W64# x#) (W64# y#) = W64# (x# `GHC.Exts.remWord#` y#) -#endif - -w2i :: Word64 -> Int -w2i = fromIntegral -{-# INLINE w2i #-} diff --git a/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs b/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs index bfdd42020..32aedcc83 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs @@ -1,56 +1,220 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} {-# OPTIONS_HADDOCK not-home #-} --- | This module exports 'Bloom'' definition. +-- | This module defines the 'Bloom' and 'MBloom' types and all the functions +-- that need direct knowledge of and access to the representation. This forms +-- the trusted base. module Data.BloomFilter.Classic.Internal ( - Bloom(..), + -- * Mutable Bloom filters + MBloom, + new, + insertHashes, + + -- * Immutable Bloom filters + Bloom, bloomInvariant, -) where + size, + elemHashes, + + -- * Conversion + serialise, + deserialise, + freeze, + unsafeFreeze, + thaw, + ) where import Control.DeepSeq (NFData (..)) +import Control.Exception (assert) +import Control.Monad.Primitive (PrimMonad, PrimState) +import Control.Monad.ST (ST) import Data.Bits -import qualified Data.BloomFilter.Classic.BitVec64 as V import Data.Kind (Type) -import Data.Primitive.ByteArray (sizeofByteArray) -import qualified Data.Vector.Primitive as VP +import Data.Primitive.ByteArray +import Data.Primitive.PrimArray +import Data.Word (Word64) + +#if MIN_VERSION_base(4,17,0) +import GHC.Exts (remWord64#) +#else +import GHC.Exts (remWord#) +#endif +import GHC.Word (Word64 (W64#)) + +import Data.BloomFilter.Classic.BitArray (BitArray, MBitArray) +import qualified Data.BloomFilter.Classic.BitArray as BitArray +import Data.BloomFilter.Classic.Calc +import Data.BloomFilter.Hash + 
+------------------------------------------------------------------------------- +-- Mutable Bloom filters +-- + +type MBloom :: Type -> Type -> Type +-- | A mutable Bloom filter, for use within the 'ST' monad. +data MBloom s a = MBloom { + mbNumBits :: {-# UNPACK #-} !Int -- ^ non-zero + , mbNumHashes :: {-# UNPACK #-} !Int + , mbBitArray :: {-# UNPACK #-} !(MBitArray s) + } +type role MBloom nominal nominal + +instance Show (MBloom s a) where + show mb = "MBloom { " ++ show (mbNumBits mb) ++ " bits } " + +instance NFData (MBloom s a) where + rnf !_ = () + +-- | Create a new mutable Bloom filter. +-- +-- The size is ceiled at $2^48$. Tell us if you need bigger bloom filters. +-- +new :: BloomSize -> ST s (MBloom s a) +new BloomSize { sizeBits, sizeHashes } = do + let !mbNumBits = max 1 (min 0x1_0000_0000_0000 sizeBits) + mbBitArray <- BitArray.new mbNumBits + pure MBloom { + mbNumBits, + mbNumHashes = max 1 sizeHashes, + mbBitArray + } + +insertHashes :: MBloom s a -> CheapHashes a -> ST s () +insertHashes MBloom { mbNumBits = m, mbNumHashes = k, mbBitArray = a } !ch = + go 0 + where + go !i | i >= k = return () + go !i = let idx' :: Word64 + !idx' = evalHashes ch i in + let idx :: Int + !idx = fromIntegral (idx' `unsafeRemWord64` fromIntegral m) in + -- While the idx' can cover the full Word64 range, + -- after taking the remainder, it now must fit in + -- an Int because it's less than the filter size. + BitArray.unsafeSet a idx >> go (i + 1) + +-- | Modify the filter's bit array. The callback is expected to read (exactly) +-- the given number of bytes into the given byte array buffer.
+-- +deserialise :: PrimMonad m + => MBloom (PrimState m) a + -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) + -> m () +deserialise MBloom {mbBitArray} fill = + BitArray.deserialise mbBitArray fill + + +------------------------------------------------------------------------------- +-- Immutable Bloom filters +-- type Bloom :: Type -> Type +-- | An immutable Bloom filter. data Bloom a = Bloom { numBits :: {-# UNPACK #-} !Int -- ^ non-zero , numHashes :: {-# UNPACK #-} !Int - , bitArray :: {-# UNPACK #-} !V.BitVec64 + , bitArray :: {-# UNPACK #-} !BitArray } + deriving Eq type role Bloom nominal bloomInvariant :: Bloom a -> Bool -bloomInvariant Bloom { numBits = s, bitArray = V.BV64 (VP.Vector off len ba) } = - s > 0 - && s <= 2^(48 :: Int) - && off >= 0 - && ceilDiv64 s == fromIntegral len - && (off + len) * 8 <= sizeofByteArray ba +bloomInvariant Bloom { numBits, bitArray = BitArray.BitArray pa } = + numBits > 0 + && numBits <= 2^(48 :: Int) + && ceilDiv64 numBits == sizeofPrimArray pa where ceilDiv64 x = unsafeShiftR (x + 63) 6 -instance Eq (Bloom a) where - -- We support arbitrary sized bitvectors, - -- therefore an equality is a bit involved: - -- we need to be careful when comparing the last bits of bitArray. - (==) Bloom { numBits = n, numHashes = k, bitArray = V.BV64 v } - Bloom { numBits = n', numHashes = k', bitArray = V.BV64 v' } = - k == k' && - n == n' && - VP.take w v == VP.take w v' && -- compare full words - if l == 0 then True else unsafeShiftL x s == unsafeShiftL x' s -- compare last words - where - !w = fromIntegral (unsafeShiftR n 6) :: Int -- n `div` 64 - !l = fromIntegral (n .&. 63) :: Int -- n `mod` 64 - !s = 64 - l - - -- last words - x = VP.unsafeIndex v w - x' = VP.unsafeIndex v' w - instance Show (Bloom a) where show mb = "Bloom { " ++ show (numBits mb) ++ " bits } " instance NFData (Bloom a) where rnf !_ = () + +-- | Return the size of the Bloom filter. 
+size :: Bloom a -> BloomSize +size Bloom { numBits, numHashes } = + BloomSize { + sizeBits = numBits, + sizeHashes = numHashes + } + +-- | Query an immutable Bloom filter for membership using already constructed 'Hashes' value. +elemHashes :: CheapHashes a -> Bloom a -> Bool +elemHashes !ch Bloom { numBits, numHashes, bitArray } = + go 0 + where + go :: Int -> Bool + go !i | i >= numHashes + = True + go !i = let idx' :: Word64 + !idx' = evalHashes ch i in + let idx :: Int + !idx = fromIntegral (idx' `unsafeRemWord64` fromIntegral numBits) in + -- While the idx' can cover the full Word64 range, + -- after taking the remainder, it now must fit in + -- and Int because it's less than the filter size. + if BitArray.unsafeIndex bitArray idx + then go (i + 1) + else False + +serialise :: Bloom a -> (BloomSize, ByteArray, Int, Int) +serialise b@Bloom{bitArray} = + (size b, ba, off, len) + where + (ba, off, len) = BitArray.serialise bitArray + + +------------------------------------------------------------------------------- +-- Conversions between mutable and immutable Bloom filters +-- + +-- | Create an immutable Bloom filter from a mutable one. The mutable +-- filter may be modified afterwards. +freeze :: MBloom s a -> ST s (Bloom a) +freeze MBloom { mbNumBits, mbNumHashes, mbBitArray } = do + bitArray <- BitArray.freeze mbBitArray + let !bf = Bloom { + numBits = mbNumBits, + numHashes = mbNumHashes, + bitArray + } + assert (bloomInvariant bf) $ pure bf + +-- | Create an immutable Bloom filter from a mutable one without copying. The +-- mutable filter /must not/ be modified afterwards. For a safer creation +-- interface, use 'freeze' or 'create'. 
+unsafeFreeze :: MBloom s a -> ST s (Bloom a) +unsafeFreeze MBloom { mbNumBits, mbNumHashes, mbBitArray } = do + bitArray <- BitArray.unsafeFreeze mbBitArray + let !bf = Bloom { + numBits = mbNumBits, + numHashes = mbNumHashes, + bitArray + } + assert (bloomInvariant bf) $ pure bf + +-- | Copy an immutable Bloom filter to create a mutable one. There is +-- no non-copying equivalent. +thaw :: Bloom a -> ST s (MBloom s a) +thaw Bloom { numBits, numHashes, bitArray } = do + mbBitArray <- BitArray.thaw bitArray + pure MBloom { + mbNumBits = numBits, + mbNumHashes = numHashes, + mbBitArray + } + + +------------------------------------------------------------------------------- +-- Low level utils +-- + +-- | Like 'rem' but does not check for division by 0. +unsafeRemWord64 :: Word64 -> Word64 -> Word64 +#if MIN_VERSION_base(4,17,0) +unsafeRemWord64 (W64# x#) (W64# y#) = W64# (x# `remWord64#` y#) +#else +unsafeRemWord64 (W64# x#) (W64# y#) = W64# (x# `remWord#` y#) +#endif diff --git a/bloomfilter/src/Data/BloomFilter/Classic/Mutable.hs b/bloomfilter/src/Data/BloomFilter/Classic/Mutable.hs deleted file mode 100644 index e8e05a0c1..000000000 --- a/bloomfilter/src/Data/BloomFilter/Classic/Mutable.hs +++ /dev/null @@ -1,157 +0,0 @@ --- | --- A fast, space efficient Bloom filter implementation. A Bloom --- filter is a set-like data structure that provides a probabilistic --- membership test. --- --- * Queries do not give false negatives. When an element is added to --- a filter, a subsequent membership test will definitely return --- 'True'. --- --- * False positives /are/ possible. If an element has not been added --- to a filter, a membership test /may/ nevertheless indicate that --- the element is present. --- --- This module provides low-level control. For an easier to use --- interface, see the "Data.BloomFilter.Classic.Easy" module. 
- -module Data.BloomFilter.Classic.Mutable ( - -- * Overview - -- $overview - - -- ** Ease of use - -- $ease - - -- ** Performance - -- $performance - - -- * Types - Hash, - MBloom (..), - CheapHashes, - -- * Mutable Bloom filters - - -- ** Creation - BloomSize (..), - new, - - -- ** Accessors - size, - elem, - - -- ** Mutation - insert, - deserialise, -) where - -import Control.Monad.Primitive (PrimState) -import Control.Monad.ST (ST) -import Data.Kind (Type) -import Data.Primitive.ByteArray (MutableByteArray) - -import qualified Data.BloomFilter.Classic.BitVec64 as V -import Data.BloomFilter.Classic.Calc (BloomSize (..)) -import Data.BloomFilter.Hash (CheapHashes, Hash, Hashable, evalHashes, - makeHashes) - -import Prelude hiding (elem) - -type MBloom :: Type -> Type -> Type --- | A mutable Bloom filter, for use within the 'ST' monad. -data MBloom s a = MBloom { - numBits :: {-# UNPACK #-} !Int -- ^ non-zero - , numHashes :: {-# UNPACK #-} !Int - , bitArray :: {-# UNPACK #-} !(V.MBitVec64 s) - } -type role MBloom nominal nominal - -instance Show (MBloom s a) where - show mb = "MBloom { " ++ show (numBits mb) ++ " bits } " - --- | Create a new mutable Bloom filter. --- --- The size is ceiled at $2^48$. Tell us if you need bigger bloom filters. --- -new :: BloomSize -> ST s (MBloom s a) -new BloomSize { sizeBits, sizeHashes = numHashes } = do - let !numBits = max 1 (min 0x1_0000_0000_0000 sizeBits) - bitArray <- V.new (fromIntegral numBits) - pure MBloom { - numBits, - numHashes, - bitArray - } - --- | Modify the filter's bit array. The callback is expected to read (exactly) --- the given number of bytes into the given byte array buffer. --- -deserialise :: MBloom (PrimState m) a - -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) - -> m () -deserialise MBloom {bitArray} fill = - V.deserialise bitArray fill - --- | Insert a value into a mutable Bloom filter. Afterwards, a --- membership query for the same value is guaranteed to return @True@. 
-insert :: Hashable a => MBloom s a -> a -> ST s () -insert !mb !x = insertHashes mb (makeHashes x) - -insertHashes :: MBloom s a -> CheapHashes a -> ST s () -insertHashes MBloom { numBits = m, numHashes = k, bitArray = v } !h = - go 0 - where - go !i | i >= k = return () - | otherwise = let !idx = evalHashes h i `rem` fromIntegral m - in V.unsafeWrite v idx True >> go (i + 1) - --- | Query a mutable Bloom filter for membership. If the value is --- present, return @True@. If the value is not present, there is --- /still/ some possibility that @True@ will be returned. -elem :: Hashable a => a -> MBloom s a -> ST s Bool -elem elt mb = elemHashes (makeHashes elt) mb - -elemHashes :: forall s a. CheapHashes a -> MBloom s a -> ST s Bool -elemHashes !ch MBloom { numBits = m, numHashes = k, bitArray = v } = - go 0 - where - go :: Int -> ST s Bool - go !i | i >= k = return True - | otherwise = do let !idx' = evalHashes ch i - let !idx = idx' `rem` fromIntegral m - b <- V.unsafeRead v idx - if b - then go (i + 1) - - else return False --- | Return the size of the Bloom filter. -size :: MBloom s a -> BloomSize -size MBloom { numBits, numHashes } = - BloomSize { - sizeBits = numBits, - sizeHashes = numHashes - } - --- $overview --- --- Each of the functions for creating Bloom filters accepts two parameters: --- --- * The number of bits that should be used for the filter. Note that --- a filter is fixed in size; it cannot be resized after creation. --- --- * A number of hash functions, /k/, to be used for the filter. --- --- By choosing these parameters with care, it is possible to tune for --- a particular false positive rate. --- The 'Data.BloomFilter.Classic.Easy.suggestSizing' function in --- the "Data.BloomFilter.Classic.Easy" module calculates useful estimates for --- these parameters. - --- $ease --- --- This module provides both mutable interfaces for creating and --- querying a Bloom filter. 
It is most useful as a low-level way to --- manage a Bloom filter with a custom set of characteristics. - --- $performance --- --- The implementation has been carefully tuned for high performance --- and low space consumption. diff --git a/bloomfilter/tests/bloomfilter-tests.hs b/bloomfilter/tests/bloomfilter-tests.hs index 94bd4c3c7..408454bdd 100644 --- a/bloomfilter/tests/bloomfilter-tests.hs +++ b/bloomfilter/tests/bloomfilter-tests.hs @@ -1,15 +1,12 @@ module Main (main) where import qualified Data.BloomFilter.Classic as B -import qualified Data.BloomFilter.Classic.BitVec64 as BV64 -import qualified Data.BloomFilter.Classic.Internal as BI import Data.BloomFilter.Hash (Hashable (..), hash64) import Data.ByteString (ByteString) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LBS import Data.Int (Int64) -import qualified Data.Vector.Primitive as VP import Data.Word (Word32, Word64) import Test.QuickCheck.Instances () @@ -43,15 +40,6 @@ tests = testGroup "bloomfilter" , testProperty "prop_list_ex" $ hash64 [[],[],[BS.empty]] =/= hash64 [[],[BS.empty],[]] ] - , testGroup "equality" - [ testProperty "doesn't care about leftover bits a" $ - BI.Bloom 48 1 (BV64.BV64 (VP.singleton 0xffff_0000_1234_5678)) === - BI.Bloom 48 1 (BV64.BV64 (VP.singleton 0xeeee_0000_1234_5678)) - - , testProperty "doesn't care about leftover bits b" $ - BI.Bloom 49 1 (BV64.BV64 (VP.singleton 0xffff_0000_1234_5678)) =/= - BI.Bloom 49 1 (BV64.BV64 (VP.singleton 0xeeee_0000_1234_5678)) - ] ] ------------------------------------------------------------------------------- diff --git a/lsm-tree.cabal b/lsm-tree.cabal index 55cb43c7a..dcafd157d 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -410,23 +410,21 @@ library bloomfilter visibility: private hs-source-dirs: bloomfilter/src build-depends: - , base >=4.16 && <5 - , bitvec ^>=1.1.5.0 + , base >=4.16 && <5 , bytestring >=0.9 - , data-array-byte , deepseq , lsm-tree:xxhash , primitive - , vector ^>=0.13.0.0 
exposed-modules: Data.BloomFilter Data.BloomFilter.Classic - Data.BloomFilter.Classic.BitVec64 + Data.BloomFilter.Hash + + other-modules: + Data.BloomFilter.Classic.BitArray Data.BloomFilter.Classic.Calc Data.BloomFilter.Classic.Internal - Data.BloomFilter.Classic.Mutable - Data.BloomFilter.Hash ghc-options: -O2 -Wall @@ -442,7 +440,6 @@ test-suite bloomfilter-tests , quickcheck-instances , tasty , tasty-quickcheck - , vector benchmark bloomfilter-bench import: language diff --git a/src/Database/LSMTree/Internal/RunAcc.hs b/src/Database/LSMTree/Internal/RunAcc.hs index 6d1b4ae67..cfe0d39fe 100644 --- a/src/Database/LSMTree/Internal/RunAcc.hs +++ b/src/Database/LSMTree/Internal/RunAcc.hs @@ -29,8 +29,6 @@ module Database.LSMTree.Internal.RunAcc ( , PageAcc.entryWouldFitInPage -- * Bloom filter allocation , RunBloomFilterAlloc (..) - -- ** Exposed for testing - , newMBloom ) where import Control.DeepSeq (NFData (..)) @@ -38,7 +36,6 @@ import Control.Exception (assert) import Control.Monad.ST.Strict import Data.BloomFilter (Bloom, MBloom) import qualified Data.BloomFilter as Bloom -import qualified Data.BloomFilter.Classic.Mutable as MBloom import Data.Primitive.PrimVar (PrimVar, modifyPrimVar, newPrimVar, readPrimVar) import Data.Word (Word64) @@ -84,8 +81,12 @@ new :: -> RunBloomFilterAlloc -> IndexType -> ST s (RunAcc s) -new nentries alloc indexType = do - mbloom <- newMBloom nentries alloc +new (NumEntries nentries) alloc indexType = do + --TODO: it'd be possible to cache this BloomPolicy, since it is indepedent + -- of the NumEntries, avoiding recalculating the policy every time. 
+ let policy = bloomFilterAllocPolicy alloc + bsize = Bloom.sizeForPolicy policy nentries + mbloom <- Bloom.new bsize mindex <- Index.newWithDefaults indexType mpageacc <- PageAcc.newPageAcc entryCount <- newPrimVar 0 @@ -167,7 +168,7 @@ addSmallKeyOp :: addSmallKeyOp racc@RunAcc{..} k e = assert (PageAcc.entryWouldFitInPage k e) $ do modifyPrimVar entryCount (+1) - MBloom.insert mbloom k + Bloom.insert mbloom k pageBoundaryNeeded <- -- Try adding the key/op to the page accumulator to see if it fits. If @@ -213,7 +214,7 @@ addLargeKeyOp :: addLargeKeyOp racc@RunAcc{..} k e = assert (not (PageAcc.entryWouldFitInPage k e)) $ do modifyPrimVar entryCount (+1) - MBloom.insert mbloom k + Bloom.insert mbloom k -- If the existing page accumulator is non-empty, we flush it, since the -- new large key/op will need more than one page to itself. @@ -267,7 +268,7 @@ addLargeSerialisedKeyOp racc@RunAcc{..} k page overflowPages = assert (RawPage.rawPageOverflowPages page > 0) $ assert (RawPage.rawPageOverflowPages page == length overflowPages) $ do modifyPrimVar entryCount (+1) - MBloom.insert mbloom k + Bloom.insert mbloom k -- If the existing page accumulator is non-empty, we flush it, since the -- new large key/op will need more than one page to itself. @@ -329,12 +330,8 @@ instance NFData RunBloomFilterAlloc where rnf (RunAllocFixed a) = rnf a rnf (RunAllocRequestFPR a) = rnf a -newMBloom :: NumEntries -> RunBloomFilterAlloc -> ST s (MBloom s a) -newMBloom (NumEntries nentries) alloc = - MBloom.new (Bloom.sizeForPolicy (policy alloc) nentries) - where - --TODO: it'd be possible to turn the RunBloomFilterAlloc into a BloomPolicy - -- without the NumEntries, and cache the policy, avoiding recalculating the - -- policy every time. 
- policy (RunAllocFixed bitsPerEntry) = Bloom.policyForBits (fromIntegral bitsPerEntry) - policy (RunAllocRequestFPR fpr) = Bloom.policyForFPR fpr +--TODO: RunBloomFilterAlloc could probably be replaced by Bloom.BloomPolicy +bloomFilterAllocPolicy :: RunBloomFilterAlloc -> Bloom.BloomPolicy +bloomFilterAllocPolicy = \case + RunAllocFixed bitsPerEntry -> Bloom.policyForBits (fromIntegral bitsPerEntry) + RunAllocRequestFPR fpr -> Bloom.policyForFPR fpr diff --git a/test/Test/Database/LSMTree/Internal/BloomFilter.hs b/test/Test/Database/LSMTree/Internal/BloomFilter.hs index c98624b93..062905113 100644 --- a/test/Test/Database/LSMTree/Internal/BloomFilter.hs +++ b/test/Test/Database/LSMTree/Internal/BloomFilter.hs @@ -26,7 +26,6 @@ import Test.Tasty (TestTree, testGroup) import Test.Tasty.QuickCheck hiding ((.&.)) import qualified Data.BloomFilter as BF -import qualified Data.BloomFilter.Classic.Internal as BF (bloomInvariant) import Database.LSMTree.Internal.BloomFilter import qualified Database.LSMTree.Internal.BloomFilterQuery1 as Bloom1 import Database.LSMTree.Internal.Serialise (SerialisedKey, @@ -75,10 +74,7 @@ prop_total_deserialisation :: BS.ByteString -> Property prop_total_deserialisation bs = case bloomFilterFromBS bs of Left err -> label (displayException err) $ property True - Right bf -> label "parsed successfully" $ property $ - -- Just forcing the filter is not enough (e.g. the bit vector might - -- point outside of the byte array). - bf `deepseq` BF.bloomInvariant bf + Right bf -> label "parsed successfully" $ deepseq bf $ property True -- | Write the bytestring to a file in the mock file system and then use -- 'bloomFilterFromFile'. 
diff --git a/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs b/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs index 3f1f87312..5d4cbc847 100644 --- a/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs +++ b/test/Test/Database/LSMTree/Internal/RunBloomFilterAlloc.hs @@ -11,32 +11,21 @@ module Test.Database.LSMTree.Internal.RunBloomFilterAlloc ( -- * Main test tree tests - -- * Bloom filter construction - -- - -- A common interface to bloom filter construction, based on expected false - -- positive rates. - , BloomMaker - , mkBloomFromAlloc -- * Verifying FPRs , measureApproximateFPR - , measureExactFPR + , measureExactFPR --TODO: this is not currently used for anything, delete? ) where import Control.Exception (assert) -import Control.Monad.ST -import Data.BloomFilter (Bloom) +import Data.BloomFilter (Bloom, Hashable) import qualified Data.BloomFilter as Bloom -import qualified Data.BloomFilter.Classic.Mutable as MBloom -import Data.BloomFilter.Hash (Hashable) import Data.Foldable (Foldable (..)) import Data.Proxy (Proxy (..)) import Data.Set (Set) import qualified Data.Set as Set import Data.Word (Word64) import Database.LSMTree.Extras.Random -import qualified Database.LSMTree.Internal.Entry as LSMT -import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..), - newMBloom) +import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..)) import System.Random import Test.QuickCheck import Test.QuickCheck.Gen @@ -285,12 +274,10 @@ instance Monoid Counts where type BloomMaker a = [a] -> Bloom a --- | Create a bloom filter through the 'newMBloom' interface. Tunes the bloom --- filter according to 'RunBloomFilterAlloc'. +-- | Create a bloom filter, with size determined by a 'RunBloomFilterAlloc'. 
mkBloomFromAlloc :: Hashable a => RunBloomFilterAlloc -> BloomMaker a -mkBloomFromAlloc alloc xs = runST $ do - mb <- newMBloom n alloc - mapM_ (MBloom.insert mb) xs - Bloom.unsafeFreeze mb +mkBloomFromAlloc alloc = Bloom.fromList policy where - n = LSMT.NumEntries $ length xs + policy = case alloc of + RunAllocFixed bits -> Bloom.policyForBits (fromIntegral bits) + RunAllocRequestFPR fpr -> Bloom.policyForFPR fpr From 6c25424ffab28ebed8aac2a27b93936ae0357535 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Sun, 13 Apr 2025 18:42:43 +0100 Subject: [PATCH 24/43] bloomfilter: use a mildly better version of unfoldr Sync the Classic version with the Blocked version. This version uses a double nested loop to keep the filter as a constant for the inner loop. This produces mildly better code with recent GHC versions that do join points properly. --- bloomfilter/src/Data/BloomFilter/Classic.hs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/bloomfilter/src/Data/BloomFilter/Classic.hs b/bloomfilter/src/Data/BloomFilter/Classic.hs index b95ee7340..461218591 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic.hs @@ -137,11 +137,16 @@ unfold :: forall a b. -> b -- ^ initial seed -> Bloom a {-# INLINE unfold #-} -unfold bloomsize f k = create bloomsize (loop k) - where loop :: forall s. b -> MBloom s a -> ST s () - loop j mb = case f j of - Just (a, j') -> insert mb a >> loop j' mb - _ -> return () +unfold bloomsize f k = + create bloomsize body + where + body :: forall s. MBloom s a -> ST s () + body mb = loop k + where + loop :: b -> ST s () + loop !j = case f j of + Nothing -> return () + Just (a, j') -> insert mb a >> loop j' -- | Create a Bloom filter, populating it from a sequence of values. 
-- From 7435cc7f076b53bd62a8b459bcdb57f7cc6cfa0a Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Mon, 14 Apr 2025 19:16:53 +0100 Subject: [PATCH 25/43] bloomfilter: establish a common API for hash-based insert and elem We provide a low level API for bloom filter inserts and elem operations. These allow for sharing a single hash calculation across many filters, and potentially allows for prefetching. Now instead of a CheapHashes type and operations in the Hash module, each of the two implementations (Classic and Blocked) provide a Hashes type and constructor. The classic one uses the (renamed) CheapHashes, while the Blocked one uses its own scheme. Also tidy up the code for the Blocked Hashes functions, to make the pattern clearer. Follow the pattern of a PRNG. --- bench/macro/lsm-tree-bench-bloomfilter.hs | 29 ++-- bloomfilter/src/Data/BloomFilter/Classic.hs | 13 +- .../src/Data/BloomFilter/Classic/Internal.hs | 148 +++++++++++++++++- bloomfilter/src/Data/BloomFilter/Hash.hs | 137 ---------------- .../LSMTree/Internal/BloomFilterQuery1.hs | 7 +- 5 files changed, 167 insertions(+), 167 deletions(-) diff --git a/bench/macro/lsm-tree-bench-bloomfilter.hs b/bench/macro/lsm-tree-bench-bloomfilter.hs index 778126ba1..0339ab6e8 100644 --- a/bench/macro/lsm-tree-bench-bloomfilter.hs +++ b/bench/macro/lsm-tree-bench-bloomfilter.hs @@ -11,7 +11,6 @@ import Control.Monad.ST.Unsafe import Data.Bits ((.&.)) import Data.BloomFilter (Bloom, BloomSize) import qualified Data.BloomFilter as Bloom -import qualified Data.BloomFilter.Hash as Bloom import Data.Time import Data.Vector (Vector) import qualified Data.Vector as V @@ -92,19 +91,19 @@ benchmarks = do putStrLn "" hashcost <- - benchmark "makeCheapHashes" + benchmark "makeHashes" "(This baseline is the cost of computing and hashing the keys)" (benchInBatches benchmarkBatchSize rng0 - (benchMakeCheapHashes vbs)) + (benchMakeHashes vbs)) (fromIntegralChecked benchmarkNumLookups) (0, 0) 289 _ <- - benchmark "elemCheapHashes" + 
benchmark "elemHashes" "(this is the simple one-by-one lookup, less the cost of computing and hashing the keys)" (benchInBatches benchmarkBatchSize rng0 - (benchElemCheapHashes vbs)) + (benchElemHashes vbs)) (fromIntegralChecked benchmarkNumLookups) hashcost 0 @@ -277,21 +276,21 @@ benchInBatches !b !rng0 !action = -- | This gives us a combined cost of calculating the series of keys and their -- hashes (when used with 'benchInBatches'). -benchMakeCheapHashes :: Vector (Bloom SerialisedKey) -> BatchBench -benchMakeCheapHashes !_bs !ks = - let khs :: VP.Vector (Bloom.CheapHashes SerialisedKey) - !khs = V.convert (V.map Bloom.makeHashes ks) +benchMakeHashes :: Vector (Bloom SerialisedKey) -> BatchBench +benchMakeHashes !_bs !ks = + let khs :: VP.Vector (Bloom.Hashes SerialisedKey) + !khs = V.convert (V.map Bloom.hashes ks) in khs `seq` () -- | This gives us a combined cost of calculating the series of keys, their --- hashes, and then using 'Bloom.elemCheapHashes' with each filter (when used +-- hashes, and then using 'Bloom.elemHashes' with each filter (when used -- with 'benchInBatches'). 
-benchElemCheapHashes :: Vector (Bloom SerialisedKey) -> BatchBench -benchElemCheapHashes !bs !ks = - let khs :: VP.Vector (Bloom.CheapHashes SerialisedKey) - !khs = V.convert (V.map Bloom.makeHashes ks) +benchElemHashes :: Vector (Bloom SerialisedKey) -> BatchBench +benchElemHashes !bs !ks = + let khs :: VP.Vector (Bloom.Hashes SerialisedKey) + !khs = V.convert (V.map Bloom.hashes ks) in V.foldl' (\_ b -> VP.foldl' - (\_ kh -> Bloom.elemHashes kh b `seq` ()) + (\_ kh -> Bloom.elemHashes b kh `seq` ()) () khs) () bs diff --git a/bloomfilter/src/Data/BloomFilter/Classic.hs b/bloomfilter/src/Data/BloomFilter/Classic.hs index 461218591..36ea5c7d3 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic.hs @@ -28,7 +28,6 @@ module Data.BloomFilter.Classic ( -- * Types Hash, Hashable, - CheapHashes, -- * Immutable Bloom filters Bloom, @@ -56,19 +55,23 @@ module Data.BloomFilter.Classic ( size, elem, notElem, - elemHashes, serialise, -- * Mutable Bloom filters MBloom, new, insert, - insertHashes, -- ** Conversion freeze, thaw, unsafeFreeze, + + -- * Low level variants + Hashes, + hashes, + insertHashes, + elemHashes, ) where import Control.Monad.Primitive (PrimMonad, PrimState, RealWorld, @@ -108,13 +111,13 @@ create bloomsize body = -- | Insert a value into a mutable Bloom filter. Afterwards, a -- membership query for the same value is guaranteed to return @True@. insert :: Hashable a => MBloom s a -> a -> ST s () -insert !mb !x = insertHashes mb (makeHashes x) +insert !mb !x = insertHashes mb (hashes x) -- | Query an immutable Bloom filter for membership. If the value is -- present, return @True@. If the value is not present, there is -- /still/ some possibility that @True@ will be returned. elem :: Hashable a => a -> Bloom a -> Bool -elem elt ub = elemHashes (makeHashes elt) ub +elem !x !b = elemHashes b (hashes x) -- | Query an immutable Bloom filter for non-membership. If the value -- /is/ present, return @False@. 
If the value is not present, there diff --git a/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs b/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs index 32aedcc83..8c293394b 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs @@ -1,5 +1,6 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE MagicHash #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} {-# OPTIONS_HADDOCK not-home #-} -- | This module defines the 'Bloom' and 'MBloom' types and all the functions -- that need direct knowledge of and access to the representation. This forms @@ -8,12 +9,16 @@ module Data.BloomFilter.Classic.Internal ( -- * Mutable Bloom filters MBloom, new, - insertHashes, -- * Immutable Bloom filters Bloom, bloomInvariant, size, + + -- * Hash-based operations + Hashes, + hashes, + insertHashes, elemHashes, -- * Conversion @@ -32,6 +37,7 @@ import Data.Bits import Data.Kind (Type) import Data.Primitive.ByteArray import Data.Primitive.PrimArray +import Data.Primitive.Types (Prim (..)) import Data.Word (Word64) #if MIN_VERSION_base(4,17,0) @@ -39,6 +45,7 @@ import GHC.Exts (remWord64#) #else import GHC.Exts (remWord#) #endif +import GHC.Exts (Int#, uncheckedIShiftL#, (+#)) import GHC.Word (Word64 (W64#)) import Data.BloomFilter.Classic.BitArray (BitArray, MBitArray) @@ -79,7 +86,7 @@ new BloomSize { sizeBits, sizeHashes } = do mbBitArray } -insertHashes :: MBloom s a -> CheapHashes a -> ST s () +insertHashes :: MBloom s a -> Hashes a -> ST s () insertHashes MBloom { mbNumBits = m, mbNumHashes = k, mbBitArray = a } !ch = go 0 where @@ -141,8 +148,8 @@ size Bloom { numBits, numHashes } = } -- | Query an immutable Bloom filter for membership using already constructed 'Hashes' value. 
-elemHashes :: CheapHashes a -> Bloom a -> Bool -elemHashes !ch Bloom { numBits, numHashes, bitArray } = +elemHashes :: Bloom a -> Hashes a -> Bool +elemHashes Bloom { numBits, numHashes, bitArray } !ch = go 0 where go :: Int -> Bool @@ -218,3 +225,132 @@ unsafeRemWord64 (W64# x#) (W64# y#) = W64# (x# `remWord64#` y#) #else unsafeRemWord64 (W64# x#) (W64# y#) = W64# (x# `remWord#` y#) #endif + +------------------------------------------------------------------------------- +-- Hashes +-- + +-- | A pair of hashes used for a double hashing scheme. +-- +-- See 'evalHashes'. +data Hashes a = Hashes !Hash !Hash + deriving Show +type role Hashes nominal + +instance Prim (Hashes a) where + sizeOfType# _ = 16# + alignmentOfType# _ = 8# + + indexByteArray# ba i = Hashes + (indexByteArray# ba (indexLo i)) + (indexByteArray# ba (indexHi i)) + readByteArray# ba i s1 = + case readByteArray# ba (indexLo i) s1 of { (# s2, lo #) -> + case readByteArray# ba (indexHi i) s2 of { (# s3, hi #) -> + (# s3, Hashes lo hi #) + }} + writeByteArray# ba i (Hashes lo hi) s = + writeByteArray# ba (indexHi i) hi (writeByteArray# ba (indexLo i) lo s) + + indexOffAddr# ba i = Hashes + (indexOffAddr# ba (indexLo i)) + (indexOffAddr# ba (indexHi i)) + readOffAddr# ba i s1 = + case readOffAddr# ba (indexLo i) s1 of { (# s2, lo #) -> + case readOffAddr# ba (indexHi i) s2 of { (# s3, hi #) -> + (# s3, Hashes lo hi #) + }} + writeOffAddr# ba i (Hashes lo hi) s = + writeOffAddr# ba (indexHi i) hi (writeOffAddr# ba (indexLo i) lo s) + +indexLo :: Int# -> Int# +indexLo i = uncheckedIShiftL# i 1# + +indexHi :: Int# -> Int# +indexHi i = uncheckedIShiftL# i 1# +# 1# + +{- Note [Original Hashes] + +Compute a list of 32-bit hashes relatively cheaply. The value to +hash is inspected at most twice, regardless of the number of hashes +requested. + +We use a variant of Kirsch and Mitzenmacher's technique from \"Less +Hashing, Same Performance: Building a Better Bloom Filter\", +. 
+ +Where Kirsch and Mitzenmacher multiply the second hash by a +coefficient, we shift right by the coefficient. This offers better +performance (as a shift is much cheaper than a multiply), and the +low order bits of the final hash stay well mixed. + +-} + +{- Note: [Hashes] + +At first glance the 'evalHashes' scheme seems dubious. + +Firstly, its original performance motivation is dubious. + +> multiply the second hash by a coefficient + +While the double hashing scheme is presented in +theoretical analysis as + + g(i) = a + i * b + +In practice it's implemented in a loop which looks like + + g[0] = a + for (i = 1; i < k; i++) { + a += b; + g[i] = a; + } + +I.e. with just an addition. + +Secondly there is no analysis anywhere about the +'evalHashes' scheme. + +Peter Dillinger's thesis (Adaptive Approximate State Storage) +discusses various fast hashing schemes (section 6.5), +mentioning why ordinary "double hashing" is a weak scheme. + +Issue 1: when the second hash value is bad, e.g. not coprime with the bloom filter's size in bits, +we can get repetitions (worst case 0, or m/2). + +Issue 2: in bloom filter scenario, whether we do a + i * b or h0 - i * b' (with b' = -b) +as we probe all indices (as set) doesn't matter, not sequentially (like in hash table). +So we lose one bit of entropy. + +Issue 3: the scheme is prone to partial overlap. +Two values with the same second hash value could overlap on many indices. + +Then Dillinger discusses various schemes which solve this issue. + +The Hashes scheme seems to avoid these culprits. +This is probably because it uses most of the bits of the second hash, even in m = 2^n scenarios. +(normal double hashing and enhanced double hashing don't use the high bits of the original hash then). +TL;DR Hashes seems to work well in practice. + +For the record: RocksDB uses its own scheme as well, +where first hash is used to pick a cache line, and second one to generate probes inside it.
+https://github.com/facebook/rocksdb/blob/096fb9b67d19a9a180e7c906b4a0cdb2b2d0c1f6/util/bloom_impl.h + +-} + +-- | Evaluate 'Hashes' family. +-- +-- \[ +-- g_i = h_0 + \left\lfloor h_1 / 2^i \right\rfloor +-- \] +-- +evalHashes :: Hashes a -> Int -> Hash +evalHashes (Hashes h1 h2) i = h1 + (h2 `unsafeShiftR` i) + +-- | Create 'Hashes' structure. +-- +-- It simply hashes the value twice using seed 0 and 1. +hashes :: Hashable a => a -> Hashes a +hashes v = Hashes (hashSalt64 0 v) (hashSalt64 1 v) +{-# INLINE hashes #-} diff --git a/bloomfilter/src/Data/BloomFilter/Hash.hs b/bloomfilter/src/Data/BloomFilter/Hash.hs index 6b52906c9..dd362305b 100644 --- a/bloomfilter/src/Data/BloomFilter/Hash.hs +++ b/bloomfilter/src/Data/BloomFilter/Hash.hs @@ -15,22 +15,15 @@ module Data.BloomFilter.Hash ( Incremental (..), HashState, incrementalHash, - -- * Compute a family of hash values - CheapHashes (..), - evalHashes, - makeHashes, ) where import Control.Monad (forM_) import Control.Monad.ST (ST, runST) -import Data.Bits (unsafeShiftR) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LBS import Data.Char (ord) import qualified Data.Primitive.ByteArray as P -import Data.Primitive.Types (Prim (..)) import Data.Word (Word32, Word64) -import GHC.Exts (Int#, uncheckedIShiftL#, (+#)) import qualified XXH3 -- | A hash value is 64 bits wide. @@ -138,133 +131,3 @@ incrementalHash seed f = runST $ do XXH3.xxh3_64bit_reset_withSeed s seed f (HashState s) XXH3.xxh3_64bit_digest s - -------------------------------------------------------------------------------- --- CheapHashes -------------------------------------------------------------------------------- - --- | A pair of hashes used for a double hashing scheme. --- --- See 'evalCheapHashes'.
-data CheapHashes a = CheapHashes !Hash !Hash - deriving Show -type role CheapHashes nominal - -instance Prim (CheapHashes a) where - sizeOfType# _ = 16# - alignmentOfType# _ = 8# - - indexByteArray# ba i = CheapHashes - (indexByteArray# ba (indexLo i)) - (indexByteArray# ba (indexHi i)) - readByteArray# ba i s1 = - case readByteArray# ba (indexLo i) s1 of { (# s2, lo #) -> - case readByteArray# ba (indexHi i) s2 of { (# s3, hi #) -> - (# s3, CheapHashes lo hi #) - }} - writeByteArray# ba i (CheapHashes lo hi) s = - writeByteArray# ba (indexHi i) hi (writeByteArray# ba (indexLo i) lo s) - - indexOffAddr# ba i = CheapHashes - (indexOffAddr# ba (indexLo i)) - (indexOffAddr# ba (indexHi i)) - readOffAddr# ba i s1 = - case readOffAddr# ba (indexLo i) s1 of { (# s2, lo #) -> - case readOffAddr# ba (indexHi i) s2 of { (# s3, hi #) -> - (# s3, CheapHashes lo hi #) - }} - writeOffAddr# ba i (CheapHashes lo hi) s = - writeOffAddr# ba (indexHi i) hi (writeOffAddr# ba (indexLo i) lo s) - -indexLo :: Int# -> Int# -indexLo i = uncheckedIShiftL# i 1# - -indexHi :: Int# -> Int# -indexHi i = uncheckedIShiftL# i 1# +# 1# - -{- Note [Original CheapHashes] - -Compute a list of 32-bit hashes relatively cheaply. The value to -hash is inspected at most twice, regardless of the number of hashes -requested. - -We use a variant of Kirsch and Mitzenmacher's technique from \"Less -Hashing, Same Performance: Building a Better Bloom Filter\", -. - -Where Kirsch and Mitzenmacher multiply the second hash by a -coefficient, we shift right by the coefficient. This offers better -performance (as a shift is much cheaper than a multiply), and the -low order bits of the final hash stay well mixed. - --} - -{- Note: [CheapHashes] - -On the first glance the 'evalCheapHashes' scheme seems dubious. - -Firstly, it's original performance motivation is dubious. 
- -> multiply the second hash by a coefficient - -While the scheme double hashing scheme is presented in -theoretical analysis as - - g(i) = a + i * b - -In practice it's implemented in a loop which looks like - - g[0] = a - for (i = 1; i < k; i++) { - a += b; - g[i] = a; - } - -I.e. with just an addition. - -Secondly there is no analysis anywhere about the -'evalCheapHashes' scheme. - -Peter Dillinger's thesis (Adaptive Approximate State Storage) -discusses various fast hashing schemes (section 6.5), -mentioning why ordinary "double hashing" is weak scheme. - -Issue 1: when second hash value is bad, e.g. not coprime with bloom filters size in bits, -we can get repetitions (worst case 0, or m/2). - -Issue 2: in bloom filter scenario, whether we do a + i * b or h0 - i * b' (with b' = -b) -as we probe all indices (as set) doesn't matter, not sequentially (like in hash table). -So we lose one bit entropy. - -Issue 3: the scheme is prone to partial overlap. -Two values with the same second hash value could overlap on many indices. - -Then Dillinger discusses various schemes which solve this issue. - -The CheapHashes scheme seems to avoid these cuprits. -This is probably because it uses most of the bits of the second hash, even in m = 2^n scenarios. -(normal double hashing and enhances double hashing don't use the high bits or original hash then). -TL;DR CheapHashes seems to work well in practice. - -For the record: RocksDB uses an own scheme as well, -where first hash is used to pick a cache line, and second one to generate probes inside it. -https://github.com/facebook/rocksdb/blob/096fb9b67d19a9a180e7c906b4a0cdb2b2d0c1f6/util/bloom_impl.h - --} - --- | Evalute 'CheapHashes' family. --- --- \[ --- g_i = h_0 + \left\lfloor h_1 / 2^i \right\rfloor --- \] --- -evalHashes :: CheapHashes a -> Int -> Hash -evalHashes (CheapHashes h1 h2) i = h1 + (h2 `unsafeShiftR` i) - --- | Create 'CheapHashes' structure. --- --- It's simply hashes the value twice using seed 0 and 1. 
-makeHashes :: Hashable a => a -> CheapHashes a -makeHashes v = CheapHashes (hashSalt64 0 v) (hashSalt64 1 v) -{-# SPECIALISE makeHashes :: BS.ByteString -> CheapHashes BS.ByteString #-} -{-# INLINEABLE makeHashes #-} diff --git a/src/Database/LSMTree/Internal/BloomFilterQuery1.hs b/src/Database/LSMTree/Internal/BloomFilterQuery1.hs index cf65a1d8a..35b249bb1 100644 --- a/src/Database/LSMTree/Internal/BloomFilterQuery1.hs +++ b/src/Database/LSMTree/Internal/BloomFilterQuery1.hs @@ -22,7 +22,6 @@ import Control.Monad.ST (ST) import Data.BloomFilter (Bloom) import qualified Data.BloomFilter as Bloom -import qualified Data.BloomFilter.Hash as Bloom import Database.LSMTree.Internal.Serialise (SerialisedKey) @@ -94,8 +93,8 @@ bloomQueries !blooms !ks !rsN = V.length blooms !ksN = V.length ks - hs :: VP.Vector (Bloom.CheapHashes SerialisedKey) - !hs = VP.generate ksN $ \i -> Bloom.makeHashes (V.unsafeIndex ks i) + hs :: VP.Vector (Bloom.Hashes SerialisedKey) + !hs = VP.generate ksN $ \i -> Bloom.hashes (V.unsafeIndex ks i) -- Loop over all run indexes loop1 :: @@ -120,7 +119,7 @@ bloomQueries !blooms !ks loop2 !res2 !resix2 !kix !b | kix == ksN = pure (res2, resix2) | let !h = hs `VP.unsafeIndex` kix - , Bloom.elemHashes h b = do + , Bloom.elemHashes b h = do -- Double the vector if we've reached the end. -- Note unsafeGrow takes the number to grow by, not the new size. res2' <- if resix2 == VPM.length res2 From f6a71886cce9039c09be1722fa59daed47bb30a8 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Thu, 10 Apr 2025 22:13:08 +0100 Subject: [PATCH 26/43] bloomfilter: Add new Data.BloomFilter.Blocked implementation This is a blocked bloom filter where the bit array is split into 64byte blocks. Instead of probing bits randomly across the whole bit array (which needs one cache line load per bit probe), it first selects one block and then selects a number of bits within the block. 
So the same number of bit probes are done overall, but they're all clustered within one block. The blocks are cache line sized and aligned to cache boundaries. Thus lookups and inserts only cost a single cache line load (or load and store for an insert). --- bloomfilter/src/Data/BloomFilter/Blocked.hs | 176 ++++++++++ .../src/Data/BloomFilter/Blocked/BitArray.hs | 185 +++++++++++ .../src/Data/BloomFilter/Blocked/Calc.hs | 96 ++++++ .../src/Data/BloomFilter/Blocked/Internal.hs | 304 ++++++++++++++++++ lsm-tree.cabal | 4 + 5 files changed, 765 insertions(+) create mode 100644 bloomfilter/src/Data/BloomFilter/Blocked.hs create mode 100644 bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs create mode 100644 bloomfilter/src/Data/BloomFilter/Blocked/Calc.hs create mode 100644 bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs diff --git a/bloomfilter/src/Data/BloomFilter/Blocked.hs b/bloomfilter/src/Data/BloomFilter/Blocked.hs new file mode 100644 index 000000000..ffcc0c558 --- /dev/null +++ b/bloomfilter/src/Data/BloomFilter/Blocked.hs @@ -0,0 +1,176 @@ +-- | +-- +-- A fast, space efficient Bloom filter implementation. A Bloom +-- filter is a set-like data structure that provides a probabilistic +-- membership test. +-- +-- * Queries do not give false negatives. When an element is added to +-- a filter, a subsequent membership test will definitely return +-- 'True'. +-- +-- * False positives /are/ possible. If an element has not been added +-- to a filter, a membership test /may/ nevertheless indicate that +-- the element is present. 
+-- + +module Data.BloomFilter.Blocked ( + -- * Types + Hash, + Hashable, + + -- * Immutable Bloom filters + Bloom, + + -- ** Creation + create, + unfold, + fromList, + deserialise, + + -- ** Sizes + NumEntries, + BloomSize (..), + FPR, + sizeForFPR, + BitsPerEntry, + sizeForBits, + sizeForPolicy, + BloomPolicy (..), + policyFPR, + policyForFPR, + policyForBits, + + -- ** Accessors + size, + elem, + notElem, + serialise, + + -- * Mutable Bloom filters + MBloom, + new, + insert, + + -- ** Conversion + freeze, + thaw, + unsafeFreeze, + + -- * Low level variants + Hashes, + hashes, + insertHashes, + elemHashes, + -- ** Prefetching + prefetchInsert, + prefetchElem, +) where + +import Control.Monad.Primitive (PrimMonad, PrimState, RealWorld, + stToPrim) +import Control.Monad.ST (ST, runST) +import Data.Primitive.ByteArray (MutableByteArray) + +import Data.BloomFilter.Blocked.Calc +import Data.BloomFilter.Blocked.Internal hiding (deserialise) +import qualified Data.BloomFilter.Blocked.Internal as Internal +import Data.BloomFilter.Hash + +import Prelude hiding (elem, notElem) + +-- | Create an immutable Bloom filter, using the given setup function +-- which executes in the 'ST' monad. +-- +-- Example: +-- +-- @ +--filter = create (sizeForBits 16 2) $ \mf -> do +-- insert mf \"foo\" +-- insert mf \"bar\" +-- @ +-- +-- Note that the result of the setup function is not used. +create :: BloomSize + -> (forall s. (MBloom s a -> ST s ())) -- ^ setup function + -> Bloom a +{-# INLINE create #-} +create bloomsize body = + runST $ do + mb <- new bloomsize + body mb + unsafeFreeze mb + +{-# INLINEABLE insert #-} +-- | Insert a value into a mutable Bloom filter. Afterwards, a +-- membership query for the same value is guaranteed to return @True@. +insert :: Hashable a => MBloom s a -> a -> ST s () +insert = \ !mb !x -> insertHashes mb (hashes x) + +{-# INLINE elem #-} +-- | Query an immutable Bloom filter for membership. If the value is +-- present, return @True@. 
If the value is not present, there is +-- /still/ some possibility that @True@ will be returned. +elem :: Hashable a => a -> Bloom a -> Bool +elem = \ !x !b -> elemHashes b (hashes x) + +{-# INLINE notElem #-} +-- | Query an immutable Bloom filter for non-membership. If the value +-- /is/ present, return @False@. If the value is not present, there +-- is /still/ some possibility that @False@ will be returned. +notElem :: Hashable a => a -> Bloom a -> Bool +notElem = \x b -> not (x `elem` b) + +-- | Build an immutable Bloom filter from a seed value. The seeding +-- function populates the filter as follows. +-- +-- * If it returns 'Nothing', it is finished producing values to +-- insert into the filter. +-- +-- * If it returns @'Just' (a,b)@, @a@ is added to the filter and +-- @b@ is used as a new seed. +unfold :: forall a b. + Hashable a + => BloomSize + -> (b -> Maybe (a, b)) -- ^ seeding function + -> b -- ^ initial seed + -> Bloom a +{-# INLINE unfold #-} +unfold bloomsize f k = + create bloomsize body + where + body :: forall s. MBloom s a -> ST s () + body mb = loop k + where + loop :: b -> ST s () + loop !j = case f j of + Nothing -> return () + Just (a, j') -> insert mb a >> loop j' + +-- | Create a Bloom filter, populating it from a sequence of values. 
+-- +-- For example +-- +-- @ +-- filt = fromList (policyForBits 10) [\"foo\", \"bar\", \"quux\"] +-- @ +fromList :: (Foldable t, Hashable a) + => BloomPolicy + -> t a -- ^ values to populate with + -> Bloom a +fromList policy xs = + create bsize (\b -> mapM_ (insert b) xs) + where + bsize = sizeForPolicy policy (length xs) + +{-# SPECIALISE deserialise :: BloomSize + -> (MutableByteArray RealWorld -> Int -> Int -> IO ()) + -> IO (Bloom a) #-} +deserialise :: PrimMonad m + => BloomSize + -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) + -> m (Bloom a) +deserialise bloomsize fill = do + mbloom <- stToPrim $ new bloomsize + Internal.deserialise mbloom fill + stToPrim $ unsafeFreeze mbloom + diff --git a/bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs b/bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs new file mode 100644 index 000000000..833ff1c30 --- /dev/null +++ b/bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs @@ -0,0 +1,185 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +-- | Blocked bit array implementation. This uses blocks of 64 bytes, aligned +-- to 64byte boundaries to match typical cache line sizes. This means that +-- multiple accesses to the same block only require a single cache line load +-- or store. +module Data.BloomFilter.Blocked.BitArray ( + bitsToBlocks, + blocksToBits, + BlockIx (..), + BitIx (..), + BitArray (..), + unsafeIndex, + prefetchIndex, + MBitArray (..), + new, + unsafeSet, + prefetchSet, + freeze, + unsafeFreeze, + thaw, + serialise, + deserialise, +) where + +import Control.Exception (assert) +import Control.Monad.Primitive (PrimMonad, PrimState) +import Control.Monad.ST (ST) +import Data.Bits +import Data.Primitive.ByteArray +import Data.Primitive.PrimArray +import Data.Word (Word64, Word8) + +import GHC.Exts (Int (I#), prefetchByteArray0#, + prefetchMutableByteArray3#) +import GHC.ST (ST (ST)) + +-- | An array of blocks of bits. 
+-- +-- Each block is 512 bits (64 bytes large), corresponding to a cache line on +-- most current architectures. +-- +-- It is represented by an array of 'Word64'. This array is aligned to 64 bytes +-- so that multiple accesses within a single block will use only one cache line. +-- +newtype BitArray = BitArray (PrimArray Word64) + deriving (Eq, Show) + +-- | The number of 512-bit blocks for the given number of bits. This rounds +-- up to the nearest multiple of 512. +bitsToBlocks :: Int -> Int +bitsToBlocks n = (n+511) `div` 512 -- rounded up + +blocksToBits :: Int -> Int +blocksToBits n = n * 512 + +newtype BlockIx = BlockIx Word +newtype BitIx = BitIx Int + +{-# INLINE unsafeIndex #-} +unsafeIndex :: BitArray -> BlockIx -> BitIx -> Bool +unsafeIndex (BitArray arr) blockIx blockBitIx = + assert (wordIx >= 0 && wordIx < sizeofPrimArray arr) $ + indexPrimArray arr wordIx `unsafeTestBit` wordBitIx + where + (wordIx, wordBitIx) = wordAndBitIndex blockIx blockBitIx + +{-# INLINE prefetchIndex #-} +prefetchIndex :: BitArray -> BlockIx -> ST s () +prefetchIndex (BitArray (PrimArray ba#)) (BlockIx blockIx) = + -- For reading, we want to prefetch such that we do least disturbence of + -- the caches. We will typically not keep this cache line longer than one + -- read. + let !i@(I# i#) = fromIntegral blockIx `shiftL` 6 in + -- blockIx * 64 to go from block index to the byte offset of the beginning + -- of the block. This offset is in bytes, not words. + + assert (i >= 0 && i <= sizeofByteArray (ByteArray ba#)) $ + + ST (\s -> case prefetchByteArray0# ba# i# s of + s' -> (# s', () #)) + +newtype MBitArray s = MBitArray (MutablePrimArray s Word64) + +-- | We create an explicitly pinned byte array, aligned to 64 bytes. 
+--
+new :: Int -> ST s (MBitArray s)
+new numBlocks = do
+    mba@(MutableByteArray mba#) <- newAlignedPinnedByteArray numBytes 64
+    setByteArray mba 0 numBytes (0 :: Word8)
+    return (MBitArray (MutablePrimArray mba#))
+  where
+    !numBytes = numBlocks * 64
+
+serialise :: BitArray -> (ByteArray, Int, Int)
+serialise bitArray =
+    let ba = asByteArray bitArray
+     in (ba, 0, sizeofByteArray ba)
+  where
+    asByteArray (BitArray (PrimArray ba#)) = ByteArray ba#
+
+-- | Do an in-place overwrite of the byte array representing the bit block.
+deserialise :: PrimMonad m
+            => MBitArray (PrimState m)
+            -> (MutableByteArray (PrimState m) -> Int -> Int -> m ())
+            -> m ()
+deserialise bitArray fill = do
+    let mba = asMutableByteArray bitArray
+    len <- getSizeofMutableByteArray mba
+    fill mba 0 len
+  where
+    asMutableByteArray (MBitArray (MutablePrimArray mba#)) =
+      MutableByteArray mba#
+
+unsafeSet :: MBitArray s -> BlockIx -> BitIx -> ST s ()
+unsafeSet (MBitArray arr) blockIx blockBitIx = do
+#ifdef NO_IGNORE_ASSERTS
+    sz <- getSizeofMutablePrimArray arr
+    assert (wordIx >= 0 && wordIx < sz) $ return () -- strict: wordIx == sz is one past the end
+#endif
+    w <- readPrimArray arr wordIx
+    writePrimArray arr wordIx (unsafeSetBit w wordBitIx)
+  where
+    (wordIx, wordBitIx) = wordAndBitIndex blockIx blockBitIx
+
+{-# INLINE prefetchSet #-}
+prefetchSet :: MBitArray s -> BlockIx -> ST s ()
+prefetchSet (MBitArray (MutablePrimArray mba#)) (BlockIx blockIx) = do
+    -- For setting, we will do several writes to the same cache line, so
+    -- read it into all 3 levels of cache.
+    let !(I# i#) = fromIntegral blockIx `shiftL` 6
+    -- blockIx * 64 to go from block index to the byte offset of the beginning
+    -- of the block. This offset is in bytes, not words.
+ +#ifdef NO_IGNORE_ASSERTS + sz <- getSizeofMutableByteArray (MutableByteArray mba#) + assert (let i = I# i# in i >= 0 && i <= sz) $ return () +#endif + + ST (\s -> case prefetchMutableByteArray3# mba# i# s of + s' -> (# s', () #)) + +freeze :: MBitArray s -> ST s BitArray +freeze (MBitArray arr) = do + len <- getSizeofMutablePrimArray arr + BitArray <$> freezePrimArray arr 0 len + +unsafeFreeze :: MBitArray s -> ST s BitArray +unsafeFreeze (MBitArray arr) = + BitArray <$> unsafeFreezePrimArray arr + +thaw :: BitArray -> ST s (MBitArray s) +thaw (BitArray arr) = + MBitArray <$> thawPrimArray arr 0 (sizeofPrimArray arr) + +{-# INLINE wordAndBitIndex #-} +-- | Given the index of the 512 bit block, and the index of the bit within the +-- block, compute the index of the word in the array, and index of the bit +-- within the word. +-- +wordAndBitIndex :: BlockIx -> BitIx -> (Int, Int) +wordAndBitIndex (BlockIx blockIx) (BitIx blockBitIx) = + assert (blockBitIx < 512) $ + (wordIx, wordBitIx) + where + -- Select the Word64 in the underlying array based on the block index + -- and the bit index. + -- * There are 8 Word64s in each 64byte block. + -- * Use 3 bits (bits 6..8) to select the Word64 within the block + wordIx = fromIntegral blockIx `shiftL` 3 + + (blockBitIx `shiftR` 6) .&. 7 + + -- Bits 0..5 of blockBitIx select the bit within Word64 + wordBitIx = blockBitIx .&. 63 + +{-# INLINE unsafeTestBit #-} +-- like testBit but using unsafeShiftL instead of shiftL +unsafeTestBit :: Word64 -> Int -> Bool +unsafeTestBit w k = w .&. (1 `unsafeShiftL` k) /= 0 + +{-# INLINE unsafeSetBit #-} +-- like setBit but using unsafeShiftL instead of shiftL +unsafeSetBit :: Word64 -> Int -> Word64 +unsafeSetBit w k = w .|. 
(1 `unsafeShiftL` k)
diff --git a/bloomfilter/src/Data/BloomFilter/Blocked/Calc.hs b/bloomfilter/src/Data/BloomFilter/Blocked/Calc.hs
new file mode 100644
index 000000000..aac49c6ef
--- /dev/null
+++ b/bloomfilter/src/Data/BloomFilter/Blocked/Calc.hs
@@ -0,0 +1,103 @@
+-- | Various formulas for working with bloomfilters.
+module Data.BloomFilter.Blocked.Calc (
+    NumEntries,
+    BloomSize (..),
+    FPR,
+    sizeForFPR,
+    BitsPerEntry,
+    sizeForBits,
+    sizeForPolicy,
+    BloomPolicy (..),
+    policyFPR,
+    policyForFPR,
+    policyForBits,
+) where
+
+import           Data.BloomFilter.Classic.Calc (BitsPerEntry, BloomPolicy (..),
+                     BloomSize (..), FPR, NumEntries)
+
+-- | Policy (bits per entry, number of hashes) for a target false positive
+-- rate. The bits per entry come from the regression fit below. Calls 'error'
+-- unless @0 < fpr < 1@.
+policyForFPR :: FPR -> BloomPolicy
+policyForFPR fpr | fpr <= 0 || fpr >= 1 =
+    error "policyForFPR: fpr out of range (0,1)"
+
+policyForFPR fpr =
+    BloomPolicy {
+      policyBits   = c,
+      policyHashes = k
+    }
+  where
+    k       :: Int
+    k       = max 1 (round (recip_log2 * log_fpr))
+    c       = log_fpr * log_fpr * f2
+            + log_fpr * f1
+            + f0
+    log_fpr = negate (log fpr)
+
+    -- These parameters are from a (quadratic) linear regression in log space
+    -- of samples of the actual FPR between 1 and 20 bits. This is with log FPR
+    -- as the independent variable and bits as the dependent variable.
+    f2,f1,f0 :: Double
+    f2 = 8.035531421107756e-2
+    f1 = 1.653017726702572
+    f0 = 0.5343568065075601
+{-
+Regression, FPR independent, bits dependent:
+Fit {fitParams = V3 8.035531421107756e-2 1.653017726702572 0.5343568065075601, fitErrors = V3 7.602655075308541e-4 8.422591688796256e-3 2.0396917012822195e-2, fitNDF = 996, fitWSSR = 18.362899348627252}
+-}
+
+-- | Policy for a given number of bits per entry, using @round (c * log 2)@
+-- hashes (at least one). Calls 'error' unless @0 <= c <= 64@.
+policyForBits :: BitsPerEntry -> BloomPolicy
+policyForBits c | c < 0 || c > 64 =
+    error "policyForBits: out of range [0,64]"
+
+policyForBits c =
+    BloomPolicy {
+      policyBits   = c,
+      policyHashes = k
+    }
+  where
+    k = max 1 (round (c * log2))
+
+-- | Estimated false positive rate for a policy, from the regression fit
+-- below. Only 'policyBits' is consulted.
+policyFPR :: BloomPolicy -> FPR
+policyFPR BloomPolicy {
+            policyBits = c
+          } =
+    exp (negate (c*c*f2 + c*f1 + f0))
+  where
+    -- These parameters are from a (quadratic) linear regression in log space
+    -- of samples of the actual FPR between 2 and 24 bits. This is with bits as
+    -- the independent variable and log FPR as the dependent variable.
+    f2,f1,f0 :: Double
+    f2 = -4.990533525011442e-3
+    f1 = 0.5236326626983274
+    f0 = -9.08567744857578e-2
+{-
+Regression, bits independent, FPR dependent:
+Fit {fitParams = V3 (-4.990533525011442e-3) 0.5236326626983274 (-9.08567744857578e-2), fitErrors = V3 3.2672398863476205e-5 8.69874829861453e-4 4.98365450607998e-3, fitNDF = 996, fitWSSR = 1.4326826384055948}
+-}
+
+sizeForFPR :: FPR -> NumEntries -> BloomSize
+sizeForFPR = sizeForPolicy . policyForFPR
+
+sizeForBits :: BitsPerEntry -> NumEntries -> BloomSize
+sizeForBits = sizeForPolicy .
policyForBits + +sizeForPolicy :: BloomPolicy -> NumEntries -> BloomSize +sizeForPolicy BloomPolicy { + policyBits = c, + policyHashes = k + } n = + BloomSize { + sizeBits = max 1 (ceiling (fromIntegral n * c)), + sizeHashes = max 1 k + } + +log2, recip_log2 :: Double +log2 = log 2 +recip_log2 = recip log2 diff --git a/bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs b/bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs new file mode 100644 index 000000000..07acc456b --- /dev/null +++ b/bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs @@ -0,0 +1,304 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +{-# OPTIONS_HADDOCK not-home #-} + +-- | This module defines the 'Bloom' and 'MBloom' types and all the functions +-- that need direct knowledge of and access to the representation. This forms +-- the trusted base. +module Data.BloomFilter.Blocked.Internal ( + -- * Mutable Bloom filters + MBloom, + new, + + -- * Immutable Bloom filters + Bloom, + bloomInvariant, + size, + + -- * Hash-based operations + Hashes, + hashes, + insertHashes, + prefetchInsert, + elemHashes, + prefetchElem, + + -- * Conversion + serialise, + deserialise, + freeze, + unsafeFreeze, + thaw, + ) where + +import Control.DeepSeq (NFData (..)) +import Control.Exception (assert) +import Control.Monad.Primitive (PrimMonad, PrimState) +import Control.Monad.ST (ST) +import Data.Bits +import Data.Kind (Type) +import Data.Primitive.ByteArray +import Data.Primitive.PrimArray +import Data.Primitive.Types (Prim (..)) + +import Data.BloomFilter.Blocked.BitArray (BitArray, BitIx (..), + BlockIx (..), MBitArray, bitsToBlocks, blocksToBits) +import qualified Data.BloomFilter.Blocked.BitArray as BitArray +import Data.BloomFilter.Classic.Calc +import Data.BloomFilter.Hash + +------------------------------------------------------------------------------- +-- Mutable Bloom filters +-- + +type MBloom :: Type -> Type -> Type +-- | A mutable Bloom filter, for use within the 
'ST' monad.
+data MBloom s a = MBloom {
+      mbNumBlocks :: {-# UNPACK #-} !Int -- ^ non-zero
+    , mbNumHashes :: {-# UNPACK #-} !Int
+    , mbBitArray  :: {-# UNPACK #-} !(MBitArray s)
+    }
+type role MBloom nominal nominal
+
+instance Show (MBloom s a) where
+    show mb = "MBloom { " ++ show numBits ++ " bits } "
+      where
+        numBits = blocksToBits (mbNumBlocks mb)
+
+instance NFData (MBloom s a) where
+    rnf !_ = ()
+
+-- | Create a new mutable Bloom filter.
+--
+-- The size is rounded up to whole 512-bit blocks and clamped to 1..2^32-1
+-- blocks (just under 2^41 bits, 256 Gbytes). Tell us if you need bigger.
+--
+new :: BloomSize -> ST s (MBloom s a)
+new BloomSize { sizeBits, sizeHashes } = do
+    let numBlocks :: Int
+        numBlocks = max 1 (min 0xffff_ffff  -- clamp; masking could wrap to 0
+                               (bitsToBlocks sizeBits))
+    mbBitArray <- BitArray.new numBlocks
+    pure MBloom {
+      mbNumBlocks = numBlocks,
+      mbNumHashes = max 1 sizeHashes,
+      mbBitArray
+    }
+
+{-# NOINLINE insertHashes #-}
+insertHashes :: forall s a. MBloom s a -> Hashes a -> ST s ()
+insertHashes MBloom { mbNumBlocks, mbNumHashes, mbBitArray } !h =
+    go g0 mbNumHashes
+  where
+    blockIx :: BlockIx
+    (!blockIx, !g0) = blockIxAndBitGen h mbNumBlocks
+
+    go :: BitIxGen -> Int -> ST s ()
+    go !_ 0  = return ()
+    go !g !i = do
+      let blockBitIx :: BitIx
+          (!blockBitIx, !g') = genBitIndex g
+      assert (let BlockIx b = blockIx
+               in b >= 0 && b < fromIntegral mbNumBlocks) $
+        BitArray.unsafeSet mbBitArray blockIx blockBitIx
+      go g' (i-1)
+
+prefetchInsert :: MBloom s a -> Hashes a -> ST s ()
+prefetchInsert MBloom { mbNumBlocks, mbBitArray } !h =
+    BitArray.prefetchSet mbBitArray blockIx
+  where
+    blockIx :: BlockIx
+    (!blockIx, _) = blockIxAndBitGen h mbNumBlocks
+
+-- | Modify the filter's bit array. The callback is expected to read (exactly)
+-- the given number of bytes into the given byte array buffer.
+-- +deserialise :: PrimMonad m + => MBloom (PrimState m) a + -> (MutableByteArray (PrimState m) -> Int -> Int -> m ()) + -> m () +deserialise MBloom {mbBitArray} fill = + BitArray.deserialise mbBitArray fill + + +------------------------------------------------------------------------------- +-- Immutable Bloom filters +-- + +type Bloom :: Type -> Type +-- | An immutable Bloom filter. +data Bloom a = Bloom { + numBlocks :: {-# UNPACK #-} !Int -- ^ non-zero + , numHashes :: {-# UNPACK #-} !Int + , bitArray :: {-# UNPACK #-} !BitArray + } + deriving Eq +type role Bloom nominal + +bloomInvariant :: Bloom a -> Bool +bloomInvariant Bloom { numBlocks, bitArray = BitArray.BitArray pa } = + fromIntegral numBlocks * 8 == sizeofPrimArray pa + +instance Show (Bloom a) where + show mb = "Bloom { " ++ show numBits ++ " bits } " + where + numBits = blocksToBits (fromIntegral (numBlocks mb)) + +instance NFData (Bloom a) where + rnf !_ = () + +-- | Return the size of the Bloom filter. +size :: Bloom a -> BloomSize +size Bloom { numBlocks, numHashes } = + BloomSize { + sizeBits = blocksToBits numBlocks, + sizeHashes = numHashes + } + +-- | Query an immutable Bloom filter for membership using already constructed +-- 'Hash' value. 
+elemHashes :: Bloom a -> Hashes a -> Bool +elemHashes Bloom { numBlocks, numHashes, bitArray } !h = + go g0 numHashes + where + blockIx :: BlockIx + (!blockIx, !g0) = blockIxAndBitGen h numBlocks + + go :: BitIxGen -> Int -> Bool + go !_ 0 = True + go !g !i + | let blockBitIx :: BitIx + (!blockBitIx, !g') = genBitIndex g + , assert (let BlockIx b = blockIx + in b >= 0 && b < fromIntegral numBlocks) $ + BitArray.unsafeIndex bitArray blockIx blockBitIx + = go g' (i-1) + + | otherwise = False + +prefetchElem :: Bloom a -> Hashes a -> ST s () +prefetchElem Bloom { numBlocks, bitArray } !h = + BitArray.prefetchIndex bitArray blockIx + where + blockIx :: BlockIx + (!blockIx, _) = blockIxAndBitGen h numBlocks + +serialise :: Bloom a -> (BloomSize, ByteArray, Int, Int) +serialise b@Bloom{bitArray} = + (size b, ba, off, len) + where + (ba, off, len) = BitArray.serialise bitArray + + +------------------------------------------------------------------------------- +-- Conversions between mutable and immutable Bloom filters +-- + +-- | Create an immutable Bloom filter from a mutable one. The mutable +-- filter may be modified afterwards. +freeze :: MBloom s a -> ST s (Bloom a) +freeze MBloom { mbNumBlocks, mbNumHashes, mbBitArray } = do + bitArray <- BitArray.freeze mbBitArray + let !bf = Bloom { + numBlocks = mbNumBlocks, + numHashes = mbNumHashes, + bitArray + } + assert (bloomInvariant bf) $ pure bf + +-- | Create an immutable Bloom filter from a mutable one without copying. The +-- mutable filter /must not/ be modified afterwards. For a safer creation +-- interface, use 'freeze' or 'create'. +unsafeFreeze :: MBloom s a -> ST s (Bloom a) +unsafeFreeze MBloom { mbNumBlocks, mbNumHashes, mbBitArray } = do + bitArray <- BitArray.unsafeFreeze mbBitArray + let !bf = Bloom { + numBlocks = mbNumBlocks, + numHashes = mbNumHashes, + bitArray + } + assert (bloomInvariant bf) $ pure bf + +-- | Copy an immutable Bloom filter to create a mutable one. 
There is +-- no non-copying equivalent. +thaw :: Bloom a -> ST s (MBloom s a) +thaw Bloom { numBlocks, numHashes, bitArray } = do + mbBitArray <- BitArray.thaw bitArray + pure MBloom { + mbNumBlocks = numBlocks, + mbNumHashes = numHashes, + mbBitArray + } + + +------------------------------------------------------------------------------- +-- Low level utils +-- + +{-# INLINE reduceRange32 #-} +-- | Given a word sampled uniformly from the full 'Word32' range, such as a +-- hash, reduce it fairly to a value in the range @[0,n)@. +-- +-- See +-- +reduceRange32 :: Word -- ^ Sample from 0..2^32-1 + -> Word -- ^ upper bound of range [0,n) + -> Word -- ^ result within range +reduceRange32 x n = + assert (n > 0) $ + let w :: Word + w = x * n + in w `shiftR` 32 + +------------------------------------------------------------------------------- +-- Hashes +-- + +-- | A small family of hashes, for probing bits in a (blocked) bloom filter. +-- +newtype Hashes a = Hashes Hash + deriving stock Show + deriving newtype Prim +type role Hashes nominal + +{-# INLINE hashes #-} +hashes :: Hashable a => a -> Hashes a +hashes = Hashes . hash64 + +{-# INLINE blockIxAndBitGen #-} +-- | The scheme for turning 'Hashes' into block and bit indexes is as follows: +-- the high 32bits of the 64bit hash select the block of bits, while the low +-- 32bits are used with a simpler PRNG to produce a sequence of probe points +-- withi the selected 512bit block. +-- +blockIxAndBitGen :: Hashes a -> Int -> (BlockIx, BitIxGen) +blockIxAndBitGen (Hashes w64) numBlocks = + assert (numBlocks > 0) $ + (blockIx, bitGen) + where + blockIx = BlockIx (high32 `reduceRange32` fromIntegral numBlocks) + bitGen = BitIxGen low32 + + high32, low32 :: Word + high32 = fromIntegral (w64 `shiftR` 32) + low32 = fromIntegral w64 .&. 0xffff_ffff + +newtype BitIxGen = BitIxGen Word + +{-# INLINE genBitIndex #-} +-- | Generate the next in a (short) short sequence of pseudo-random 9-bit +-- values. 
This is used for selecting the probe bit within the 512 bit block. +-- +-- This simple generator works by multiplying a 32bit value by the golden ratio +-- (as a fraction of a 32bit value). This is only suitable for short sequences +-- using the top few bits each time. +genBitIndex :: BitIxGen -> (BitIx, BitIxGen) +genBitIndex (BitIxGen h) = + (BitIx i, BitIxGen h') + where + i :: Int + i = fromIntegral (h `shiftR` (32-9)) -- top 9 bits + + h' :: Word + h' = (h * 0x9e37_79b9) .&. 0xffff_ffff -- keep least significant 32 bits diff --git a/lsm-tree.cabal b/lsm-tree.cabal index dcafd157d..051c71858 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -418,10 +418,14 @@ library bloomfilter exposed-modules: Data.BloomFilter + Data.BloomFilter.Blocked Data.BloomFilter.Classic Data.BloomFilter.Hash other-modules: + Data.BloomFilter.Blocked.BitArray + Data.BloomFilter.Blocked.Calc + Data.BloomFilter.Blocked.Internal Data.BloomFilter.Classic.BitArray Data.BloomFilter.Classic.Calc Data.BloomFilter.Classic.Internal From df0fb165e256242e797f372dde699f88e811de56 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Fri, 11 Apr 2025 12:10:38 +0100 Subject: [PATCH 27/43] bloomfilter: generalise tests to cover the Blocked implementation --- .hlint.yaml | 2 + bloomfilter/tests/bloomfilter-tests.hs | 195 ++++++++++++++++++++----- 2 files changed, 157 insertions(+), 40 deletions(-) diff --git a/.hlint.yaml b/.hlint.yaml index 1ef3d09a9..939564cf0 100644 --- a/.hlint.yaml +++ b/.hlint.yaml @@ -38,6 +38,8 @@ - ignore: {name: "Use /="} - ignore: {name: "Use unless"} - ignore: {name: "Use notElem"} +- ignore: {name: "Use elem"} +- ignore: {name: "Use infix"} # Specify additional command line arguments # diff --git a/bloomfilter/tests/bloomfilter-tests.hs b/bloomfilter/tests/bloomfilter-tests.hs index 408454bdd..52227d28e 100644 --- a/bloomfilter/tests/bloomfilter-tests.hs +++ b/bloomfilter/tests/bloomfilter-tests.hs @@ -1,87 +1,163 @@ module Main (main) where +import qualified 
Data.BloomFilter.Blocked as Bloom.Blocked import qualified Data.BloomFilter.Classic as B +import qualified Data.BloomFilter.Classic as Bloom.Classic import Data.BloomFilter.Hash (Hashable (..), hash64) import Data.ByteString (ByteString) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as LBS import Data.Int (Int64) +import Data.Proxy (Proxy (..)) import Data.Word (Word32, Word64) import Test.QuickCheck.Instances () import Test.Tasty import Test.Tasty.QuickCheck +import Prelude hiding (elem, notElem) + main :: IO () main = defaultMain tests tests :: TestTree -tests = testGroup "bloomfilter" - [ testGroup "calculations" - [ testProperty "prop_calc_policy_fpr" prop_calc_policy_fpr - , testProperty "prop_calc_size_hashes_bits" prop_calc_size_hashes_bits - , testProperty "prop_calc_size_fpr_fpr" prop_calc_size_fpr_fpr - , testProperty "prop_calc_size_fpr_bits" prop_calc_size_fpr_bits +tests = + testGroup "Data.BloomFilter" $ + [ testGroup "Classic" + [ testGroup "calculations" $ + test_calculations proxyClassic + (FPR 1e-6, FPR 1) (BitsPerEntry 1, BitsPerEntry 50) 1e-6 + ++ test_calculations_classic + , test_fromList proxyClassic + ] + , testGroup "Blocked" + [ testGroup "calculations" $ + -- for the Blocked impl, the calculations are approximations + -- based on regressions, so we have to use much looser tolerances: + test_calculations proxyBlocked + (FPR 1e-4, FPR 1e-1) (BitsPerEntry 3, BitsPerEntry 24) 1e-2 + , test_fromList proxyBlocked ] - , testGroup "fromList" - [ testProperty "()" $ prop_pai () - , testProperty "Char" $ prop_pai (undefined :: Char) - , testProperty "Word32" $ prop_pai (undefined :: Word32) - , testProperty "Word64" $ prop_pai (undefined :: Word64) - , testProperty "ByteString" $ prop_pai (undefined :: ByteString) - , testProperty "LBS.ByteString" $ prop_pai (undefined :: LBS.ByteString) - , testProperty "LBS.ByteString" $ prop_pai (undefined :: String) + , tests_hashes + ] + where + test_calculations proxy fprRrange 
bitsRange tolerance = + [ testProperty "prop_calc_policy_fpr" $ + prop_calc_policy_fpr proxy fprRrange tolerance + + , testProperty "prop_calc_policy_bits" $ + prop_calc_policy_bits proxy bitsRange tolerance + + , testProperty "prop_calc_size_hashes_bits" $ + prop_calc_size_hashes_bits proxy + ] + + test_calculations_classic = + [ testProperty "prop_calc_size_fpr_fpr" $ + prop_calc_size_fpr_fpr proxyClassic + + , testProperty "prop_calc_size_fpr_bits" $ + prop_calc_size_fpr_bits proxyClassic + ] + + test_fromList proxy = + testGroup "fromList" + [ testProperty "()" $ prop_elem proxy (Proxy :: Proxy ()) + , testProperty "Char" $ prop_elem proxy (Proxy :: Proxy Char) + , testProperty "Word32" $ prop_elem proxy (Proxy :: Proxy Word32) + , testProperty "Word64" $ prop_elem proxy (Proxy :: Proxy Word64) + , testProperty "ByteString" $ prop_elem proxy (Proxy :: Proxy ByteString) + , testProperty "LBS.ByteString" $ prop_elem proxy (Proxy :: Proxy LBS.ByteString) + , testProperty "String" $ prop_elem proxy (Proxy :: Proxy String) ] - , testGroup "hashes" + + tests_hashes = + testGroup "hashes" [ testProperty "prop_rechunked_eq" prop_rechunked_eq , testProperty "prop_tuple_ex" $ hash64 (BS.empty, BS.pack [120]) =/= hash64 (BS.pack [120], BS.empty) , testProperty "prop_list_ex" $ hash64 [[],[],[BS.empty]] =/= hash64 [[],[BS.empty],[]] ] - ] + +proxyClassic :: Proxy Bloom.Classic.Bloom +proxyClassic = Proxy + +proxyBlocked :: Proxy Bloom.Blocked.Bloom +proxyBlocked = Proxy ------------------------------------------------------------------------------- -- Element is in a Bloom filter ------------------------------------------------------------------------------- -prop_pai :: (Hashable a) => a -> a -> [a] -> FPR -> Property -prop_pai _ x xs (FPR q) = let bf = B.fromList (B.policyForFPR q) (x:xs) in - B.elem x bf .&&. not (B.notElem x bf) +prop_elem :: forall bloom a. 
(BloomFilter bloom, Hashable a) + => Proxy bloom -> Proxy a + -> a -> [a] -> FPR -> Property +prop_elem proxy _ x xs (FPR q) = + let bf :: bloom a + bf = fromList (policyForFPR proxy q) (x:xs) + in elem x bf .&&. not (notElem x bf) ------------------------------------------------------------------------------- -- Bloom filter size calculations ------------------------------------------------------------------------------- -prop_calc_policy_fpr :: FPR -> Property -prop_calc_policy_fpr (FPR fpr) = - let policy = B.policyForFPR fpr - in B.policyFPR policy ~~~ fpr +prop_calc_policy_fpr :: BloomFilter bloom => Proxy bloom + -> (FPR, FPR) -> Double + -> FPR -> Property +prop_calc_policy_fpr proxy (FPR lb, FPR ub) t (FPR fpr) = + fpr > lb && fpr < ub ==> + let policy = policyForFPR proxy fpr + in policyFPR proxy policy ~~~ fpr + where + (~~~) = withinTolerance t + +prop_calc_policy_bits :: BloomFilter bloom => Proxy bloom + -> (BitsPerEntry, BitsPerEntry) -> Double + -> BitsPerEntry -> Property +prop_calc_policy_bits proxy (BitsPerEntry lb, BitsPerEntry ub) t + (BitsPerEntry c) = + c >= lb && c <= ub ==> + let policy = policyForBits proxy c + c' = B.policyBits policy + fpr = policyFPR proxy policy + policy' = policyForFPR proxy fpr + fpr' = policyFPR proxy policy' + in c === c' .&&. 
fpr ~~~ fpr' + where + (~~~) = withinTolerance t -prop_calc_size_hashes_bits :: BitsPerEntry -> NumEntries -> Property -prop_calc_size_hashes_bits (BitsPerEntry c) (NumEntries numEntries) = - let bsize = B.sizeForBits c numEntries +prop_calc_size_hashes_bits :: BloomFilter bloom => Proxy bloom + -> BitsPerEntry -> NumEntries -> Property +prop_calc_size_hashes_bits proxy (BitsPerEntry c) (NumEntries numEntries) = + let bsize = sizeForBits proxy c numEntries in numHashFunctions (fromIntegral (B.sizeBits bsize)) (fromIntegral numEntries) === fromIntegral (B.sizeHashes bsize) -prop_calc_size_fpr_fpr :: FPR -> NumEntries -> Property -prop_calc_size_fpr_fpr (FPR fpr) (NumEntries numEntries) = - let bsize = B.sizeForFPR fpr numEntries +prop_calc_size_fpr_fpr :: BloomFilter bloom => Proxy bloom + -> FPR -> NumEntries -> Property +prop_calc_size_fpr_fpr proxy (FPR fpr) (NumEntries numEntries) = + let bsize = sizeForFPR proxy fpr numEntries in falsePositiveRate (fromIntegral (B.sizeBits bsize)) (fromIntegral numEntries) (fromIntegral (B.sizeHashes bsize)) ~~~ fpr + where + (~~~) = withinTolerance 1e-6 -prop_calc_size_fpr_bits :: BitsPerEntry -> NumEntries -> Property -prop_calc_size_fpr_bits (BitsPerEntry c) (NumEntries numEntries) = - let policy = B.policyForBits c - bsize = B.sizeForPolicy policy numEntries +prop_calc_size_fpr_bits :: BloomFilter bloom => Proxy bloom + -> BitsPerEntry -> NumEntries -> Property +prop_calc_size_fpr_bits proxy (BitsPerEntry c) (NumEntries numEntries) = + let policy = policyForBits proxy c + bsize = sizeForPolicy proxy policy numEntries in falsePositiveRate (fromIntegral (B.sizeBits bsize)) (fromIntegral numEntries) (fromIntegral (B.sizeHashes bsize)) - ~~~ B.policyFPR policy + ~~~ policyFPR proxy policy + where + (~~~) = withinTolerance 1e-6 -- reference implementations used for sanity checks @@ -111,12 +187,11 @@ falsePositiveRate :: falsePositiveRate m n k = (1 - exp (-(k * n / m))) ** k -(~~~) :: Double -> Double -> Property -a ~~~ b = - 
counterexample (show a ++ " /= " ++ show b) $ - abs (a - b) < epsilon - where - epsilon = 1e-6 :: Double +withinTolerance :: Double -> Double -> Double -> Property +withinTolerance t a b = + counterexample (show a ++ " /= " ++ show b ++ + " and not within (abs) tolerance of " ++ show t) $ + abs (a - b) < t ------------------------------------------------------------------------------- -- Chunking @@ -144,6 +219,46 @@ prop_rechunked f s = prop_rechunked_eq :: LBS.ByteString -> Property prop_rechunked_eq = prop_rechunked hash64 +------------------------------------------------------------------------------- +-- Class to allow testing two filter implementations +------------------------------------------------------------------------------- + +class BloomFilter bloom where + fromList :: Hashable a => B.BloomPolicy -> [a] -> bloom a + elem :: Hashable a => a -> bloom a -> Bool + notElem :: Hashable a => a -> bloom a -> Bool + + sizeForFPR :: Proxy bloom -> B.FPR -> B.NumEntries -> B.BloomSize + sizeForBits :: Proxy bloom -> B.BitsPerEntry -> B.NumEntries -> B.BloomSize + sizeForPolicy :: Proxy bloom -> B.BloomPolicy -> B.NumEntries -> B.BloomSize + policyForFPR :: Proxy bloom -> B.FPR -> B.BloomPolicy + policyForBits :: Proxy bloom -> B.BitsPerEntry -> B.BloomPolicy + policyFPR :: Proxy bloom -> B.BloomPolicy -> B.FPR + +instance BloomFilter Bloom.Classic.Bloom where + fromList = Bloom.Classic.fromList + elem = Bloom.Classic.elem + notElem = Bloom.Classic.notElem + + sizeForFPR _ = Bloom.Classic.sizeForFPR + sizeForBits _ = Bloom.Classic.sizeForBits + sizeForPolicy _ = Bloom.Classic.sizeForPolicy + policyForFPR _ = Bloom.Classic.policyForFPR + policyForBits _ = Bloom.Classic.policyForBits + policyFPR _ = Bloom.Classic.policyFPR + +instance BloomFilter Bloom.Blocked.Bloom where + fromList = Bloom.Blocked.fromList + elem = Bloom.Blocked.elem + notElem = Bloom.Blocked.notElem + + sizeForFPR _ = Bloom.Blocked.sizeForFPR + sizeForBits _ = Bloom.Blocked.sizeForBits + 
sizeForPolicy _ = Bloom.Blocked.sizeForPolicy + policyForFPR _ = Bloom.Blocked.policyForFPR + policyForBits _ = Bloom.Blocked.policyForBits + policyFPR _ = Bloom.Blocked.policyFPR + ------------------------------------------------------------------------------- -- QC generators ------------------------------------------------------------------------------- From c83b35970353aa12982e4e5e647d5801353cce16 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Tue, 22 Apr 2025 18:08:15 +0100 Subject: [PATCH 28/43] bloomfilter: extend benchmark to blocked implementation --- bloomfilter/bench/bloomfilter-bench.hs | 65 +++++++++++++++++--------- 1 file changed, 43 insertions(+), 22 deletions(-) diff --git a/bloomfilter/bench/bloomfilter-bench.hs b/bloomfilter/bench/bloomfilter-bench.hs index a1572c24c..5a7265ed9 100644 --- a/bloomfilter/bench/bloomfilter-bench.hs +++ b/bloomfilter/bench/bloomfilter-bench.hs @@ -1,6 +1,7 @@ module Main where -import qualified Data.BloomFilter as B +import qualified Data.BloomFilter.Blocked as B.Blocked +import qualified Data.BloomFilter.Classic as B.Classic import Data.BloomFilter.Hash (Hashable (..), hash64) import Data.Word (Word64) @@ -11,27 +12,47 @@ import Criterion.Main main :: IO () main = defaultMain [ - env newStdGen $ \g0 -> - bench "construct bloom m=1e6 fpr=1%" $ - whnf (constructBloom 1_000_000 0.01) g0 - - , env newStdGen $ \g0 -> - bench "construct bloom m=1e6 fpr=0.1%" $ - whnf (constructBloom 1_000_000 0.001) g0 - - , env newStdGen $ \g0 -> - bench "construct bloom m=1e7 fpr=0.1%" $ - whnf (constructBloom 10_000_000 0.001) g0 + bgroup "Data.BloomFilter.Classic" [ + env newStdGen $ \g0 -> + bench "construct m=1e6 fpr=1%" $ + whnf (constructBloom_classic 1_000_000 0.01) g0 + + , env newStdGen $ \g0 -> + bench "construct m=1e6 fpr=0.1%" $ + whnf (constructBloom_classic 1_000_000 0.001) g0 + + , env newStdGen $ \g0 -> + bench "construct m=1e7 fpr=0.1%" $ + whnf (constructBloom_classic 10_000_000 0.001) g0 + ] + , bgroup 
"Data.BloomFilter.Blocked" [ + env newStdGen $ \g0 -> + bench "construct m=1e6 fpr=1%" $ + whnf (constructBloom_blocked 1_000_000 0.01) g0 + + , env newStdGen $ \g0 -> + bench "construct m=1e6 fpr=0.1%" $ + whnf (constructBloom_blocked 1_000_000 0.001) g0 + + , env newStdGen $ \g0 -> + bench "construct m=1e7 fpr=0.1%" $ + whnf (constructBloom_blocked 10_000_000 0.001) g0 + ] ] -constructBloom :: Int -> Double -> StdGen -> B.Bloom Word64 -constructBloom n fpr g0 = - B.unfold (B.sizeForFPR fpr n) nextElement (g0, 0) - where - nextElement :: (StdGen, Int) -> Maybe (Word64, (StdGen, Int)) - nextElement (!g, !i) - | i >= n = Nothing - | otherwise = Just (x, (g', i+1)) - where - (!x, !g') = uniform g +constructBloom_classic :: Int -> Double -> StdGen -> B.Classic.Bloom Word64 +constructBloom_classic n fpr g0 = + B.Classic.unfold (B.Classic.sizeForFPR fpr n) (nextElement n) (g0, 0) + +constructBloom_blocked :: Int -> Double -> StdGen -> B.Blocked.Bloom Word64 +constructBloom_blocked n fpr g0 = + B.Blocked.unfold (B.Blocked.sizeForFPR fpr n) (nextElement n) (g0, 0) + +{-# INLINE nextElement #-} +nextElement :: Int -> (StdGen, Int) -> Maybe (Word64, (StdGen, Int)) +nextElement !n (!g, !i) + | i >= n = Nothing + | otherwise = Just (x, (g', i+1)) + where + (!x, !g') = uniform g From f91a7da8a3330aeacc9ca144b06316674fd74601 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Tue, 15 Apr 2025 16:09:04 +0100 Subject: [PATCH 29/43] bloomfilter: add bloomfilter-fpr-calc and gnuplot script The program produces output for gnuplot, and the gnuplot script plots the calculated and actual FPR vs bits-per-key for both the classic and the blocked implementations. We use 2..20 bits for classic, as that gets us down to below an FPR of 1e-4, while for the blocked one we need to use 2..24 bits to get to approximately the same FPR. This graph output (checked in) is also a great test of the FPR to bits code for both implementations, and of the FPR of each impl in general. 
--- bloomfilter/fpr.blocked.gnuplot.data | 999 ++++++++++++++++++ bloomfilter/fpr.classic.gnuplot.data | 183 ++++ bloomfilter/fpr.gnuplot | 20 + bloomfilter/fpr.png | Bin 0 -> 89790 bytes .../src/Data/BloomFilter/Blocked/Calc.hs | 2 + bloomfilter/src/Data/BloomFilter/Hash.hs | 7 + bloomfilter/tests/fpr-calc.hs | 184 ++++ cabal.project.release | 3 + lsm-tree.cabal | 13 + 9 files changed, 1411 insertions(+) create mode 100644 bloomfilter/fpr.blocked.gnuplot.data create mode 100644 bloomfilter/fpr.classic.gnuplot.data create mode 100644 bloomfilter/fpr.gnuplot create mode 100644 bloomfilter/fpr.png create mode 100644 bloomfilter/tests/fpr-calc.hs diff --git a/bloomfilter/fpr.blocked.gnuplot.data b/bloomfilter/fpr.blocked.gnuplot.data new file mode 100644 index 000000000..07dae151f --- /dev/null +++ b/bloomfilter/fpr.blocked.gnuplot.data @@ -0,0 +1,999 @@ +2.0 0.39201843320666596 0.3880831046648373 +2.0 0.39201843320666596 0.3825950607604861 +2.0 0.39201843320666596 0.3782830262642101 +2.0 0.39201843320666596 0.3986671893375147 +2.0 0.39201843320666596 0.39984319874559 +2.0 0.39201843320666596 0.40493923951391614 +2.0 0.39201843320666596 0.4057232457859663 +2.0 0.39201843320666596 0.39317914543316346 +2.0 0.39201843320666596 0.3884751078008624 +2.2 0.3545234090532114 0.31903580290677064 +2.2 0.3545234090532114 0.3310882665721375 +2.2 0.3545234090532114 0.33286068769939736 +2.2 0.3545234090532114 0.3317972350230415 +2.2 0.3545234090532114 0.33073378234668555 +2.2 0.3545234090532114 0.3555476781283233 +2.2 0.3545234090532114 0.3456221198156682 +2.2 0.3545234090532114 0.34207727756114853 +2.2 0.3545234090532114 0.3342786246012052 +2.4000000000000004 0.32074266536538576 0.31173829377806284 +2.4000000000000004 0.32074266536538576 0.31462475946119306 +2.4000000000000004 0.32074266536538576 0.32200128287363694 +2.4000000000000004 0.32074266536538576 0.3008338678640154 +2.4000000000000004 0.32074266536538576 0.3114175753688262 +2.4000000000000004 0.32074266536538576 
0.3296985246953175 +2.4000000000000004 0.32074266536538576 0.3024374599101988 +2.4000000000000004 0.32074266536538576 0.3236048749198204 +2.4000000000000004 0.32074266536538576 0.3290570878768441 +2.6000000000000005 0.29029659365451377 0.2783744557329463 +2.6000000000000005 0.29029659365451377 0.2780841799709724 +2.6000000000000005 0.29029659365451377 0.2708272859216255 +2.6000000000000005 0.29029659365451377 0.2763425253991292 +2.6000000000000005 0.29029659365451377 0.27169811320754716 +2.6000000000000005 0.29029659365451377 0.2879535558780842 +2.6000000000000005 0.29029659365451377 0.2841799709724238 +2.6000000000000005 0.29029659365451377 0.28592162554426703 +2.6000000000000005 0.29029659365451377 0.2896952104499274 +2.8000000000000007 0.262845493076701 0.2578186596583443 +2.8000000000000007 0.262845493076701 0.24362680683311433 +2.8000000000000007 0.262845493076701 0.25098554533508544 +2.8000000000000007 0.262845493076701 0.26701708278580816 +2.8000000000000007 0.262845493076701 0.25624178712220763 +2.8000000000000007 0.262845493076701 0.2507227332457293 +2.8000000000000007 0.262845493076701 0.2614980289093298 +2.8000000000000007 0.262845493076701 0.2604467805519054 +2.8000000000000007 0.262845493076701 0.2680683311432326 +3.000000000000001 0.23808526530534094 0.22833333333333333 +3.000000000000001 0.23808526530534094 0.2311904761904762 +3.000000000000001 0.23808526530534094 0.235 +3.000000000000001 0.23808526530534094 0.23333333333333334 +3.000000000000001 0.23808526530534094 0.22333333333333333 +3.000000000000001 0.23808526530534094 0.23166666666666666 +3.000000000000001 0.23808526530534094 0.22976190476190475 +3.000000000000001 0.23808526530534094 0.2361904761904762 +3.000000000000001 0.23808526530534094 0.24928571428571428 +3.200000000000001 0.21574358502064847 0.21963322545846817 +3.200000000000001 0.21574358502064847 0.2127292340884574 +3.200000000000001 0.21574358502064847 0.2168284789644013 +3.200000000000001 0.21574358502064847 0.22071197411003235 
+3.200000000000001 0.21574358502064847 0.2161812297734628 +3.200000000000001 0.21574358502064847 0.21877022653721684 +3.200000000000001 0.21574358502064847 0.21725997842502698 +3.200000000000001 0.21574358502064847 0.21121898597626754 +3.200000000000001 0.21574358502064847 0.23106796116504855 +3.4000000000000012 0.19557649229605542 0.20457656952865244 +3.4000000000000012 0.19557649229605542 0.1875611187169959 +3.4000000000000012 0.19557649229605542 0.21181302562096616 +3.4000000000000012 0.19557649229605542 0.19812243301388618 +3.4000000000000012 0.19557649229605542 0.19264619597105417 +3.4000000000000012 0.19557649229605542 0.19968707216898102 +3.4000000000000012 0.19557649229605542 0.19362409544298845 +3.4000000000000012 0.19557649229605542 0.1985135928026599 +3.4000000000000012 0.19557649229605542 0.19597105417563074 +3.6000000000000014 0.1773653593556774 0.18109258602341255 +3.6000000000000014 0.1773653593556774 0.18268889677190492 +3.6000000000000014 0.1773653593556774 0.18570415040794608 +3.6000000000000014 0.1773653593556774 0.1777225966654842 +3.6000000000000014 0.1773653593556774 0.17630365377793544 +3.6000000000000014 0.1773653593556774 0.1727562965590635 +3.6000000000000014 0.1773653593556774 0.1784320681092586 +3.6000000000000014 0.1773653593556774 0.17949627527492018 +3.6000000000000014 0.1773653593556774 0.17878680383114579 +3.8000000000000016 0.1609141896431677 0.15561635017701964 +3.8000000000000016 0.1609141896431677 0.15561635017701964 +3.8000000000000016 0.1609141896431677 0.16124879304795622 +3.8000000000000016 0.1609141896431677 0.15770840038622466 +3.8000000000000016 0.1609141896431677 0.16124879304795622 +3.8000000000000016 0.1609141896431677 0.16253620856131315 +3.8000000000000016 0.1609141896431677 0.16076601223044737 +3.8000000000000016 0.1609141896431677 0.153363373028645 +3.8000000000000016 0.1609141896431677 0.16929514000643708 +4.000000000000002 0.1460472119673121 0.14575726595589308 +4.000000000000002 0.1460472119673121 
0.1421060318387615 +4.000000000000002 0.1460472119673121 0.1444428216737257 +4.000000000000002 0.1460472119673121 0.14838615452022783 +4.000000000000002 0.1460472119673121 0.14590331532057835 +4.000000000000002 0.1460472119673121 0.15130714181393312 +4.000000000000002 0.1460472119673121 0.14707171023806045 +4.000000000000002 0.1460472119673121 0.13801664962757412 +4.000000000000002 0.1460472119673121 0.15159924054330365 +4.200000000000002 0.13260673675027865 0.13168014852141627 +4.200000000000002 0.13260673675027865 0.13128232329929718 +4.200000000000002 0.13260673675027865 0.13406709985413076 +4.200000000000002 0.13260673675027865 0.13300623259514655 +4.200000000000002 0.13260673675027865 0.13592361755735313 +4.200000000000002 0.13260673675027865 0.13300623259514655 +4.200000000000002 0.13260673675027865 0.13446492507624982 +4.200000000000002 0.13260673675027865 0.1299562392255669 +4.200000000000002 0.13260673675027865 0.13353666622463864 +4.400000000000002 0.12045124516777768 0.11780293905083113 +4.400000000000002 0.12045124516777768 0.115152975186702 +4.400000000000002 0.12045124516777768 0.12238014936159962 +4.400000000000002 0.12045124516777768 0.12370513129366417 +4.400000000000002 0.12045124516777768 0.12430739580823898 +4.400000000000002 0.12045124516777768 0.12141652613827993 +4.400000000000002 0.12045124516777768 0.1187665622741508 +4.400000000000002 0.12045124516777768 0.11443025776921224 +4.400000000000002 0.12045124516777768 0.1257528306432185 +4.600000000000002 0.10945368529563608 0.10245183887915937 +4.600000000000002 0.10945368529563608 0.10956654991243432 +4.600000000000002 0.10945368529563608 0.11055166374781086 +4.600000000000002 0.10945368529563608 0.11241243432574431 +4.600000000000002 0.10945368529563608 0.11427320490367776 +4.600000000000002 0.10945368529563608 0.10825306479859895 +4.600000000000002 0.10945368529563608 0.1108800350262697 +4.600000000000002 0.10945368529563608 0.10573555166374782 +4.600000000000002 0.10945368529563608 
0.12095008756567426 +4.8000000000000025 9.949995231639147e-2 0.10019900497512438 +4.8000000000000025 9.949995231639147e-2 9.412935323383084e-2 +4.8000000000000025 9.949995231639147e-2 0.10228855721393035 +4.8000000000000025 9.949995231639147e-2 0.10378109452736319 +4.8000000000000025 9.949995231639147e-2 0.1054726368159204 +4.8000000000000025 9.949995231639147e-2 0.10228855721393035 +4.8000000000000025 9.949995231639147e-2 0.10577114427860697 +4.8000000000000025 9.949995231639147e-2 9.681592039800994e-2 +4.8000000000000025 9.949995231639147e-2 0.103681592039801 +5.000000000000003 9.048753243817137e-2 8.994661116641028e-2 +5.000000000000003 9.048753243817137e-2 8.91322052302959e-2 +5.000000000000003 9.048753243817137e-2 8.958465297258167e-2 +5.000000000000003 9.048753243817137e-2 9.655234820378246e-2 +5.000000000000003 9.048753243817137e-2 9.419961994389647e-2 +5.000000000000003 9.048753243817137e-2 9.302325581395349e-2 +5.000000000000003 9.048753243817137e-2 9.727626459143969e-2 +5.000000000000003 9.048753243817137e-2 9.003710071486744e-2 +5.000000000000003 9.048753243817137e-2 9.727626459143969e-2 +5.200000000000003 8.232429247650848e-2 8.355972668148515e-2 +5.200000000000003 8.232429247650848e-2 8.10899810652836e-2 +5.200000000000003 8.232429247650848e-2 8.150160533465053e-2 +5.200000000000003 8.232429247650848e-2 8.792294393677451e-2 +5.200000000000003 8.232429247650848e-2 8.224252901951098e-2 +5.200000000000003 8.232429247650848e-2 8.150160533465053e-2 +5.200000000000003 8.232429247650848e-2 8.602947229768668e-2 +5.200000000000003 8.232429247650848e-2 8.767596937515436e-2 +5.200000000000003 8.232429247650848e-2 8.183090475014407e-2 +5.400000000000003 7.492739908322692e-2 7.934961786303012e-2 +5.400000000000003 7.492739908322692e-2 7.5827963434737e-2 +5.400000000000003 7.492739908322692e-2 7.62026075228533e-2 +5.400000000000003 7.492739908322692e-2 7.695189569908586e-2 +5.400000000000003 7.492739908322692e-2 7.747639742244868e-2 +5.400000000000003 
7.492739908322692e-2 7.44043158998951e-2 +5.400000000000003 7.492739908322692e-2 7.717668215195564e-2 +5.400000000000003 7.492739908322692e-2 7.590289225236026e-2 +5.400000000000003 7.492739908322692e-2 7.425445826464859e-2 +5.600000000000003 6.82223534062247e-2 6.972301814708691e-2 +5.600000000000003 6.82223534062247e-2 6.719879929048983e-2 +5.600000000000003 6.82223534062247e-2 6.4538136171374e-2 +5.600000000000003 6.82223534062247e-2 6.719879929048983e-2 +5.600000000000003 6.82223534062247e-2 6.972301814708691e-2 +5.600000000000003 6.82223534062247e-2 6.815390912812117e-2 +5.600000000000003 6.82223534062247e-2 6.863146404693683e-2 +5.600000000000003 6.82223534062247e-2 7.0814572247237e-2 +5.600000000000003 6.82223534062247e-2 6.52203574839678e-2 +5.800000000000003 6.21421285572768e-2 6.232910763112105e-2 +5.800000000000003 6.21421285572768e-2 6.307481978622918e-2 +5.800000000000003 6.21421285572768e-2 6.152125279642058e-2 +5.800000000000003 6.21421285572768e-2 6.829480487198608e-2 +5.800000000000003 6.21421285572768e-2 6.232910763112105e-2 +5.800000000000003 6.21421285572768e-2 6.1086254039274174e-2 +5.800000000000003 6.21421285572768e-2 6.487695749440715e-2 +5.800000000000003 6.21421285572768e-2 6.717623663932389e-2 +5.800000000000003 6.21421285572768e-2 6.102411135968183e-2 +6.0000000000000036 5.66263986760163e-2 5.7701019252548134e-2 +6.0000000000000036 5.66263986760163e-2 5.492638731596829e-2 +6.0000000000000036 5.66263986760163e-2 5.65118912797282e-2 +6.0000000000000036 5.66263986760163e-2 5.9173272933182336e-2 +6.0000000000000036 5.66263986760163e-2 5.951302378255945e-2 +6.0000000000000036 5.66263986760163e-2 5.475651189127973e-2 +6.0000000000000036 5.66263986760163e-2 5.8776896942242356e-2 +6.0000000000000036 5.66263986760163e-2 6.098527746319366e-2 +6.0000000000000036 5.66263986760163e-2 5.311438278595697e-2 +6.200000000000004 5.162084962827559e-2 5.229196778856081e-2 +6.200000000000004 5.162084962827559e-2 5.14660334503407e-2 +6.200000000000004 
5.162084962827559e-2 5.203386330786702e-2 +6.200000000000004 5.162084962827559e-2 5.1362791658063185e-2 +6.200000000000004 5.162084962827559e-2 5.280817674994838e-2 +6.200000000000004 5.162084962827559e-2 5.151765434647945e-2 +6.200000000000004 5.162084962827559e-2 5.662812306421639e-2 +6.200000000000004 5.162084962827559e-2 5.440842453024985e-2 +6.200000000000004 5.162084962827559e-2 5.244683047697708e-2 +6.400000000000004 4.7076562484715895e-2 4.891253177666886e-2 +6.400000000000004 4.7076562484715895e-2 4.67470106392995e-2 +6.400000000000004 4.7076562484715895e-2 4.792392430091329e-2 +6.400000000000004 4.7076562484715895e-2 4.7829771207984186e-2 +6.400000000000004 4.7076562484715895e-2 4.7971000847377834e-2 +6.400000000000004 4.7076562484715895e-2 4.599378589586668e-2 +6.400000000000004 4.7076562484715895e-2 4.9289144148385276e-2 +6.400000000000004 4.7076562484715895e-2 4.844176631202335e-2 +6.400000000000004 4.7076562484715895e-2 4.7547311929196874e-2 +6.600000000000004 4.294946190765297e-2 4.453893398617017e-2 +6.600000000000004 4.294946190765297e-2 4.488253231971825e-2 +6.600000000000004 4.294946190765297e-2 4.148949877593094e-2 +6.600000000000004 4.294946190765297e-2 4.372288794399347e-2 +6.600000000000004 4.294946190765297e-2 4.432418502770261e-2 +6.600000000000004 4.294946190765297e-2 4.25632435682687e-2 +6.600000000000004 4.294946190765297e-2 4.655757419576515e-2 +6.600000000000004 4.294946190765297e-2 4.518318086157282e-2 +6.600000000000004 4.294946190765297e-2 4.526908044495984e-2 +6.800000000000004 3.919982244476177e-2 3.9709917679341435e-2 +6.800000000000004 3.919982244476177e-2 3.947471579772638e-2 +6.800000000000004 3.919982244476177e-2 3.7671501372010974e-2 +6.800000000000004 3.919982244476177e-2 3.8729909839278716e-2 +6.800000000000004 3.919982244476177e-2 4.108192865542924e-2 +6.800000000000004 3.919982244476177e-2 4.049392395139161e-2 +6.800000000000004 3.919982244476177e-2 3.9670717365738926e-2 +6.800000000000004 3.919982244476177e-2 
3.888671109368875e-2 +6.800000000000004 3.919982244476177e-2 3.90827126617013e-2 +7.000000000000004 3.5791826500754534e-2 3.7939797415798705e-2 +7.000000000000004 3.5791826500754534e-2 3.4575324814775044e-2 +7.000000000000004 3.5791826500754534e-2 3.4253194459357886e-2 +7.000000000000004 3.5791826500754534e-2 3.7975589677511724e-2 +7.000000000000004 3.5791826500754534e-2 3.5505923619313505e-2 +7.000000000000004 3.5791826500754534e-2 3.704499087297326e-2 +7.000000000000004 3.5791826500754534e-2 3.6830237302695155e-2 +7.000000000000004 3.5791826500754534e-2 3.6508106947278e-2 +7.000000000000004 3.5791826500754534e-2 3.543433909588747e-2 +7.200000000000005 3.269316844350807e-2 3.4361003040507404e-2 +7.200000000000005 3.269316844350807e-2 3.269362801190048e-2 +7.200000000000005 3.269316844350807e-2 3.0960865727269755e-2 +7.200000000000005 3.269316844350807e-2 3.226861084774577e-2 +7.200000000000005 3.269316844350807e-2 3.0993559355281655e-2 +7.200000000000005 3.269316844350807e-2 3.220322359172197e-2 +7.200000000000005 3.269316844350807e-2 3.4361003040507404e-2 +7.200000000000005 3.269316844350807e-2 3.494948834472161e-2 +7.200000000000005 3.269316844350807e-2 3.190898093961487e-2 +7.400000000000005 2.9874699909469064e-2 2.969557553849371e-2 +7.400000000000005 2.9874699909469064e-2 3.0741194395482927e-2 +7.400000000000005 2.9874699909469064e-2 2.984494966092074e-2 +7.400000000000005 2.9874699909469064e-2 3.0771069219968333e-2 +7.400000000000005 2.9874699909469064e-2 2.885908045290234e-2 +7.400000000000005 2.9874699909469064e-2 2.9635825889522897e-2 +7.400000000000005 2.9874699909469064e-2 3.008394825680399e-2 +7.400000000000005 2.9874699909469064e-2 3.139844053416186e-2 +7.400000000000005 2.9874699909469064e-2 3.065156992202671e-2 +7.600000000000005 2.7310111913322593e-2 2.733777583570024e-2 +7.600000000000005 2.7310111913322593e-2 2.782936421236618e-2 +7.600000000000005 2.7310111913322593e-2 2.701005025125628e-2 +7.600000000000005 2.7310111913322593e-2 
2.7911295608477167e-2 +7.600000000000005 2.7310111913322593e-2 2.690080838977496e-2 +7.600000000000005 2.7310111913322593e-2 2.810246886606948e-2 +7.600000000000005 2.7310111913322593e-2 2.788398514310684e-2 +7.600000000000005 2.7310111913322593e-2 2.8621367708105746e-2 +7.600000000000005 2.7310111913322593e-2 2.7911295608477167e-2 +7.800000000000005 2.4975649846666052e-2 2.4576038362596468e-2 +7.800000000000005 2.4975649846666052e-2 2.5100526986188466e-2 +7.800000000000005 2.4975649846666052e-2 2.5874772097205224e-2 +7.800000000000005 2.4975649846666052e-2 2.5100526986188466e-2 +7.800000000000005 2.4975649846666052e-2 2.5175453932415895e-2 +7.800000000000005 2.4975649846666052e-2 2.547516171732561e-2 +7.800000000000005 2.4975649846666052e-2 2.6074577287145035e-2 +7.800000000000005 2.4975649846666052e-2 2.5375259122355703e-2 +7.800000000000005 2.4975649846666052e-2 2.5999650340917606e-2 +8.000000000000005 2.2849857876681937e-2 2.4197970935015082e-2 +8.000000000000005 2.2849857876681937e-2 2.415227127319258e-2 +8.000000000000005 2.2849857876681937e-2 2.3352527191298784e-2 +8.000000000000005 2.2849857876681937e-2 2.4106571611370076e-2 +8.000000000000005 2.2849857876681937e-2 2.4449319075038844e-2 +8.000000000000005 2.2849857876681937e-2 2.3489626176766292e-2 +8.000000000000005 2.2849857876681937e-2 2.472351704597386e-2 +8.000000000000005 2.2849857876681937e-2 2.3946622794991317e-2 +8.000000000000005 2.2849857876681937e-2 2.454071839868385e-2 +8.200000000000006 2.091334963463375e-2 1.9240421616195415e-2 +8.200000000000006 2.091334963463375e-2 2.082984774970721e-2 +8.200000000000006 2.091334963463375e-2 2.1143550276058222e-2 +8.200000000000006 2.091334963463375e-2 2.1310858290112096e-2 +8.200000000000006 2.091334963463375e-2 2.147816630416597e-2 +8.200000000000006 2.091334963463375e-2 2.1603647314706376e-2 +8.200000000000006 2.091334963463375e-2 2.254475489375941e-2 +8.200000000000006 2.091334963463375e-2 2.1101723272544753e-2 +8.200000000000006 2.091334963463375e-2 
2.1708214823490043e-2 +8.400000000000006 1.9148602420807403e-2 1.8957164467763247e-2 +8.400000000000006 1.9148602420807403e-2 2.031671868716849e-2 +8.400000000000006 1.9148602420807403e-2 1.8995461769718322e-2 +8.400000000000006 1.9148602420807403e-2 1.9646515902954638e-2 +8.400000000000006 1.9148602420807403e-2 1.7903988663998623e-2 +8.400000000000006 1.9148602420807403e-2 1.9167799628516172e-2 +8.400000000000006 1.9148602420807403e-2 2.085288091453957e-2 +8.400000000000006 1.9148602420807403e-2 1.9799705110774946e-2 +8.400000000000006 1.9148602420807403e-2 1.98762997146851e-2 +8.600000000000005 1.7539772452040647e-2 1.7399540455685546e-2 +8.600000000000005 1.7539772452040647e-2 1.745216003367653e-2 +8.600000000000005 1.7539772452040647e-2 1.766263834564047e-2 +8.600000000000005 1.7539772452040647e-2 1.68733446757757e-2 +8.600000000000005 1.7539772452040647e-2 1.7101362847069968e-2 +8.600000000000005 1.7539772452040647e-2 1.7417080315015872e-2 +8.600000000000005 1.7539772452040647e-2 1.839931243751425e-2 +8.600000000000005 1.7539772452040647e-2 1.7610018767649484e-2 +8.600000000000005 1.7539772452040647e-2 1.8416852296844578e-2 +8.800000000000006 1.6072528944441514e-2 1.5815358899353885e-2 +8.800000000000006 1.6072528944441514e-2 1.604037416824713e-2 +8.800000000000006 1.6072528944441514e-2 1.6956507763026776e-2 +8.800000000000006 1.6072528944441514e-2 1.6265389437140378e-2 +8.800000000000006 1.6072528944441514e-2 1.6538622263653605e-2 +8.800000000000006 1.6072528944441514e-2 1.5574271111253977e-2 +8.800000000000006 1.6072528944441514e-2 1.6586839821273586e-2 +8.800000000000006 1.6072528944441514e-2 1.592786653380051e-2 +8.800000000000006 1.6072528944441514e-2 1.5783213860940563e-2 +9.000000000000007 1.4733905061014388e-2 1.4336019802271957e-2 +9.000000000000007 1.4733905061014388e-2 1.4984308467533998e-2 +9.000000000000007 1.4733905061014388e-2 1.4954840800931178e-2 +9.000000000000007 1.4733905061014388e-2 1.4365487468874777e-2 +9.000000000000007 
1.4733905061014388e-2 1.4115012302750807e-2 +9.000000000000007 1.4733905061014388e-2 1.4615962634998748e-2 +9.000000000000007 1.4733905061014388e-2 1.4498091968587467e-2 +9.000000000000007 1.4733905061014388e-2 1.4365487468874777e-2 +9.000000000000007 1.4733905061014388e-2 1.5131646800548099e-2 +9.200000000000006 1.351216396499029e-2 1.3147404975205048e-2 +9.200000000000006 1.351216396499029e-2 1.3390625211128677e-2 +9.200000000000006 1.351216396499029e-2 1.3633845447052305e-2 +9.200000000000006 1.351216396499029e-2 1.295823368059778e-2 +9.200000000000006 1.351216396499029e-2 1.3282527328495953e-2 +9.200000000000006 1.351216396499029e-2 1.3823016741659573e-2 +9.200000000000006 1.351216396499029e-2 1.4268920507519559e-2 +9.200000000000006 1.351216396499029e-2 1.3823016741659573e-2 +9.200000000000006 1.351216396499029e-2 1.4066236977583201e-2 +9.400000000000006 1.23966784076142e-2 1.263217920587105e-2 +9.400000000000006 1.23966784076142e-2 1.3351184499237606e-2 +9.400000000000006 1.23966784076142e-2 1.2458626204023951e-2 +9.400000000000006 1.23966784076142e-2 1.2954491923587092e-2 +9.400000000000006 1.23966784076142e-2 1.211152020032975e-2 +9.400000000000006 1.23966784076142e-2 1.2880112065652621e-2 +9.400000000000006 1.23966784076142e-2 1.2768542278750914e-2 +9.400000000000006 1.23966784076142e-2 1.2260279916198694e-2 +9.400000000000006 1.23966784076142e-2 1.3239614712335899e-2 +9.600000000000007 1.1377822446563804e-2 1.1878484469222893e-2 +9.600000000000007 1.1377822446563804e-2 1.2344976675389691e-2 +9.600000000000007 1.1377822446563804e-2 1.1503015132552055e-2 +9.600000000000007 1.1377822446563804e-2 1.1218568665377175e-2 +9.600000000000007 1.1377822446563804e-2 1.1173057230629196e-2 +9.600000000000007 1.1377822446563804e-2 1.1173057230629196e-2 +9.600000000000007 1.1377822446563804e-2 1.1798839458413927e-2 +9.600000000000007 1.1377822446563804e-2 1.109341221982023e-2 +9.600000000000007 1.1377822446563804e-2 1.2015018773466833e-2 +9.800000000000008 
1.0446874040350686e-2 1.1157309709366708e-2 +9.800000000000008 1.0446874040350686e-2 1.0864795971667954e-2 +9.800000000000008 1.0446874040350686e-2 1.0237980819456342e-2 +9.800000000000008 1.0446874040350686e-2 1.0540941476358623e-2 +9.800000000000008 1.0446874040350686e-2 1.0509600718748041e-2 +9.800000000000008 1.0446874040350686e-2 1.0405131526712772e-2 +9.800000000000008 1.0446874040350686e-2 1.1021499759720858e-2 +9.800000000000008 1.0446874040350686e-2 1.0540941476358623e-2 +9.800000000000008 1.0446874040350686e-2 1.064541066839389e-2 +10.000000000000007 9.595927397018434e-3 9.76864246576657e-3 +10.000000000000007 9.595927397018434e-3 9.624703726094174e-3 +10.000000000000007 9.595927397018434e-3 9.567128230225217e-3 +10.000000000000007 9.595927397018434e-3 9.845409793591848e-3 +10.000000000000007 9.595927397018434e-3 9.356018078705702e-3 +10.000000000000007 9.595927397018434e-3 9.077736515339071e-3 +10.000000000000007 9.595927397018434e-3 1.0162075020871117e-2 +10.000000000000007 9.595927397018434e-3 9.240867086967787e-3 +10.000000000000007 9.595927397018434e-3 9.979752617286083e-3 +10.200000000000006 8.817814074001557e-3 8.817797843166648e-3 +10.200000000000006 8.817814074001557e-3 8.914793619441481e-3 +10.200000000000006 8.817814074001557e-3 9.055878384932147e-3 +10.200000000000006 8.817814074001557e-3 9.04706058708898e-3 +10.200000000000006 8.817814074001557e-3 8.429814738067315e-3 +10.200000000000006 8.817814074001557e-3 8.650259684146481e-3 +10.200000000000006 8.817814074001557e-3 8.747255460421315e-3 +10.200000000000006 8.817814074001557e-3 8.78252665179398e-3 +10.200000000000006 8.817814074001557e-3 8.75607325826448e-3 +10.400000000000007 8.10603193174018e-3 7.976330401653629e-3 +10.400000000000007 8.10603193174018e-3 8.543752279820046e-3 +10.400000000000007 8.10603193174018e-3 8.024966562639322e-3 +10.400000000000007 8.10603193174018e-3 8.25193531390589e-3 +10.400000000000007 8.10603193174018e-3 7.943906294329834e-3 +10.400000000000007 
8.10603193174018e-3 8.10602683094881e-3 +10.400000000000007 8.10603193174018e-3 8.527540226158148e-3 +10.400000000000007 8.10603193174018e-3 7.830421918696552e-3 +10.400000000000007 8.10603193174018e-3 8.203299152920196e-3 +10.600000000000009 7.45468113797498e-3 7.40249284351145e-3 +10.600000000000009 7.45468113797498e-3 7.290672709923664e-3 +10.600000000000009 7.45468113797498e-3 7.454675572519084e-3 +10.600000000000009 7.45468113797498e-3 7.477039599236642e-3 +10.600000000000009 7.45468113797498e-3 7.231035305343511e-3 +10.600000000000009 7.45468113797498e-3 7.186307251908397e-3 +10.600000000000009 7.45468113797498e-3 7.074487118320611e-3 +10.600000000000009 7.45468113797498e-3 7.573950381679389e-3 +10.600000000000009 7.45468113797498e-3 7.797590648854962e-3 +10.800000000000008 6.858406503829848e-3 6.721259756114289e-3 +10.800000000000008 6.858406503829848e-3 6.872145179210732e-3 +10.800000000000008 6.858406503829848e-3 6.673250757856329e-3 +10.800000000000008 6.858406503829848e-3 6.920154177468691e-3 +10.800000000000008 6.858406503829848e-3 6.542940619727583e-3 +10.800000000000008 6.858406503829848e-3 6.652675472888633e-3 +10.800000000000008 6.858406503829848e-3 6.789844039339945e-3 +10.800000000000008 6.858406503829848e-3 6.933871034113822e-3 +10.800000000000008 6.858406503829848e-3 7.0916148855328315e-3 +11.000000000000007 6.312345507942478e-3 6.381769978538064e-3 +11.000000000000007 6.312345507942478e-3 6.508016664562555e-3 +11.000000000000007 6.312345507942478e-3 6.451205655851534e-3 +11.000000000000007 6.312345507942478e-3 6.274460295417245e-3 +11.000000000000007 6.312345507942478e-3 6.268147961116021e-3 +11.000000000000007 6.312345507942478e-3 6.1040272692841815e-3 +11.000000000000007 6.312345507942478e-3 6.590077010478475e-3 +11.000000000000007 6.312345507942478e-3 6.19871228380255e-3 +11.000000000000007 6.312345507942478e-3 6.306021966923368e-3 +11.200000000000008 5.812081432014394e-3 5.939961058963703e-3 +11.200000000000008 5.812081432014394e-3 
6.125948097991921e-3 +11.200000000000008 5.812081432014394e-3 5.602859550725059e-3 +11.200000000000008 5.812081432014394e-3 5.951585248902967e-3 +11.200000000000008 5.812081432014394e-3 5.823719159571067e-3 +11.200000000000008 5.812081432014394e-3 6.067827148295603e-3 +11.200000000000008 5.812081432014394e-3 5.841155444479963e-3 +11.200000000000008 5.812081432014394e-3 5.858591729388858e-3 +11.200000000000008 5.812081432014394e-3 6.364243991746825e-3 +11.40000000000001 5.353601091101685e-3 5.364312864714385e-3 +11.40000000000001 5.353601091101685e-3 5.6105787247711335e-3 +11.40000000000001 5.353601091101685e-3 5.401788104288238e-3 +11.40000000000001 5.353601091101685e-3 5.557042668237058e-3 +11.40000000000001 5.353601091101685e-3 5.535628245623428e-3 +11.40000000000001 5.353601091101685e-3 5.701590020879062e-3 +11.40000000000001 5.353601091101685e-3 5.385727287328015e-3 +11.40000000000001 5.353601091101685e-3 5.342898442100755e-3 +11.40000000000001 5.353601091101685e-3 5.487445794742759e-3 +11.600000000000009 4.933256695536919e-3 4.992452122778802e-3 +11.600000000000009 4.933256695536919e-3 4.913520073406806e-3 +11.600000000000009 4.933256695536919e-3 5.06645091906505e-3 +11.600000000000009 4.933256695536919e-3 5.239114777066293e-3 +11.600000000000009 4.933256695536919e-3 5.06645091906505e-3 +11.600000000000009 4.933256695536919e-3 4.9036535672353065e-3 +11.600000000000009 4.933256695536919e-3 4.790188746263061e-3 +11.600000000000009 4.933256695536919e-3 4.721123203062563e-3 +11.600000000000009 4.933256695536919e-3 4.972719110435803e-3 +11.800000000000008 4.547731429255463e-3 4.561371594888354e-3 +11.800000000000008 4.547731429255463e-3 4.4658692982855065e-3 +11.800000000000008 4.547731429255463e-3 4.693255718768475e-3 +11.800000000000008 4.547731429255463e-3 4.402201100550275e-3 +11.800000000000008 4.547731429255463e-3 4.429487471008232e-3 +11.800000000000008 4.547731429255463e-3 4.5431806812497155e-3 +11.800000000000008 4.547731429255463e-3 4.72963754604575e-3 
+11.800000000000008 4.547731429255463e-3 4.379462458501978e-3 +11.800000000000008 4.547731429255463e-3 4.402201100550275e-3 +12.000000000000009 4.1940083721126e-3 4.399521882273995e-3 +12.000000000000009 4.1940083721126e-3 4.202403170675446e-3 +12.000000000000009 4.1940083721126e-3 3.988508398515319e-3 +12.000000000000009 4.1940083721126e-3 3.946568247111372e-3 +12.000000000000009 4.1940083721126e-3 3.9214041562690045e-3 +12.000000000000009 4.1940083721126e-3 4.038836580200054e-3 +12.000000000000009 4.1940083721126e-3 4.181433094973473e-3 +12.000000000000009 4.1940083721126e-3 4.1059408224463695e-3 +12.000000000000009 4.1940083721126e-3 4.1898211252542625e-3 +12.20000000000001 3.8693424320706163e-3 3.675873116598695e-3 +12.20000000000001 3.8693424320706163e-3 3.8074306807717013e-3 +12.20000000000001 3.8693424320706163e-3 3.8538627622445266e-3 +12.20000000000001 3.8693424320706163e-3 3.5791396135303084e-3 +12.20000000000001 3.8693424320706163e-3 3.830646721508114e-3 +12.20000000000001 3.8693424320706163e-3 3.8577321023672622e-3 +12.20000000000001 3.8693424320706163e-3 3.7996920005262304e-3 +12.20000000000001 3.8693424320706163e-3 3.5868782937757797e-3 +12.20000000000001 3.8693424320706163e-3 3.8848174832264108e-3 +12.40000000000001 3.5712349873948854e-3 3.7533703551595452e-3 +12.40000000000001 3.5712349873948854e-3 3.510526221809546e-3 +12.40000000000001 3.5712349873948854e-3 3.692659321822045e-3 +12.40000000000001 3.5712349873948854e-3 3.692659321822045e-3 +12.40000000000001 3.5712349873948854e-3 3.599807153188222e-3 +12.40000000000001 3.5712349873948854e-3 3.4712426120029286e-3 +12.40000000000001 3.5712349873948854e-3 3.6390907629948394e-3 +12.40000000000001 3.5712349873948854e-3 3.731942931628663e-3 +12.40000000000001 3.5712349873948854e-3 3.7212292198632218e-3 +12.600000000000009 3.2974109696596596e-3 3.3633617790205365e-3 +12.600000000000009 3.2974109696596596e-3 3.4457971167416277e-3 +12.600000000000009 3.2974109696596596e-3 3.162219554981073e-3 
+12.600000000000009 3.2974109696596596e-3 3.317197989896725e-3 +12.600000000000009 3.2974109696596596e-3 3.3633617790205365e-3 +12.600000000000009 3.2974109696596596e-3 3.4326074627062534e-3 +12.600000000000009 3.2974109696596596e-3 3.3007109223525065e-3 +12.600000000000009 3.2974109696596596e-3 3.1655169684899165e-3 +12.600000000000009 3.2974109696596596e-3 3.2050859305960406e-3 +12.80000000000001 3.0457981458141417e-3 3.2102728731942215e-3 +12.80000000000001 3.0457981458141417e-3 3.0183874927281534e-3 +12.80000000000001 3.0457981458141417e-3 3.0183874927281534e-3 +12.80000000000001 3.0457981458141417e-3 3.204181273814346e-3 +12.80000000000001 3.0457981458141417e-3 3.1371736806357193e-3 +12.80000000000001 3.0457981458141417e-3 3.1219446821860314e-3 +12.80000000000001 3.0457981458141417e-3 3.088440885596718e-3 +12.80000000000001 3.0457981458141417e-3 3.1067156837363435e-3 +12.80000000000001 3.0457981458141417e-3 3.115853082806156e-3 +13.00000000000001 2.8145083821412512e-3 2.780733010227919e-3 +13.00000000000001 2.8145083821412512e-3 2.8651682230890906e-3 +13.00000000000001 2.8145083821412512e-3 2.7863620244186634e-3 +13.00000000000001 2.8145083821412512e-3 2.8820552656613247e-3 +13.00000000000001 2.8145083821412512e-3 2.7441444179880778e-3 +13.00000000000001 2.8145083821412512e-3 2.9580469572363793e-3 +13.00000000000001 2.8145083821412512e-3 2.8229506166585045e-3 +13.00000000000001 2.8145083821412512e-3 3.0115225920484545e-3 +13.00000000000001 2.8145083821412512e-3 2.81732160246776e-3 +13.20000000000001 2.6018206949624846e-3 2.4951476013800066e-3 +13.20000000000001 2.6018206949624846e-3 2.5627949816051164e-3 +13.20000000000001 2.6018206949624846e-3 2.5029530683290577e-3 +13.20000000000001 2.6018206949624846e-3 2.7189043205861387e-3 +13.20000000000001 2.6018206949624846e-3 2.6408496510956273e-3 +13.20000000000001 2.6018206949624846e-3 2.6304423618302257e-3 +13.20000000000001 2.6018206949624846e-3 2.476934845165554e-3 +13.20000000000001 2.6018206949624846e-3 
2.5549895146560653e-3 +13.20000000000001 2.6018206949624846e-3 2.5549895146560653e-3 +13.40000000000001 2.4061659126745766e-3 2.281044949578801e-3 +13.40000000000001 2.4061659126745766e-3 2.30029427404782e-3 +13.40000000000001 2.4061659126745766e-3 2.305106605165075e-3 +13.40000000000001 2.4061659126745766e-3 2.4085717241860544e-3 +13.40000000000001 2.4061659126745766e-3 2.350823750778996e-3 +13.40000000000001 2.4061659126745766e-3 2.4085717241860544e-3 +13.40000000000001 2.4061659126745766e-3 2.276232618461546e-3 +13.40000000000001 2.4061659126745766e-3 2.2858572806960555e-3 +13.40000000000001 2.4061659126745766e-3 2.46872586315174e-3 +13.60000000000001 2.2261127913912448e-3 2.045795545107677e-3 +13.60000000000001 2.2261127913912448e-3 2.1214833019451754e-3 +13.60000000000001 2.2261127913912448e-3 2.1125788599642933e-3 +13.60000000000001 2.2261127913912448e-3 2.070282760555103e-3 +13.60000000000001 2.2261127913912448e-3 2.152648848878263e-3 +13.60000000000001 2.2261127913912448e-3 2.1170310809547344e-3 +13.60000000000001 2.2261127913912448e-3 2.1259355229356165e-3 +13.60000000000001 2.2261127913912448e-3 2.1593271803639247e-3 +13.60000000000001 2.2261127913912448e-3 2.1081266389738522e-3 +13.800000000000011 2.0603554423226206e-3 2.0006057446847964e-3 +13.800000000000011 2.0603554423226206e-3 2.0624164268068807e-3 +13.800000000000011 2.0603554423226206e-3 2.0995028360801314e-3 +13.800000000000011 2.0603554423226206e-3 2.136589245353382e-3 +13.800000000000011 2.0603554423226206e-3 2.095382123938659e-3 +13.800000000000011 2.0603554423226206e-3 2.1118649725045484e-3 +13.800000000000011 2.0603554423226206e-3 2.155132449990007e-3 +13.800000000000011 2.0603554423226206e-3 2.0438732221702556e-3 +13.800000000000011 2.0603554423226206e-3 2.136589245353382e-3 +14.00000000000001 1.9077019432494815e-3 1.9172400899672066e-3 +14.00000000000001 1.9077019432494815e-3 1.955394121608345e-3 +14.00000000000001 1.9077019432494815e-3 1.9992712579956543e-3 +14.00000000000001 
1.9077019432494815e-3 1.9363171057877758e-3 +14.00000000000001 1.9077019432494815e-3 1.9210554931313204e-3 +14.00000000000001 1.9077019432494815e-3 1.9210554931313204e-3 +14.00000000000001 1.9077019432494815e-3 2.006902064323882e-3 +14.00000000000001 1.9077019432494815e-3 1.812316502954076e-3 +14.00000000000001 1.9077019432494815e-3 1.98973275008537e-3 +14.20000000000001 1.7670640192116816e-3 1.7794348924740683e-3 +14.20000000000001 1.7670640192116816e-3 1.6893145553179834e-3 +14.20000000000001 1.7670640192116816e-3 1.717587602268912e-3 +14.20000000000001 1.7670640192116816e-3 1.761764238129738e-3 +14.20000000000001 1.7670640192116816e-3 1.7458606492198406e-3 +14.20000000000001 1.7670640192116816e-3 1.763531303564171e-3 +14.20000000000001 1.7670640192116816e-3 1.851884575285823e-3 +14.20000000000001 1.7670640192116816e-3 1.6557403120637556e-3 +14.20000000000001 1.7670640192116816e-3 1.7723666307363363e-3 +14.400000000000011 1.6374476889839651e-3 1.5965102741576566e-3 +14.400000000000011 1.6374476889839651e-3 1.5261000774512163e-3 +14.400000000000011 1.6374476889839651e-3 1.6587332386889294e-3 +14.400000000000011 1.6374476889839651e-3 1.640721327903561e-3 +14.400000000000011 1.6374476889839651e-3 1.5866855955474558e-3 +14.400000000000011 1.6374476889839651e-3 1.6898447209545658e-3 +14.400000000000011 1.6374476889839651e-3 1.7144064174800682e-3 +14.400000000000011 1.6374476889839651e-3 1.5211877381461159e-3 +14.400000000000011 1.6374476889839651e-3 1.637446435033494e-3 +14.600000000000012 1.5179447841949282e-3 1.4891049431908741e-3 +14.600000000000012 1.5179447841949282e-3 1.4541921871323725e-3 +14.600000000000012 1.5179447841949282e-3 1.5088382400935054e-3 +14.600000000000012 1.5179447841949282e-3 1.479997267697352e-3 +14.600000000000012 1.5179447841949282e-3 1.537679212489659e-3 +14.600000000000012 1.5179447841949282e-3 1.476961375866178e-3 +14.600000000000012 1.5179447841949282e-3 1.5179459155870276e-3 +14.600000000000012 1.5179447841949282e-3 
1.4526742412167854e-3 +14.600000000000012 1.5179447841949282e-3 1.434458890229741e-3 +14.800000000000011 1.4077252571790926e-3 1.3598623808008829e-3 +14.800000000000011 1.4077252571790926e-3 1.3260769800356436e-3 +14.800000000000011 1.4077252571790926e-3 1.3852014313748124e-3 +14.800000000000011 1.4077252571790926e-3 1.3739396311197327e-3 +14.800000000000011 1.4077252571790926e-3 1.4471413327777512e-3 +14.800000000000011 1.4077252571790926e-3 1.3725319060878477e-3 +14.800000000000011 1.4077252571790926e-3 1.3866091564066973e-3 +14.800000000000011 1.4077252571790926e-3 1.3866091564066973e-3 +14.800000000000011 1.4077252571790926e-3 1.3739396311197327e-3 +15.00000000000001 1.30603020194702e-3 1.2041599678194125e-3 +15.00000000000001 1.30603020194702e-3 1.2694614845124392e-3 +15.00000000000001 1.30603020194702e-3 1.2916640001880684e-3 +15.00000000000001 1.30603020194702e-3 1.2263624834950417e-3 +15.00000000000001 1.30603020194702e-3 1.2995001821912316e-3 +15.00000000000001 1.30603020194702e-3 1.22244439249346e-3 +15.00000000000001 1.30603020194702e-3 1.2407288171675075e-3 +15.00000000000001 1.30603020194702e-3 1.180651421809923e-3 +15.00000000000001 1.30603020194702e-3 1.2746856058478814e-3 +15.200000000000012 1.2121655201122257e-3 1.24004509254882e-3 +15.200000000000012 1.2121655201122257e-3 1.2097409602773435e-3 +15.200000000000012 1.2121655201122257e-3 1.2073166296956252e-3 +15.200000000000012 1.2121655201122257e-3 1.2570154066208468e-3 +15.200000000000012 1.2121655201122257e-3 1.2764100512745917e-3 +15.200000000000012 1.2121655201122257e-3 1.260651902493424e-3 +15.200000000000012 1.2121655201122257e-3 1.2521667454574105e-3 +15.200000000000012 1.2121655201122257e-3 1.2545910760391288e-3 +15.200000000000012 1.2121655201122257e-3 1.1770124974241488e-3 +15.400000000000013 1.125496170313487e-3 1.1457551347950528e-3 +15.400000000000013 1.125496170313487e-3 1.1828965094986253e-3 +15.400000000000013 1.125496170313487e-3 1.0849783398255706e-3 +15.400000000000013 
1.125496170313487e-3 1.1873984943111795e-3 +15.400000000000013 1.125496170313487e-3 1.1772690284829325e-3 +15.400000000000013 1.125496170313487e-3 1.1896494867174566e-3 +15.400000000000013 1.125496170313487e-3 1.1558846006232998e-3 +15.400000000000013 1.125496170313487e-3 1.1209942183260046e-3 +15.400000000000013 1.125496170313487e-3 1.055715438543968e-3 +15.600000000000012 1.0454409456952054e-3 1.0423048213654715e-3 +15.600000000000012 1.0454409456952054e-3 1.05694099739267e-3 +15.600000000000012 1.0454409456952054e-3 1.0611227619718693e-3 +15.600000000000012 1.0454409456952054e-3 1.0109415870214755e-3 +15.600000000000012 1.0454409456952054e-3 1.0809861437230668e-3 +15.600000000000012 1.0454409456952054e-3 1.038123056786272e-3 +15.600000000000012 1.0454409456952054e-3 1.1039858489086639e-3 +15.600000000000012 1.0454409456952054e-3 1.0621682031166692e-3 +15.600000000000012 1.0454409456952054e-3 1.0893496728814657e-3 +15.800000000000011 9.714677294265883e-4 9.695250493020003e-4 +15.800000000000011 9.714677294265883e-4 9.520386255670944e-4 +15.800000000000011 9.714677294265883e-4 9.85068537066361e-4 +15.800000000000011 9.714677294265883e-4 9.180372460825554e-4 +15.800000000000011 9.714677294265883e-4 9.510671575818219e-4 +15.800000000000011 9.714677294265883e-4 9.83125601095816e-4 +15.800000000000011 9.714677294265883e-4 1.0132411086392647e-3 +15.800000000000011 9.714677294265883e-4 9.510671575818219e-4 +15.800000000000011 9.714677294265883e-4 9.74382389228363e-4 +16.000000000000014 9.03089183114959e-4 8.994771111974063e-4 +16.000000000000014 9.03089183114959e-4 8.976709322592589e-4 +16.000000000000014 9.03089183114959e-4 9.401161373057228e-4 +16.000000000000014 9.03089183114959e-4 8.118774326972573e-4 +16.000000000000014 9.03089183114959e-4 8.678689797798268e-4 +16.000000000000014 9.03089183114959e-4 9.184419900479541e-4 +16.000000000000014 9.03089183114959e-4 8.859307691613008e-4 +16.000000000000014 9.03089183114959e-4 8.516133693365002e-4 +16.000000000000014 
9.03089183114959e-4 8.669658903107531e-4 +16.200000000000014 8.398588273554193e-4 8.348198838306979e-4 +16.200000000000014 8.398588273554193e-4 7.8862763673745e-4 +16.200000000000014 8.398588273554193e-4 8.197024211456349e-4 +16.200000000000014 8.398588273554193e-4 8.121436898031034e-4 +16.200000000000014 8.398588273554193e-4 8.222219982598121e-4 +16.200000000000014 8.398588273554193e-4 8.516170645918789e-4 +16.200000000000014 8.398588273554193e-4 8.003856632702767e-4 +16.200000000000014 8.398588273554193e-4 7.987059451941586e-4 +16.200000000000014 8.398588273554193e-4 7.8862763673745e-4 +16.400000000000013 7.813674776082505e-4 7.548007201080162e-4 +16.400000000000013 7.813674776082505e-4 7.376106415962394e-4 +16.400000000000013 7.813674776082505e-4 7.938690803620544e-4 +16.400000000000013 7.813674776082505e-4 7.524566184927739e-4 +16.400000000000013 7.813674776082505e-4 7.172950942641396e-4 +16.400000000000013 7.813674776082505e-4 7.376106415962394e-4 +16.400000000000013 7.813674776082505e-4 7.508938840826124e-4 +16.400000000000013 7.813674776082505e-4 7.837113066960044e-4 +16.400000000000013 7.813674776082505e-4 8.009013852077812e-4 +16.600000000000012 7.272400021518117e-4 7.694198516139636e-4 +16.600000000000012 7.272400021518117e-4 7.679653717432378e-4 +16.600000000000012 7.272400021518117e-4 7.286944152336403e-4 +16.600000000000012 7.272400021518117e-4 7.243309756214629e-4 +16.600000000000012 7.272400021518117e-4 7.672381318078749e-4 +16.600000000000012 7.272400021518117e-4 7.628746921956973e-4 +16.600000000000012 7.272400021518117e-4 7.74510531161504e-4 +16.600000000000012 7.272400021518117e-4 7.272399353629146e-4 +16.600000000000012 7.272400021518117e-4 7.614202123249716e-4 +16.80000000000001 6.771323720620374e-4 7.272402249163065e-4 +16.80000000000001 6.771323720620374e-4 6.98800663048071e-4 +16.80000000000001 6.771323720620374e-4 6.378587447589951e-4 +16.80000000000001 6.771323720620374e-4 6.669754390526647e-4 +16.80000000000001 6.771323720620374e-4 
7.062491197278469e-4 +16.80000000000001 6.771323720620374e-4 6.947378684954659e-4 +16.80000000000001 6.771323720620374e-4 7.048948548769786e-4 +16.80000000000001 6.771323720620374e-4 7.143747088330571e-4 +16.80000000000001 6.771323720620374e-4 6.818723524122165e-4 +17.000000000000014 6.307289760561569e-4 6.326211772304312e-4 +17.000000000000014 6.307289760561569e-4 6.452357570356242e-4 +17.000000000000014 6.307289760561569e-4 6.294675322791329e-4 +17.000000000000014 6.307289760561569e-4 6.136993075226416e-4 +17.000000000000014 6.307289760561569e-4 7.013706371687332e-4 +17.000000000000014 6.307289760561569e-4 6.162222234836802e-4 +17.000000000000014 6.307289760561569e-4 6.439742990551049e-4 +17.000000000000014 6.307289760561569e-4 6.263138873278347e-4 +17.000000000000014 6.307289760561569e-4 6.521737759284804e-4 +17.200000000000014 5.877401756398498e-4 5.589409391618354e-4 +17.200000000000014 5.877401756398498e-4 6.088988569628407e-4 +17.200000000000014 5.877401756398498e-4 5.859769887953207e-4 +17.200000000000014 5.877401756398498e-4 5.983195331932161e-4 +17.200000000000014 5.877401756398498e-4 5.871524692141678e-4 +17.200000000000014 5.877401756398498e-4 5.62467380418377e-4 +17.200000000000014 5.877401756398498e-4 5.818628073293555e-4 +17.200000000000014 5.877401756398498e-4 5.542390174864467e-4 +17.200000000000014 5.877401756398498e-4 5.759854052351195e-4 +17.400000000000013 5.479000783427062e-4 5.292715667205435e-4 +17.400000000000013 5.479000783427062e-4 5.54474974659617e-4 +17.400000000000013 5.479000783427062e-4 5.363942689641947e-4 +17.400000000000013 5.479000783427062e-4 5.473522724159658e-4 +17.400000000000013 5.479000783427062e-4 5.637892775936224e-4 +17.400000000000013 5.479000783427062e-4 5.298194668931321e-4 +17.400000000000013 5.479000783427062e-4 5.522833739692628e-4 +17.400000000000013 5.479000783427062e-4 5.4954387310632e-4 +17.400000000000013 5.479000783427062e-4 5.28723666547955e-4 +17.600000000000016 5.109645089423057e-4 4.915478801869926e-4 
+17.600000000000016 5.109645089423057e-4 5.068768161595599e-4 +17.600000000000016 5.109645089423057e-4 5.038110289650465e-4 +17.600000000000016 5.109645089423057e-4 5.06365851627141e-4 +17.600000000000016 5.109645089423057e-4 5.135193550810058e-4 +17.600000000000016 5.109645089423057e-4 5.027890999002087e-4 +17.600000000000016 5.109645089423057e-4 5.216947875997084e-4 +17.600000000000016 5.109645089423057e-4 5.053439225623031e-4 +17.600000000000016 5.109645089423057e-4 4.900149865897359e-4 +17.800000000000015 4.767091604856612e-4 4.37619028323676e-4 +17.800000000000015 4.767091604856612e-4 4.7527905363693355e-4 +17.800000000000015 4.767091604856612e-4 4.547805588461731e-4 +17.800000000000015 4.767091604856612e-4 4.357121915989541e-4 +17.800000000000015 4.767091604856612e-4 4.733722169122116e-4 +17.800000000000015 4.767091604856612e-4 4.485833394908269e-4 +17.800000000000015 4.767091604856612e-4 4.605010690203388e-4 +17.800000000000015 4.767091604856612e-4 4.4095599259193934e-4 +17.800000000000015 4.767091604856612e-4 4.6717499755686545e-4 +18.000000000000014 4.449279086389735e-4 4.1244819370382484e-4 +18.000000000000014 4.449279086389735e-4 4.168974730318057e-4 +18.000000000000014 4.449279086389735e-4 4.2668588755336354e-4 +18.000000000000014 4.449279086389735e-4 4.1200326577102675e-4 +18.000000000000014 4.449279086389735e-4 4.1422790543501714e-4 +18.000000000000014 4.449279086389735e-4 4.213467523597865e-4 +18.000000000000014 4.449279086389735e-4 4.373641579405176e-4 +18.000000000000014 4.449279086389735e-4 4.373641579405176e-4 +18.000000000000014 4.449279086389735e-4 4.106684819726325e-4 +18.200000000000014 4.154312744512661e-4 4.2914051007483166e-4 +18.200000000000014 4.154312744512661e-4 4.1750843429351963e-4 +18.200000000000014 4.154312744512661e-4 4.303868039085436e-4 +18.200000000000014 4.154312744512661e-4 4.1833929684932766e-4 +18.200000000000014 4.154312744512661e-4 4.0421463340059165e-4 +18.200000000000014 4.154312744512661e-4 4.1958559068303963e-4 
+18.200000000000014 4.154312744512661e-4 3.988140267878397e-4 +18.200000000000014 4.154312744512661e-4 4.2997137263063964e-4 +18.200000000000014 4.154312744512661e-4 4.2124731579465564e-4 +18.400000000000013 3.8804502202118517e-4 3.6941879790657506e-4 +18.400000000000013 3.8804502202118517e-4 3.705829327739277e-4 +18.400000000000013 3.8804502202118517e-4 3.806721016243174e-4 +18.400000000000013 3.8804502202118517e-4 3.861047310052964e-4 +18.400000000000013 3.8804502202118517e-4 3.7834383188961205e-4 +18.400000000000013 3.8804502202118517e-4 3.826123264032385e-4 +18.400000000000013 3.8804502202118517e-4 3.9192540534205967e-4 +18.400000000000013 3.8804502202118517e-4 4.1598419260068117e-4 +18.400000000000013 3.8804502202118517e-4 3.880449557842175e-4 +18.600000000000016 3.626088788241667e-4 3.61521100938722e-4 +18.600000000000016 3.626088788241667e-4 3.5716979380605936e-4 +18.600000000000016 3.626088788241667e-4 3.535437045288405e-4 +18.600000000000016 3.626088788241667e-4 3.54994140239728e-4 +18.600000000000016 3.626088788241667e-4 3.821898098188696e-4 +18.600000000000016 3.626088788241667e-4 3.782011116139288e-4 +18.600000000000016 3.626088788241667e-4 3.658724080713846e-4 +18.600000000000016 3.626088788241667e-4 3.651471902159409e-4 +18.600000000000016 3.626088788241667e-4 3.68048061637716e-4 +18.800000000000015 3.389753676027032e-4 3.345686725081159e-4 +18.800000000000015 3.389753676027032e-4 3.115183485663207e-4 +18.800000000000015 3.389753676027032e-4 3.352466232122864e-4 +18.800000000000015 3.389753676027032e-4 3.227045351851331e-4 +18.800000000000015 3.389753676027032e-4 3.2914506687475235e-4 +18.800000000000015 3.389753676027032e-4 3.1355220067883204e-4 +18.800000000000015 3.389753676027032e-4 3.1558605279134337e-4 +18.800000000000015 3.389753676027032e-4 3.443989577185874e-4 +18.800000000000015 3.389753676027032e-4 3.3490764786020115e-4 +19.000000000000014 3.170087397577456e-4 2.9545216068413026e-4 +19.000000000000014 3.170087397577456e-4 
2.9545216068413026e-4 +19.000000000000014 3.170087397577456e-4 3.062304583914912e-4 +19.000000000000014 3.170087397577456e-4 3.068644759036889e-4 +19.000000000000014 3.170087397577456e-4 2.9703720446462453e-4 +19.000000000000014 3.170087397577456e-4 2.9925626575731646e-4 +19.000000000000014 3.170087397577456e-4 3.10985589732974e-4 +19.000000000000014 3.170087397577456e-4 3.052794321231947e-4 +19.000000000000014 3.170087397577456e-4 3.10985589732974e-4 +19.200000000000017 2.9658400111523623e-4 2.7256070036533217e-4 +19.200000000000017 2.9658400111523623e-4 2.781957964555839e-4 +19.200000000000017 2.9658400111523623e-4 2.8116163650308476e-4 +19.200000000000017 2.9658400111523623e-4 2.856103965743361e-4 +19.200000000000017 2.9658400111523623e-4 2.88576236621837e-4 +19.200000000000017 2.9658400111523623e-4 2.948045007215889e-4 +19.200000000000017 2.9658400111523623e-4 2.9213524467883807e-4 +19.200000000000017 2.9658400111523623e-4 2.734504523795824e-4 +19.200000000000017 2.9658400111523623e-4 2.998464288023404e-4 +19.400000000000016 2.7758602178815306e-4 2.556567171512347e-4 +19.400000000000016 2.7758602178815306e-4 2.670377436476523e-4 +19.400000000000016 2.7758602178815306e-4 2.6676015763554454e-4 +19.400000000000016 2.7758602178815306e-4 2.5704464721177343e-4 +19.400000000000016 2.7758602178815306e-4 2.687032597202988e-4 +19.400000000000016 2.7758602178815306e-4 2.5843257727231215e-4 +19.400000000000016 2.7758602178815306e-4 2.6731532965976006e-4 +19.400000000000016 2.7758602178815306e-4 2.6453946953868256e-4 +19.400000000000016 2.7758602178815306e-4 2.692584317445143e-4 +19.600000000000016 2.5990872262005797e-4 2.5471052019425573e-4 +19.600000000000016 2.5990872262005797e-4 2.6042851146392275e-4 +19.600000000000016 2.5990872262005797e-4 2.645870505691351e-4 +19.600000000000016 2.5990872262005797e-4 2.596487853816954e-4 +19.600000000000016 2.5990872262005797e-4 2.661465027335897e-4 +19.600000000000016 2.5990872262005797e-4 2.7550321572031744e-4 +19.600000000000016 
2.5990872262005797e-4 2.762829418025448e-4 +19.600000000000016 2.5990872262005797e-4 2.614681462402258e-4 +19.600000000000016 2.5990872262005797e-4 2.5834924191131656e-4 +19.800000000000015 2.4345433138892443e-4 2.337161327673183e-4 +19.800000000000015 2.4345433138892443e-4 2.4028939900139914e-4 +19.800000000000015 2.4345433138892443e-4 2.475930281503778e-4 +19.800000000000015 2.4345433138892443e-4 2.4345430496595656e-4 +19.800000000000015 2.4345433138892443e-4 2.5148829702983315e-4 +19.800000000000015 2.4345433138892443e-4 2.475930281503778e-4 +19.800000000000015 2.4345433138892443e-4 2.495406625901055e-4 +19.800000000000015 2.4345433138892443e-4 2.449150307957523e-4 +19.800000000000015 2.4345433138892443e-4 2.5343593146956076e-4 +20.000000000000014 2.2813270257705028e-4 2.2699202037498626e-4 +20.000000000000014 2.2813270257705028e-4 2.3064214331568956e-4 +20.000000000000014 2.2813270257705028e-4 2.2630762232360438e-4 +20.000000000000014 2.2813270257705028e-4 2.23113764750489e-4 +20.000000000000014 2.2813270257705028e-4 2.33607868205011e-4 +20.000000000000014 2.2813270257705028e-4 2.2653575500739836e-4 +20.000000000000014 2.2813270257705028e-4 2.2357003011807693e-4 +20.000000000000014 2.2813270257705028e-4 2.2357003011807693e-4 +20.000000000000014 2.2813270257705028e-4 2.345203989401868e-4 +20.200000000000017 2.1386069508055506e-4 2.1621316436627408e-4 +20.200000000000017 2.1386069508055506e-4 2.1086664694871043e-4 +20.200000000000017 2.1386069508055506e-4 2.1835177133329954e-4 +20.200000000000017 2.1386069508055506e-4 2.0872803998168497e-4 +20.200000000000017 2.1386069508055506e-4 1.9846272653996277e-4 +20.200000000000017 2.1386069508055506e-4 2.1043892555530535e-4 +20.200000000000017 2.1386069508055506e-4 2.1321911461243844e-4 +20.200000000000017 2.1386069508055506e-4 2.1450227879265372e-4 +20.200000000000017 2.1386069508055506e-4 2.2305670666075556e-4 +20.400000000000016 2.0056160274611903e-4 1.9835543488877554e-4 +20.400000000000016 2.0056160274611903e-4 
1.9935824295191394e-4 +20.400000000000016 2.0056160274611903e-4 1.8712398458162546e-4 +20.400000000000016 2.0056160274611903e-4 1.9695150360038178e-4 +20.400000000000016 2.0056160274611903e-4 1.8732454619425315e-4 +20.400000000000016 2.0056160274611903e-4 1.9715206521300947e-4 +20.400000000000016 2.0056160274611903e-4 1.997593661771693e-4 +20.400000000000016 2.0056160274611903e-4 1.9915768133928628e-4 +20.400000000000016 2.0056160274611903e-4 1.9053353199629603e-4 +20.600000000000016 1.8816463308837033e-4 1.6859551095635615e-4 +20.600000000000016 1.8816463308837033e-4 1.9249241931735753e-4 +20.600000000000016 1.8816463308837033e-4 1.7649842553243536e-4 +20.600000000000016 1.8816463308837033e-4 1.7969722428941978e-4 +20.600000000000016 1.8816463308837033e-4 1.751812731030888e-4 +20.600000000000016 1.8816463308837033e-4 1.7066532191675784e-4 +20.600000000000016 1.8816463308837033e-4 1.7706291943072672e-4 +20.600000000000016 1.8816463308837033e-4 1.8571849253786108e-4 +20.600000000000016 1.8816463308837033e-4 1.7988538892218356e-4 +20.80000000000002 1.766044299634105e-4 1.6318249666394226e-4 +20.80000000000002 1.766044299634105e-4 1.7625122475174715e-4 +20.80000000000002 1.766044299634105e-4 1.7024667400870164e-4 +20.80000000000002 1.766044299634105e-4 1.649485410001321e-4 +20.80000000000002 1.766044299634105e-4 1.6530174986737007e-4 +20.80000000000002 1.766044299634105e-4 1.7413197154831934e-4 +20.80000000000002 1.766044299634105e-4 1.746617848491763e-4 +20.80000000000002 1.766044299634105e-4 1.6759760750441688e-4 +20.80000000000002 1.766044299634105e-4 1.6689118976994093e-4 +21.000000000000018 1.6582063635655071e-4 1.7477496479627606e-4 +21.000000000000018 1.6582063635655071e-4 1.7842301908993648e-4 +21.000000000000018 1.6582063635655071e-4 1.7195601375117483e-4 +21.000000000000018 1.6582063635655071e-4 1.6333333996615933e-4 +21.000000000000018 1.6582063635655071e-4 1.8107614948532588e-4 +21.000000000000018 1.6582063635655071e-4 1.6217259541817647e-4 
+21.000000000000018 1.6582063635655071e-4 1.8008122558705485e-4 +21.000000000000018 1.6582063635655071e-4 1.6830795945751444e-4 +21.000000000000018 1.6582063635655071e-4 1.754382473951234e-4 +21.200000000000017 1.5575749378922264e-4 1.5653627739910535e-4 +21.200000000000017 1.5575749378922264e-4 1.6494718185637073e-4 +21.200000000000017 1.5575749378922264e-4 1.556017324594092e-4 +21.200000000000017 1.5575749378922264e-4 1.5871688225839637e-4 +21.200000000000017 1.5575749378922264e-4 1.5933991221819382e-4 +21.200000000000017 1.5575749378922264e-4 1.6074172962773805e-4 +21.200000000000017 1.5575749378922264e-4 1.556017324594092e-4 +21.200000000000017 1.5575749378922264e-4 1.5233082517047267e-4 +21.200000000000017 1.5575749378922264e-4 1.6245506201718098e-4 +21.400000000000016 1.463634751646593e-4 1.5587709332690896e-4 +21.400000000000016 1.463634751646593e-4 1.5807254534559782e-4 +21.400000000000016 1.463634751646593e-4 1.4855891993127942e-4 +21.400000000000016 1.463634751646593e-4 1.4182620040730025e-4 +21.400000000000016 1.463634751646593e-4 1.535352778403075e-4 +21.400000000000016 1.463634751646593e-4 1.5294982396865715e-4 +21.400000000000016 1.463634751646593e-4 1.4987619114249274e-4 +21.400000000000016 1.463634751646593e-4 1.4211892734312544e-4 +21.400000000000016 1.463634751646593e-4 1.4387528895807653e-4 +21.600000000000016 1.375909481573892e-4 1.3057380928793933e-4 +21.600000000000016 1.375909481573892e-4 1.4281940362579666e-4 +21.600000000000016 1.375909481573892e-4 1.45158449735275e-4 +21.600000000000016 1.375909481573892e-4 1.3029862739270657e-4 +21.600000000000016 1.375909481573892e-4 1.3855408424968905e-4 +21.600000000000016 1.375909481573892e-4 1.3951722088300367e-4 +21.600000000000016 1.375909481573892e-4 1.3924203898777092e-4 +21.600000000000016 1.375909481573892e-4 1.3951722088300367e-4 +21.600000000000016 1.375909481573892e-4 1.4103072130678378e-4 +21.80000000000002 1.2939586651062514e-4 1.2616097208387647e-4 +21.80000000000002 1.2939586651062514e-4 
1.3392472421211503e-4 +21.80000000000002 1.2939586651062514e-4 1.2538459687105261e-4 +21.80000000000002 1.2939586651062514e-4 1.2007936625008961e-4 +21.80000000000002 1.2939586651062514e-4 1.2900768119756395e-4 +21.80000000000002 1.2939586651062514e-4 1.2409063818301285e-4 +21.80000000000002 1.2939586651062514e-4 1.2887828532875996e-4 +21.80000000000002 1.2939586651062514e-4 1.3353653660570309e-4 +21.80000000000002 1.2939586651062514e-4 1.3340714073689912e-4 +22.000000000000018 1.2173748684079103e-4 1.2076358130730716e-4 +22.000000000000018 1.2173748684079103e-4 1.2502439314778673e-4 +22.000000000000018 1.2173748684079103e-4 1.2465918070431707e-4 +22.000000000000018 1.2173748684079103e-4 1.1516365717410541e-4 +22.000000000000018 1.2173748684079103e-4 1.1528539465526198e-4 +22.000000000000018 1.2173748684079103e-4 1.1808535672186284e-4 +22.000000000000018 1.2173748684079103e-4 1.1930273153342844e-4 +22.000000000000018 1.2173748684079103e-4 1.2636350544050891e-4 +22.000000000000018 1.2173748684079103e-4 1.2039836886383749e-4 +22.200000000000017 1.1457810876195154e-4 1.1297401105059987e-4 +22.200000000000017 1.1457810876195154e-4 1.0460980942109298e-4 +22.200000000000017 1.1457810876195154e-4 1.1366147967768262e-4 +22.200000000000017 1.1457810876195154e-4 1.0735968392942401e-4 +22.200000000000017 1.1457810876195154e-4 1.0483896563012057e-4 +22.200000000000017 1.1457810876195154e-4 1.1033871464678263e-4 +22.200000000000017 1.1457810876195154e-4 1.1274485484157229e-4 +22.200000000000017 1.1457810876195154e-4 1.1847376006726193e-4 +22.200000000000017 1.1457810876195154e-4 1.1182823000546194e-4 +22.40000000000002 1.0788283633687184e-4 1.210445432051883e-4 +22.40000000000002 1.0788283633687184e-4 1.1877900362648157e-4 +22.40000000000002 1.0788283633687184e-4 1.1726864390734375e-4 +22.40000000000002 1.0788283633687184e-4 1.1209026772744264e-4 +22.40000000000002 1.0788283633687184e-4 1.1543463567696211e-4 +22.40000000000002 1.0788283633687184e-4 1.0971684531165464e-4 
+22.40000000000002 1.0788283633687184e-4 1.1770017525566884e-4 +22.40000000000002 1.0788283633687184e-4 1.1834747227815648e-4 +22.40000000000002 1.0788283633687184e-4 1.170528782331812e-4 +22.60000000000002 1.0161935903764002e-4 1.0395660040576609e-4 +22.60000000000002 1.0161935903764002e-4 1.0497279395811961e-4 +22.60000000000002 1.0161935903764002e-4 1.0365174234006003e-4 +22.60000000000002 1.0161935903764002e-4 1.0355012298482467e-4 +22.60000000000002 1.0161935903764002e-4 1.095456649437105e-4 +22.60000000000002 1.0161935903764002e-4 1.0466793589241355e-4 +22.60000000000002 1.0161935903764002e-4 1.0619222622094385e-4 +22.60000000000002 1.0161935903764002e-4 1.0578574880000244e-4 +22.60000000000002 1.0161935903764002e-4 1.1035861978559332e-4 +22.80000000000002 9.57577505589414e-5 1.0150321473129562e-4 +22.80000000000002 9.57577505589414e-5 9.882199773839347e-5 +22.80000000000002 9.57577505589414e-5 1.0246079222876067e-4 +22.80000000000002 9.57577505589414e-5 9.671532724397035e-5 +22.80000000000002 9.57577505589414e-5 9.642805399473084e-5 +22.80000000000002 9.57577505589414e-5 9.738563149219589e-5 +22.80000000000002 9.57577505589414e-5 9.75771469916889e-5 +22.80000000000002 9.57577505589414e-5 1.0083291048307008e-4 +22.80000000000002 9.57577505589414e-5 9.853472448915395e-5 +23.000000000000018 9.027028397266895e-5 9.41519030374415e-5 +23.000000000000018 9.027028397266895e-5 9.315892994692198e-5 +23.000000000000018 9.027028397266895e-5 9.55962275327426e-5 +23.000000000000018 9.027028397266895e-5 9.505460584700469e-5 +23.000000000000018 9.027028397266895e-5 9.37005516326599e-5 +23.000000000000018 9.027028397266895e-5 9.027028095631975e-5 +23.000000000000018 9.027028397266895e-5 8.990919983249446e-5 +23.000000000000018 9.027028397266895e-5 9.686001146613108e-5 +23.000000000000018 9.027028397266895e-5 9.523514640891733e-5 +23.200000000000017 8.513126184494805e-5 8.989861377359136e-5 +23.200000000000017 8.513126184494805e-5 8.972835124750502e-5 +23.200000000000017 
8.513126184494805e-5 8.010851852362639e-5 +23.200000000000017 8.513126184494805e-5 8.30029814670943e-5 +23.200000000000017 8.513126184494805e-5 8.828111977577107e-5 +23.200000000000017 8.513126184494805e-5 8.206653757361939e-5 +23.200000000000017 8.513126184494805e-5 8.274758767796478e-5 +23.200000000000017 8.513126184494805e-5 8.989861377359136e-5 +23.200000000000017 8.513126184494805e-5 8.71744133562098e-5 +23.40000000000002 8.031686005707997e-5 8.047749336863849e-5 +23.40000000000002 8.031686005707997e-5 8.601935668444293e-5 +23.40000000000002 8.031686005707997e-5 7.838925501775565e-5 +23.40000000000002 8.031686005707997e-5 7.951369105284641e-5 +23.40000000000002 8.031686005707997e-5 8.336890031601471e-5 +23.40000000000002 8.031686005707997e-5 7.525689749143139e-5 +23.40000000000002 8.031686005707997e-5 7.774672014056092e-5 +23.40000000000002 8.031686005707997e-5 7.903178989495036e-5 +23.40000000000002 8.031686005707997e-5 7.99152753510931e-5 +23.60000000000002 7.580498418149386e-5 7.239375836934385e-5 +23.60000000000002 7.580498418149386e-5 7.178731850865826e-5 +23.60000000000002 7.580498418149386e-5 7.603239753345748e-5 +23.60000000000002 7.580498418149386e-5 7.785171711551428e-5 +23.60000000000002 7.580498418149386e-5 7.262117331710095e-5 +23.60000000000002 7.580498418149386e-5 7.239375836934385e-5 +23.60000000000002 7.580498418149386e-5 7.262117331710095e-5 +23.60000000000002 7.580498418149386e-5 7.663883739414308e-5 +23.60000000000002 7.580498418149386e-5 7.610820251604317e-5 +23.80000000000002 7.157513736351763e-5 6.921315734247816e-5 +23.80000000000002 7.157513736351763e-5 7.193301254311328e-5 +23.80000000000002 7.157513736351763e-5 6.620700159440775e-5 +23.80000000000002 7.157513736351763e-5 6.856898111074878e-5 +23.80000000000002 7.157513736351763e-5 7.221931309054856e-5 +23.80000000000002 7.157513736351763e-5 7.064466007965453e-5 +23.80000000000002 7.157513736351763e-5 6.949945788991344e-5 +23.80000000000002 7.157513736351763e-5 7.1646711995678e-5 
+23.80000000000002 7.157513736351763e-5 7.028678439536044e-5 +24.00000000000002 6.760829875058038e-5 7.024502058131777e-5 +24.00000000000002 6.760829875058038e-5 7.470716818321092e-5 +24.00000000000002 6.760829875058038e-5 7.308456905524977e-5 +24.00000000000002 6.760829875058038e-5 7.240848608526597e-5 +24.00000000000002 6.760829875058038e-5 7.308456905524977e-5 +24.00000000000002 6.760829875058038e-5 7.430151840122062e-5 +24.00000000000002 6.760829875058038e-5 7.031262887831615e-5 +24.00000000000002 6.760829875058038e-5 7.024502058131777e-5 +24.00000000000002 6.760829875058038e-5 7.078588695730482e-5 diff --git a/bloomfilter/fpr.classic.gnuplot.data b/bloomfilter/fpr.classic.gnuplot.data new file mode 100644 index 000000000..583ffd95e --- /dev/null +++ b/bloomfilter/fpr.classic.gnuplot.data @@ -0,0 +1,183 @@ +2.0 0.3934693402873666 0.3900039354584809 +2.0 0.3934693402873666 0.37504919323101144 +2.0 0.3934693402873666 0.3892168437622983 +2.3 0.3374056100322293 0.3434547908232119 +2.3 0.3374056100322293 0.32388663967611336 +2.3 0.3374056100322293 0.340080971659919 +2.5999999999999996 0.28797243387934673 0.28217679239850274 +2.5999999999999996 0.28797243387934673 0.2824647279009502 +2.5999999999999996 0.28797243387934673 0.2830405989058451 +2.8999999999999995 0.2482540030425363 0.2477656405163853 +2.8999999999999995 0.2482540030425363 0.23857994041708044 +2.8999999999999995 0.2482540030425363 0.24056603773584906 +3.1999999999999993 0.21598193982220967 0.21706263498920086 +3.1999999999999993 0.21598193982220967 0.21036717062634988 +3.1999999999999993 0.21598193982220967 0.21036717062634988 +3.499999999999999 0.18947031330845207 0.18075028419856007 +3.499999999999999 0.18947031330845207 0.1911708980674498 +3.499999999999999 0.18947031330845207 0.1964759378552482 +3.799999999999999 0.16269646923733003 0.16254474454930035 +3.799999999999999 0.16269646923733003 0.15929059550927432 +3.799999999999999 0.16269646923733003 0.15554832411324437 +4.099999999999999 
0.1397281316954663 0.14559172837781192 +4.099999999999999 0.1397281316954663 0.13846583764146989 +4.099999999999999 0.1397281316954663 0.13706860416375577 +4.399999999999999 0.12077596326307868 0.12065217391304348 +4.399999999999999 0.12077596326307868 0.1213768115942029 +4.399999999999999 0.12077596326307868 0.1178743961352657 +4.699999999999998 0.10502663985837676 0.10272030248923432 +4.699999999999998 0.10502663985837676 0.10629135595000525 +4.699999999999998 0.10502663985837676 9.904421804432308e-2 +4.999999999999998 9.184883923294052e-2 9.396527969137504e-2 +4.999999999999998 9.184883923294052e-2 9.06585836318545e-2 +4.999999999999998 9.184883923294052e-2 9.176081565169468e-2 +5.299999999999998 7.881929681834592e-2 8.291952392212501e-2 +5.299999999999998 7.881929681834592e-2 7.283045637266493e-2 +5.299999999999998 7.881929681834592e-2 7.448569401749823e-2 +5.599999999999998 6.789553531345656e-2 6.91832439405255e-2 +5.599999999999998 6.789553531345656e-2 6.544911399280331e-2 +5.599999999999998 6.789553531345656e-2 6.477018127503564e-2 +5.899999999999998 5.876263252151765e-2 5.858502761781643e-2 +5.899999999999998 5.876263252151765e-2 5.688094958279469e-2 +5.899999999999998 5.876263252151765e-2 5.699847220589964e-2 +6.1999999999999975 5.108781980733247e-2 5.057729641360989e-2 +6.1999999999999975 5.108781980733247e-2 4.955553284969858e-2 +6.1999999999999975 5.108781980733247e-2 4.930009195872075e-2 +6.499999999999997 4.450177077923493e-2 4.570335098571492e-2 +6.499999999999997 4.450177077923493e-2 4.28552356370433e-2 +6.499999999999997 4.450177077923493e-2 4.20987049975524e-2 +6.799999999999997 3.825331720048197e-2 3.756407313901002e-2 +6.799999999999997 3.825331720048197e-2 3.80613572029684e-2 +6.799999999999997 3.825331720048197e-2 3.699028383444266e-2 +7.099999999999997 3.300976153640641e-2 3.231663035584604e-2 +7.099999999999997 3.300976153640641e-2 3.26467287251601e-2 +7.099999999999997 3.300976153640641e-2 3.32409057899254e-2 +7.399999999999997 
2.859146946961458e-2 2.8162973552537527e-2 +7.399999999999997 2.859146946961458e-2 2.7962830593280915e-2 +7.399999999999997 2.859146946961458e-2 2.9049320943531094e-2 +7.699999999999997 2.485365957813612e-2 2.5424992543990457e-2 +7.699999999999997 2.485365957813612e-2 2.3884083904960732e-2 +7.699999999999997 2.485365957813612e-2 2.4381151207873545e-2 +7.9999999999999964 2.157714146321929e-2 2.2073578595317726e-2 +7.9999999999999964 2.157714146321929e-2 2.0606322149099146e-2 +7.9999999999999964 2.157714146321929e-2 2.1814650987161507e-2 +8.299999999999997 1.858180941904642e-2 1.789430652594024e-2 +8.299999999999997 1.858180941904642e-2 1.735543332837818e-2 +8.299999999999997 1.858180941904642e-2 1.9325107774639513e-2 +8.599999999999996 1.6053280201294692e-2 1.595684908416676e-2 +8.599999999999996 1.6053280201294692e-2 1.4929446326232483e-2 +8.599999999999996 1.6053280201294692e-2 1.651870996741207e-2 +8.899999999999995 1.3911659303848264e-2 1.3299574302328816e-2 +8.899999999999995 1.3911659303848264e-2 1.35638963857433e-2 +8.899999999999995 1.3911659303848264e-2 1.402298210956846e-2 +9.199999999999996 1.2091803064275024e-2 1.2152210976892662e-2 +9.199999999999996 1.2091803064275024e-2 1.2007109950302899e-2 +9.199999999999996 1.2091803064275024e-2 1.2599605808877765e-2 +9.499999999999996 1.0472810419700971e-2 1.0923181651568309e-2 +9.499999999999996 1.0472810419700971e-2 1.0776561763627795e-2 +9.499999999999996 1.0472810419700971e-2 1.0713724668796146e-2 +9.799999999999995 9.030714850304539e-3 8.922362800610477e-3 +9.799999999999995 9.030714850304539e-3 8.804963290076129e-3 +9.799999999999995 9.030714850304539e-3 8.714655974280476e-3 +10.099999999999994 7.808242114948139e-3 8.222066057624737e-3 +10.099999999999994 7.808242114948139e-3 7.652065276801749e-3 +10.099999999999994 7.808242114948139e-3 7.816038104161786e-3 +10.399999999999995 6.769015491520877e-3 6.653940920044405e-3 +10.399999999999995 6.769015491520877e-3 6.80962824574229e-3 +10.399999999999995 
6.769015491520877e-3 6.477946551864186e-3 +10.699999999999996 5.883120099197549e-3 5.81251691395357e-3 +10.699999999999996 5.883120099197549e-3 5.759568885385168e-3 +10.699999999999996 5.883120099197549e-3 5.724270199672899e-3 +10.999999999999995 5.086410643627119e-3 4.877875097913552e-3 +10.999999999999995 5.086410643627119e-3 5.132196010213528e-3 +10.999999999999995 5.086410643627119e-3 5.132196010213528e-3 +11.299999999999994 4.390250871934632e-3 4.706357533903774e-3 +11.299999999999994 4.390250871934632e-3 4.346356304631284e-3 +11.299999999999994 4.390250871934632e-3 4.188306984462873e-3 +11.599999999999994 3.7982630662866026e-3 3.6463358123352504e-3 +11.599999999999994 3.7982630662866026e-3 3.5589756834980514e-3 +11.599999999999994 3.7982630662866026e-3 3.718502875287719e-3 +11.899999999999995 3.293628806737463e-3 3.2738722596964585e-3 +11.899999999999995 3.293628806737463e-3 3.2837531618887016e-3 +11.899999999999995 3.293628806737463e-3 3.204705944350759e-3 +12.199999999999994 2.862426553117111e-3 2.9397115819483964e-3 +12.199999999999994 2.862426553117111e-3 2.765103591199757e-3 +12.199999999999994 2.862426553117111e-3 2.885325486469312e-3 +12.499999999999993 2.4714022749062954e-3 2.429385931309916e-3 +12.499999999999993 2.4714022749062954e-3 2.5405989190097596e-3 +12.499999999999993 2.4714022749062954e-3 2.3601867389633465e-3 +12.799999999999994 2.1347378701230876e-3 2.047211821314058e-3 +12.799999999999994 2.1347378701230876e-3 2.132601261202027e-3 +12.799999999999994 2.1347378701230876e-3 2.224394909081594e-3 +13.099999999999994 1.8477570412903156e-3 1.8606902846837658e-3 +13.099999999999994 1.8477570412903156e-3 1.9438393043568238e-3 +13.099999999999994 1.8477570412903156e-3 1.8311261888000118e-3 +13.399999999999993 1.602601363396337e-3 1.5240750979590855e-3 +13.399999999999993 1.602601363396337e-3 1.5481141373590711e-3 +13.399999999999993 1.602601363396337e-3 1.5577297531190653e-3 +13.699999999999992 1.392732355820269e-3 1.3955179084501257e-3 
+13.699999999999992 1.392732355820269e-3 1.3551286675867985e-3 +13.699999999999992 1.392732355820269e-3 1.2980266374007156e-3 +13.999999999999993 1.2011660314775851e-3 1.2323969038730415e-3 +13.999999999999993 1.2011660314775851e-3 1.1783444080891363e-3 +13.999999999999993 1.2011660314775851e-3 1.2239887378622117e-3 +14.299999999999994 1.0381521121625098e-3 1.0859070853880093e-3 +14.299999999999994 1.0381521121625098e-3 1.0454191539060473e-3 +14.299999999999994 1.0381521121625098e-3 9.841681806384635e-4 +14.599999999999993 8.989266559699968e-4 8.665651479003774e-4 +14.599999999999993 8.989266559699968e-4 9.088146934930307e-4 +14.599999999999993 8.989266559699968e-4 8.189220432958961e-4 +14.899999999999991 7.79789817603333e-4 7.84468460235013e-4 +14.899999999999991 7.79789817603333e-4 8.179994182768675e-4 +14.899999999999991 7.79789817603333e-4 7.875876191226274e-4 +15.199999999999992 6.764405168941774e-4 6.135313770228103e-4 +15.199999999999992 6.764405168941774e-4 6.771167678057697e-4 +15.199999999999992 6.764405168941774e-4 6.9673353730264e-4 +15.499999999999993 5.839219194030841e-4 5.79250454583144e-4 +15.499999999999993 5.839219194030841e-4 5.827539855584453e-4 +15.499999999999993 5.839219194030841e-4 6.020234059226023e-4 +15.799999999999992 5.049190197991896e-4 5.089582714807656e-4 +15.799999999999992 5.049190197991896e-4 4.806828119540564e-4 +15.799999999999992 5.049190197991896e-4 4.8118773087417623e-4 +16.09999999999999 4.3733863083801914e-4 4.369012288339067e-4 +16.09999999999999 4.3733863083801914e-4 4.3602655169910405e-4 +16.09999999999999 4.3733863083801914e-4 4.264051032162753e-4 +16.39999999999999 3.7942896537169595e-4 3.9574447579792977e-4 +16.39999999999999 3.7942896537169595e-4 3.691844438651828e-4 +16.39999999999999 3.7942896537169595e-4 3.8663817913527364e-4 +16.699999999999992 3.287090342802393e-4 3.379129078786301e-4 +16.699999999999992 3.287090342802393e-4 3.3988516220476996e-4 +16.699999999999992 3.287090342802393e-4 3.306813086827839e-4 
+16.999999999999993 2.839064245426713e-4 2.770926957400121e-4 +16.999999999999993 2.839064245426713e-4 3.0150865048759513e-4 +16.999999999999993 2.839064245426713e-4 2.9072020536657006e-4 +17.29999999999999 2.455927593376433e-4 2.384705857215798e-4 +17.29999999999999 2.455927593376433e-4 2.5639885838653895e-4 +17.29999999999999 2.455927593376433e-4 2.522237811905896e-4 +17.59999999999999 2.1277570578396985e-4 1.9745584106507e-4 +17.59999999999999 2.1277570578396985e-4 2.140523449476944e-4 +17.59999999999999 2.1277570578396985e-4 2.11286260967257e-4 +17.89999999999999 1.8462225664709928e-4 1.8536074727705247e-4 +17.89999999999999 1.8462225664709928e-4 1.8222216888690316e-4 +17.89999999999999 1.8462225664709928e-4 1.868377253430051e-4 +18.19999999999999 1.597658501117391e-4 1.6455882258961267e-4 +18.19999999999999 1.597658501117391e-4 1.6871273461614657e-4 +18.19999999999999 1.597658501117391e-4 1.5673029607806799e-4 +18.49999999999999 1.3805376639064915e-4 1.3266966289342732e-4 +18.49999999999999 1.3805376639064915e-4 1.421953722999273e-4 +18.49999999999999 1.3805376639064915e-4 1.38467920793036e-4 +18.79999999999999 1.1946336764483079e-4 1.1492375788264087e-4 +18.79999999999999 1.1946336764483079e-4 1.1277341729855819e-4 +18.79999999999999 1.1946336764483079e-4 1.250781439741424e-4 +19.09999999999999 1.0352241360319877e-4 9.907094846405867e-5 +19.09999999999999 1.0352241360319877e-4 1.0538581560753577e-4 +19.09999999999999 1.0352241360319877e-4 1.018660535931387e-4 +19.39999999999999 8.983346052166448e-5 8.749778785110284e-5 +19.39999999999999 8.983346052166448e-5 8.43536168297593e-5 +19.39999999999999 8.983346052166448e-5 9.180979382323111e-5 +19.69999999999999 7.766526701752384e-5 7.525764092226258e-5 +19.69999999999999 7.766526701752384e-5 7.618962409157853e-5 +19.69999999999999 7.766526701752384e-5 7.945156518418433e-5 +19.99999999999999 6.713708129260107e-5 7.04267998344131e-5 +19.99999999999999 6.713708129260107e-5 6.599575237104678e-5 +19.99999999999999 
6.713708129260107e-5 6.693567152994266e-5 diff --git a/bloomfilter/fpr.gnuplot b/bloomfilter/fpr.gnuplot new file mode 100644 index 000000000..8449c8530 --- /dev/null +++ b/bloomfilter/fpr.gnuplot @@ -0,0 +1,20 @@ +set term png size 1800, 1200 +set output "fpr.png" +set title "Bloom filter false positive rates (FPR) vs bits per entry\nclassic and block-structured implementations" +# set subtitle "blah" + +set xlabel "Bits per entry" +set xrange [1:25] +set grid xtics +set xtics 0,2,24 + +set ylabel "False Positive Rate (FPR), log scale" +set yrange [1e-5:1] +set logscale y +set format y "10^{%L}" +set grid ytics + +plot "fpr.classic.gnuplot.data" using 1 : 3 title "Classic, actual FPR" with points pointtype 1 pointsize 2, \ + "fpr.classic.gnuplot.data" using 1 : 2 title "Classic, calculated FPR" with lines linewidth 2, \ + "fpr.blocked.gnuplot.data" using 1 : 3 title "Blocked, actual FPR" with points pointtype 1 pointsize 2, \ + "fpr.blocked.gnuplot.data" using 1 : 2 title "Blocked, calculated FPR" with lines linewidth 3 diff --git a/bloomfilter/fpr.png b/bloomfilter/fpr.png new file mode 100644 index 0000000000000000000000000000000000000000..1edc1e9e2ec2a43023cf1ea687002b4ba53621f0 GIT binary patch literal 89790 zcmdRWbySu67A;3nF+o5iBm@LRq@_a;=?+OjNhwh}1Qbw2N`4zkXkN>D6J+P$u_oqbKKShsJl48tdnFi-&gi?dCW2Ta=Rh zcbz+jhl&1xM?1b~y*_H-|NI9MQrYOc3W{I<6lI}sLk#BVDsyuD|Nl=4#BX_9`};Y% zq|huD2a7G2BgILh@Po^R%oM)vAYC8%ftZ=k3YV{3@i4mt|4DoGamlOvq$IbU?=>|w zO-*+h89N3C!}Ju3t<7X)+P7CHYiTPE_lFL}L`AdG)1?wS?GIMN+1c2*wuT*BJ=|G! 
zZEOl19UaS``=xPFT_ND;X=~%X>%U!?7Z(*Z+LNx)+S)1;E3T#5hRGFPYPWA)5K1ca z=01^^V3_TOVgc$Z#hd%XlarhMCM{CIr3X9uy1LUl>$3t(+8eGqA-7<^A-z6hlyn;L z^L)aWFMO1~!ldX9LN?wA7n*pFR8RFQVfTP)LyJC5Cm2np!+sx_Eb?aP8Lo?5xP* zH5BC8?$lTsGk1P?U!-(AF$sx82n)}+f8XrlA~!AVXX$t)Wo1VPhYue4I4E) z`Oc1YwomPc#h0jIDCJT2TuD&Af+wGk6*^1UO!Fj#D}x3!$9(-IRCL+@E&JYL_KD1Yt3 zLn|vQTU%RWV|pGmnHbUD9K-J^j|&(2^BEkTX0{7`dr#9BfK8}_*druYED%TI^}`pY zrYiZyPy&_@Ej-<)DG4^gUl>PfyQvwO3v<;laUo&`>6t zLko3U!r%e$+0FM3HcNE^J6c*&(no8qBY^90Z*_QhxHnS=PxJ#_ zYRQhCrlzKlkV|JC8_7Nyo;M4w+po`b zy18A-S6LVx&D3ot+Yu0$Y#xrZvat!i(I0Y051Sj!r?#)RcYAl&{$P6&cGUCd&zF~% z8FZQ%{j5XtRUSXCtgIZe9KCEEi8!in-@dikmgfuUzWfg#24?2w<+0qmXV#mcMeOypww9cM!31{L)vH&1 zeSP1&d6WBm;Dei65yTfe($3CKT=#cGD6seul3z!cKe)QWX^Ci0j)-{EGU-El8}q_j z-kaeqLJn?VF!1cSYBvwh$#d&keu4~Itm4tYe}tTfbVJ&X$M=4fY3J4jG%z%+mV0 zyQgQj=SYn^9x~4-U*G999Tz8;4>N7iA|2Wg=}WjQBi+QjdraE zeq&Tz>F&->Mn;C9H;4Hg_JIB7!oaCF_jSxQM0CW**h*cSo5j5ZHJ94j6d6sece1A(^OF7B)6E78c1f?Q?Sm6AUYG$H$K!Z!Qio>r6L_ zURbpl86Li`6B4?O!)$&sp$iKOG(}6N)^m%Ci*s|CcT#_NW=x9ragoFeHr=+CpjLtG} zk5ZkSbTczEt&PcL%Z|d^ZmofLsHqn^;$=E4QalJa3)HJWkcU}Tba=yrA>~HUBKD*o zF&WuAYqo&iXZrdgoqPfA5QAmecU2R{))dqabDy=hx9=X}@7od(wDe1;aak@A(bId^ zjw1FD#IEj?#}}_$Vd~nRjtxJU2sXmS!;6ZHWHRVkYGz*yaC3@}jdjlN?ducn+!dux zJt)!+!#C_qkb`@`-Yu|PR)^$oV35Sx@U=>%?dDzFT`DpOF$zp{!x?ze!DXqG-o)mdeZHaLe;hA?ukGq&7Rd5V#4dsA>#U_ zw-DZuutly@tK_{HsmM2(yn@e$iH|=&HkPGceIb;q&|;CxZg1<&>2nJMh28!AH}LQh zTc1tkGG{E}8>*_R!m~{_heD2trR4dZV|a&xqNl{RP?W6DWU{&1^*Sm62aB5*#Bd>R zSlyY$MI(Lv?`cmT=;*|jhvga#9jr9a#HZTb!ThkYVyvb%bn{_k>$4v}egp*vFAfy? 
zdwWYOE8`IoO3bS(s%oqTsSxAC5IAjjOnG?zSS2VPxl6Je85mAwBHq-{;My`dTY7Et z>Mc^zt*xzZvl6zFUt3yQp6KjHGcer7b=Mv!FzZmX2iOf@;N{DgXJemGD?NDN$m*D> zCgr(u8fPP&jD}ZOMMXtUPEJ*I5Ry;RrSmX7ESHaGi2>K03;+>a#Kh__SRNi8EiEkw z-E0G@cH84_kWD*eBP3y!AyCN6$}*bG$eL_;v}0E-X-^p_tG0JWVoDQVNQVjm^%6oSCSU++1Ie zD@{5wG6KgO0w-_8i^GHc-HmyhabNkk$B0<1>Lc=ngp%?@%j8gjnLcC@)6peAFWc64 zpQwCdnY8LZy?>9Qkt=#7I6K?S+S*!6YaCWL&i`}>m#uYc- z@9n{Sw6(Qu%=KoPPD?w=&|aS(tMi$-)NF?c&6+*6K}9IHOLqqr0pxz~=@BgMLexPu zoQVJ^(mfC;y^JyW(H;K=I3DYFV#zi=ay&dd1_xDVmJNe=BfdRjq^72ZLV{r9D`MzI zM%P`d8;#bnU3sFVUHFLiby9jseiwR#T(5fi`etTk*mT=S0qZn0(5H$s@86=Yr9}*} zQNpiIX1DZkw^Xaaud}=RaKWM!?}u*S9cE?=Sbe~61Ox;Vgj(9t1%-ujl)eg7O!+yD z4;fhVSmirN4(`9RNMgM{0uVl__1VF)r~RdSq%eSC;-iRwM8;U)cCYKri?$-G)w#L3 zlKqYEt=M}I7D`G=8vO3=j`>LNYwSr?4BVN4y>oDI0C;t6KN&uOwq!)g8h#?Ze#&OO z7FSw&I8bOIps}>Gvl9>y0D<1UX1Hu^v-EHu?(O#B!-dP2AIf0@;`%Y{ZiaXt60IMA z=5uGy*05bjv0kYoM}$5p+*h!Z*oJ~8k*QUs0G}o&Cc3)0^%vW0D%2t<15Z?f=%Zuq zVU0hsHr-y!_VFr-Pn{EQ#KF0ko|d(%-B0)Kj;7%#A4)m+!V8hGZ&`jKfS)z_RaJZw z%!LB=<&NiMq^0>^DVR7pIYC7=lbabnO_K6JU7eV#Q;aU>Aj(w8t?dj4SV&Old z9-WofBhQ{ai%U+%pQT`lDlN5NBVZ-$EN^1IO6_v~=tNBfO>5dL5$iI~8ik0lite?NE2> zrp59`d~Iw@N>0vBPY>j_w-Xh0gOeLL$%|GM8*T?RQD8`jDB;6)wRG4tV`G}W!s}2& zUPZo&fdSP~2$ZL9?$cT<^mE#5baZts^rVj>b_LXu<&&#W1W9=Md3&Q{V)EHjsBq`h z*9(W1-??)~@hlw+O*RpvzS6xFS3%Mb?(VkB)z_U14jM%q>f=2T)G2w`5^OW##J1ii3m0sZ*zLadCx3M0&rw9u;FxCyM4|u3x_%8xsRb zPLDCDtgH;Ui{arXfuj8L`^&Y24|sVK5)*;Rk)xD@?WL5=lyeHo7}x7GIoa9ko0~LL zRF^JYGI;)+DNhpJ<;&;KWi2fdR9GU>ZEZ(Z=04qpPem!gFbO@`4id~W^_Hu>7=CreuS^^Scm#agf z6B8Sno1X(d=+IVB2mn;8rbd{(?O(e0mgs#by9Mqg7t*@EJ|(@(%uE?lbUK}g(*XX} z&qB}<^VT#sXKKh#f}3%0yn6A1_U_##sQHG2(vlRUrM)3HHNQsYf&Adyi;aVmuUUt# z?HU`aBQ1@-o}1oddiH3>{!-MfZ+`3wSA80)A}R5ywz^s>STDP;v-2Juofi&Na{h5m z8yn_t?%%i2zS4j``B7Vw(0N?1qb2If#9Kq!=?N7lD!Q~iQbc-rcj1TxDr1T0)RPG3 z!iR&TRgdYz`Ms|%MXhw<$B#C^nrKeJ!NG}%vrsga9v;|eOK8kj26qp4U|LvgURSkz zv-2Ht>G$u-)czRA%S$7buU#v9fBFxWUfC!uRlDD5uS%SP5W_UcOwe 
z_q{Xy8VD9B&Quf>zS>c95G;Q6BB`sZgSc;+#nz}u7bHl!(;j}HnSrMzILq8;aM0rN$BS@N@oJ=2R5I5UW)%cc=_$G>ao%gMdi7!GvuU@^1!<2zD3DwiX z{duUBXkSvDoXbL7MeM>f_ow+Ny&BkUXU-xv$dzN1LjFlOS}EDw#6)A-1b7;MRcjY( z+mRpDt~8Ge__8I!IP$ZyI)Gh*RL`K@$YMH$X0t0fsTCOXV`iqle{9(jcoJNds8&mB z>$bue(Li3#eZ!AFK0eK9u;NWkUbm5jWhiwJ?xx1GbZQeqQ5Dnx+S~A>wrW!2)6zh(pxdQeD>_wOXe|cPibG) z{J7EPMpjE_eh;_G?kQfS@zso4+AkLoWbkYJs zQw&>!p)h8%jl+tTt}~dMo=(rs*5bbID=ep=K!Ju5U?Fz)%$fN^|A($vftl)LEkQ0< zYC_DISl^dh#h-zIx_D>-R3;(@c#WYQNGR)k=<0g(=Kkk)=UdG;o1PipFj?WHvwE6Y z`(%lj5yx@+IyMXjiTwy!JPCR6;>FV1n&vjT2;??>ec(oj*CWi6A1SM;rMBzrtW5)L z>?<4%8*3sfm?1S#1`h!)(-l;tq5&7^=5V zx0pi9A-8;mvr$_c3tXi=teJfegl*UW(@irodiz!BRjj0*^d8O9nQA>$LOS^R!( zNH)ipXNMb2Vx` zU<&{v>LrMYj*bqKgoUM@WC9UdfofxQ)#T+%ArX=9uty=ht``>Xtsp=Rs1Ja_2L_6v znuv>gHq#M55pt0SZLw&z8Bh<9dS<&DhWo{5<=rl@b8v*TOcJu2H!AD_3R22?)(S6y zumrgri&_OjztG}6tY9D*9kH{tn}U*(l0rgmAOQV!|FksP9qpQ%$%kogot#S<_Gi1!$(O{FyaDF>DnyyK6gm%=i=kb zNHwc$C7yszd@qf`#+OVLr@21@R(+5W``-CjqAOE~ycnf{kC2DTsvHES5y|T4dQw5R zOmI{VAYseF!~@x+fsf$5LEZb8mz5vsGRJSC9r=7j&K&;>J|m7O)|Mfh1)!9qo_e5Wr@pCo2o* zI<|)Gw}zRfQj~t>byJ-WxuJnU+z0YxVBN^cW7E@H2L@~*T3@+x1=tHXg{J3dXj0*^ z(%)bI^-Ou?yo=bjOT*MrTbmo-8vc? z4>m_uw;^z7T)p?}GhR5}8l-n|Rks_K9uv@MX!ff7gc@bSk?rBzi-nY3_POnr-hxtPrY-eY59qpa+H z!7W-^$*5LSQ&YfknmRf%7Qtaqj)MZ^$*K428HO*3P`Tko$x@zocwuZDsT@}dM7xQx zG1TW_)-&-RKYoN#(#q1(P4~;M_dIE(TE>s6iR;vx`7UgF(X*BWB@*CYjT;WcP+)01 z?h}10?SxwMU9f7n)PEg&>T4rVx6C97G8};?|dZy}#4SsaJzRJMD!Xiv! 
z5>Gp`u+TiY8XOd)`fB-MLUeq*3lWNhM9Wh*CTo2*GKd2;y*Tb7f6kSDhcF5sR|6An_lhW3yTr1KJHLoq}k{Iuw{7j(X~< z{Jb?%8J!BH8#?k$Utv!W)W*Jgv0E&>+MUk@8U)BCN2mo9Y7yt@_)@7zub3hPMa&jr zS$|$@b+r}{f1vrog9dKX{`3OGYvBK$g%_8|huA-N>Wv%c`}nkDNN~^Vuk zB>ba>>5Wc_$Ti4qn%f^qV3aWUj8|W;k`syfp|Zj>xWM2WoNv6W*b-@v3%go-RIHyo zw592PQT^dCJ!w=>RGiLo%an%!oLn#7+Z8{T73;5{sQ>))cSwE@N>!N?qNO>6QGfjo z5j{rv%*{6xoRMN~{@c@>`j^LTRNltC@DOrZz&i~dD!qG4PHP_(#2VK#GJ-ynUpe{2 zCnX3KZ+v_lHj22oI0#B!US3clB+92g*VAJI5g0|>6b`VTpC2mM))VB~R~&B1HEgxmULQIE)Y056-4ay3!7su{g=2=g46lAEjTR1oSHCRzE-b;8`k5OCNtP07 zT~Yk+U0r^GfmYLPx46QgGR^@PfYpkoMr%gZlNWIi8UD&-(3EUBSa*bS(L&KpK{4ek zuS&;S9c8)RDNjN|A|fn&X~7nx;mMY8H7O}NVv{#l`svuj0hehfJwR$dJ{QL`cpNoO z4h~|9g8uwX^hqqiuL9W>_H#6()?RD=^DRW{+s0ByY@K8s0H#sS3 zKzn;-HrVIepj^|Iiqc_F)s=lD=E61yu3sp{kW7mEmMcG zQL))*jXN9?Pp+M>A?B;V{o15)GI2Rzb`$qS25j#60qfHkF@LZI_$#fx6y@fO?NJ<~OB!E^0 z-j8VMX>cR}q6D@@Nl6LHWWWbQ_3juww8Un5XJCaT!WqUiT@6DB@B{0M; z)4hsSMTAJQ=TA*Te0i3cuzn^aL5yFa`Xpn)q1Mzrb^3Hm7^i;5_v5k?95ygUd^>9=zk;bet6VOIR5 zUZizUGT^d1k=C8Lh$X4Gw7e`NE`G4qF5zr(LAx#0((b9;CU*VjowZrBC?(43->`7W z2&6$!*aFvu9btb#@rHqc5pNvvC^0!TwR@Vuc4rMfSva~4_%@S1U@AB`Ij3pv1y-S- zZjdAu04HWihStG}zOW{wQYRmhtI%2uFHl$F1~yNt)i&L02xL0YxBedzr+}ARHLLpUbwzg#|JcViuOX z$jBccP(XcgxZbU%p`l^L5ymRxmf+-s46YwQSG5cSe19_0O;J)%2tr(LXt;R!^7_gO z;JclrN>ot&18h0|ry>j#8Fiyr!@(j}AQwT1J{D$x29mYW)xB-@d7BO!j}jDbDk`d< zC6pV{dvH+Pg&JymWgLWdpcxTNp_?0{&Ic(is8Ya=q^PL7vhW!DY~ss}1Th9hMX_%f z2#OwYVS580PmrYqtqEA6w95V6O<1~W_4I?LJKTnS zjCwLeXs;mtrWSAXeN_KO_yLFC$cSeBHOKgI1*D#C2~_xlAoUQOL<_eWyiVdlL-;aFT?T6IP%Tq=gBfl^m;)EQdog++lbcCLG^AQ76#3#45 zoc1wc>e=Xtrg1bDGEq>#+r9`v1>|-IW6j0*H&7W3IMRpOVY7AAkKS2nHAkEx_o90vnG^&Stsv zL`0@)5EbS@Y~W%}flPseE1jt*e+F7Rgj`U z(E!p*r>fHN{Jw`2=mDWv{SOdAKH^e=5(^&(r*3qa`f&jxc=ZyNAu~ZjJBA+g!X%!_ zpgd@5X&Hh-RwV#o6AA-{9#t8c2mAZErTceb>)yNPtEvhHzVZ5NGmGJNJGuvZ+pxjU zp%UC!UZVPV0z0Qz?l`9W4;lP&C9o<+0|nJdAJCDPK{cD%?9j`8`0yc&+c>|!uI@G) zoAqFkRYwZs6DS`N5)uH9L174nw8lV&S_PZu&r_u1`-`j?%nT5GmG%U{oUK=&oddAH 
zxJy2Ic|*sgDWtM=>&L~JZPNl}VPRoO$!~X3i_|BGp)TfPWmN<-<>sIbG7n6lrC-QPI)WEiFc1dEyu(@$FW%UGMz*^(&MnjyB<7 zZUgfvFhv~om^UKTQt>*aZ~q4I!(sogfl5zeWX(=ym3H$85P>jXjbW^;tN^5qa?7fspqlRMf+r+0^)#c6-aUP$+_g3KS{eUR>OS zM~PxFum2Uq(z9q{w;QAuHb;}CaY2V%l&Q~BKcxb6kd;#(n zb_*N6-Oh9@7zjYW+}8lYCcwuBuxubWmaZ%&y*cKNkIu!7s7}3mxWGCzeY_$?=?ey= zzks2aAeY#0RT55x)$|2E2<5VcbI{pHA6<8_lg?RhI_H%p%D2f135}PRS4IY-6WG;^ z%uFr%z~EqISRHV%+S<&4N}zZl3fCQc%kG0Jh34}>$HoiK0tp8$I2G#D#6;!17f(Sf zS)~vVsD;uAEPd$;VA$P<Dd zaiBjwB=$;g3-f>ytYocC1ab`M(I6d_m`x0Nv=fhzE1?jM38Z zj}ilKahp@^yH{^ewBu+t^R1ae!31^n>i+e3+4|9C@HB(Iz#p}|w)TY`pzA|YbR4`7 zwWFO3bA8#agX8n__-JTo7#K2AQp*DtLv;Ejq(T$0MJFc(1O$9YK^^iIV14=I&2QA4 zM@~z-1nLo4A?J~kh#sm1Nc|nLfdR~Kxc1lDhhD9Ufk_P{N`MRy3$EN?DQgptSdaxh z;vR#n#(u8`-k|B(!a&xm)ljAoy$1#dI4*geZV=heJ4~IB{OMJzjSMYQ%-*E=7K^GH z&L2thpOl}XtOlVDLKy_L+YOkQ4Mn%wD@Ht8U;Y3kM+5&1gz}bGN>z1XcDA9dIqT(U zO;J%%3)ikFIA%37K$=dlo$W#-3y9Ve;JR#ToOW<@1iO)V7>5;>`7@SzPYE!?lRUcd>+*6)Vn8;9EK>HQ0` zwFay6;)Q%9n{z=}4B#rui)ekuwYT&jT|uuYONszm`G+MAoH@7{Ia`RH|kLWKdpZG;jOmz9CRN3XHIK5|H}9sonKvOqQ{ z-e3z(*rP;4p=V$KTQKnW@%AOUy1E<$Eqmq~Ht{y{4D|GF`1B?2dRJRM1U-pN|1+5~ z+5+1ayeX0trl7m_j6kl0jSi;g!^#C9^^bZBSlQTu+MB;1_$lZw2nR=8HlUP}oteaL zRhy=e12PSU$rlbp2+LM)?`=rJgY{#D&lV3LZ3s zshG>Q;NUATqTter`(XU1Xia;H6kSL|BdYGyt=De@K1+NWJ@(Ta?S_lazghF)mogNX zP!(#hpW;EI51fN3@MQTKhwIhuRBPlg$fB#I6+QHII-13J>??5DV5gz|#mfB-s=xS~ zc7^;`+s2BDIP1BPDj=?d?Vz}rvophbeFoYV0Mwz{>;b+60*Vmq-%<{+W`p1b0fU><^s5~5*K`W3?tusg&?M$j`FFhauk@5BG%W}ArMF5&mc1bU+^=o zA(25)fLDdFn(#+Ua{`UUW&=$k+S=NNhRKk(yC6chwk@QW8|!qMP0 zlS}L*tMqFahZYUJY&KF-zIa)FKE7`>O!u&r{{k$3dSZHxJuyBq9BjY|qM^9WgE|8E z2QNPV^}FBcoh$_~4Lk`3LaSGnmxrvUte_A#WC%(uu~%GVqBEX;5s82Y zDLMIdEUc2Hii;f#)8LgA78C>*f8>rJsW;U4fuevC9!W`gfqQJ|1@uY@n_P&(Onw9a z0-VjDIzy{hJeFV2J0ANB=KlsZm6rZDu<0qdcUDSGnZB2WEL;Ou?zjFRssb2pVxlb= z>OD7icOXf2S`POl3Z#xa^dL@8yg_76EuX#yszW+}05#RsE_{)%Un5y(fy)g6ABRfA z7i#_$J${cB4TNJT8dm+c)SqY#6KPG%liJZ`C>5r}CRPz`0vrT1V7*0HJSG~S5_@>w_jCM$ znwt631D&*&djr%Tez$k-ad*tSL!sWtf9~A5znD2XR6$|`5bt-tG_4b-IF2MBdp{wBXA%= 
zVK6r~R!UcYD;^H0hLGECuk6ifD690JKlgBV2R2$c#~@+@_)|I>nnj>kAbHaVx@{Y9 zX@Rp9A?Lxwg2NxmS!e?UYA(lUh}x$Pu>ruv2mGrvCtxoB@NoOBr4cCqz#I;3NmAi% zu?CPBq3%;k_G8WaTdTbc-AriDd+p~2d)o@rf~7`As!Ok5FjZ){bpA1A3YWwgAfp5a>3qN30`#I?uMg={>5`K z4>1BLnenkPOK54z%X51Bwly~jUQYr5+xo69+zzy21qEzQL1;ONjU`{dYZLUh267%S zW#HX2wzOmkZ{da(CLXlEH*e@RV@B2Y^|5hs7H4Fvz$$s_)VNgYfiWpb1N-!&N2Bvq zo;O$>>{T;g{iy`{i;|nid&D>bE3;pJyV$>FvfZeSQ7A(486z%OJ9ik?J}oRf_k}jf ziJGK0df7^E?k_>21V&PS=fOa!eQ9y=V=64XJ*49F9^joohL1ltb{jU#?A#otYc<== zz!3HR&m{szH)*cncM=nn{8lx!({pk*mX@#wRUO@YB_SumWrll>cAcX$+=T+9ijYoy(t@I00Hv65+UEy zUlMUy1)z6SLN#1+j~lG?T{mCr{e$jt0HkAlntfCnU1{2AJw~P%uP7^}g>gWbQox7q^E?otp- zq+*a9f5)?7Z!ncZW&u@NIu>O4*}wRxCk9`?ehp?b==O!iWk9H6VH}D&P5^!(QGmG_ zj{XoZv$V8n&{N6C_=&U*02$0o(98zR!aFA?S$TO#Xo<23&>RbbG`)ewA7B5ymG)8> zzdODv*lA>CtL6g1Qm(3+!tIdm0&Mx#D=r~#-Uvww%$4m|4n+KWYrV_cuio5$VqhdM z?+=acB;tSUkS(zD*W&zBwo1pt#_oY<PuV%zdtzYpq(y^(}roimsnoD8}c!jH!07?%kI?R8KEJ2efl&u zJuT!d5(fR5pslBHTfeiJd^( z!Jwe?0YUDY!@;D1Zx9d{kC}Z37C;bbsi~+`GPT_wB_g`1^z;(S6No))*^S2pjg5_; zJ{sjXd(1$e}$h^>YCdgk+=fT*)| z-gXap(Nz@t<>y z-Guf@5|oO%I+xz5qlQT}Ik|_q9Iv+0GBW6?sXh8;r>7N_ly;!kVHTC*rwf@79o^g8 zd!_swVbAyPK+Qo6DS~gfU<-x%hZ4&bn8m5b&|AW+n8Ap3AFBHkWAxqIN5*JEOiay* z5!tIgidSGyJ~uMzMtyBd>`i(P_ws;Mm4tl{fG`OA;1~dC9&Z87D}gNu$}Goy*NDUN zWcs-4@hzAH@Yzf$*x35&>U6<#$7XB3IA{xPx~I=w=`XZM&Ch3LVtN$SiZDO)75N|D`6Lp#ZEQJjKXJKJ#D$jggNk!$x z`Ynt4u4HB4OyWEB%xajiU;iCoWz0KO^8@wD6|m{D0DTXVFXC$nzp zc2-shyG06g`4krJN{QQ79d|-j)z*3rVq$a;J24)JbFj19{FB2bT~I_Ms)2=6s30#d zNg+M5q$I4bP_HuqT)YJ)j~@9!qjV=L5z*(Oq7c>&XF@yBoZ^e&SgoNzTcT+=sC0RG zd0)qk^%!@u!@%7IW0oa^?@53@&Q8KJr+9$70jvzO5?>76v|va&+fw}m2@VakbCC)` zRF9V}fSMm%8rU4{j?SCM3@oV-eDBDSeIpJPXWFxjUJB9_VE1@01_p-d>KirFvtu5$ zqk%)u4UB|@fHDJnbIlc;TK??!IuFHYTNM=Db1}F<%?e{r`>zm||KpL>7?J_BcGhgb z^XzE(ll|mOdF`D;+Xe=}GST1P542O@=tX1)_y&!c>~2+!^AhB*!FUA~lxwxR4`C=X z)*;w0z~%tjy!c199`YIJ1wGbiYP%_+MDI&W=IFcLoDhIz 
zPUcYMfTcn?_j&qtia#0|6CQFDY5anq{O?Y5bxf8&-b%f$ojlPjUjH*&e%#u4bjK$ZIP$4|j8H-U6pxXfj4Uljd)2O2P{^IXcRpJB}lT+r{2k;L0cXmPwc#*fyx19!PZJOEm+3h6?fLzCy8-r)Q zCNmI!_#F&gS2?}+Ee=NWo$c{vf4hj^P1AMHB5XQ)=WSkW&D-2DJjY(khDLkAXeaI1 zSNXf>|Ha^*$YO|r)?O=oQ+Q~?Y`NfA>Sjjt^Ug?EhCfO)KDCoSU&4<{EM&@irX}7N ze3zqC$kCS7;j|jn!M+0DPwNE|?J3w$&VVjFRe1PYFHi4#eqVcDDqs9~wUx4YgaO>Pjg7fM}Y9>%L{H2Bywe^XdmKLpCOH_r_Olk}%%n zpR=cK$*1l2Iho-K&x&VGh5Lp!Z#H7P#hd6|1D?^5rtg0HP|%@u?asPPfVJvKSd7%S z=?8Y2Uy#+MPtnzRx_!DYxG~vmBvU&-W3bn+hFsK-BoE|F`H~XU)OE|F=hZ%+O={|9 zXYR>4MY(M4_G9g=rD>bENb`@eQTLeE9W8!=@qO~=qB9BSSLTj~ij*&j?T+?G_D70L ze}fF_WfU=#t0a9Ff^=esq@|POHto)sWYj#2McwxDf+s#-MaoV8YaqfzWJkZgO*HXB z0;zmF`?~57QFx%-ohmKms_!_t+#@9ucb@9%7v!((%pRug_Qs08vQpe4Lws2VTuy>t zU|65O!m#|?!&tcc_D4dS&+HcTUk_9&Y)sdv(fpoj=SP6}P7SzF3O{2lyGawS@jmTn z*qbC2YTDL_P0I;FcP`xY>lN9k5#6*Q{~0Nq&N|bKtTku6ts3mc`c!2!)$zj#EoiCH z;IZGNisgc~v(Z1U3MW5_$`B`iai>7^UBZl8nZ>3~yBIh5)HfrpnZ+mm=w8bVU-yGZ`UjsbiGyyt#VqUaP!Z&j;%T%{wPyhu1NA4JDrG z?o+wFAPe=$X@8Od8$Tv-Ay}8qqT}JdHhBlTYbx<=RiEPRVl}&S`_!ifbn6sDUKPPE zL3xUv;xN%0I3LY(Ai&4PX8ZMMXZ>7|<5`ehp379gk-?Y{Lb=8S_+b!mJodf{{@iT0-vk`} zI_5RDMRi<^t}01QzsgXhKFupayfGt;A50B+4BWbL%Xun=lw{^^kBO@GZ-!AG-zB!=te&q%>e%=AQbI!uP*uVn$j6^n*0J(nO8Cc72Ka<-e7AOBr|91J{>SSQC?YVvX}Mq~$})0i5nF>lrF8WW&Qr5}&>z7fye zAL;6t2g#wn=#tyU1HdCDR%0DMFP19L3owc-XOO(0NS?Y7B)$sY^8t&B>gjZ<+&hgI%EkME6yoirM`y@f@POnl21Sgh$| z<|6Qn_dRy&qi-DS`+nXpBq%XEKGX#uj>3Q3LMD0SNaLZs&o_(d+U>p86wO6Os#9{` zqKPrf+o|M?U)!yBoH$=>w90Ak(zJd&6c(GahS@@%tuL16Z_Fg5vgMg=ESSF(`V=dp z!0nlzQt_yx1>>g-gUu)DYI!ypRT}#;&d{xJyC(YsuP->I?7H;KA9_rsN@w%QySc+S z%jhX;8ILJMKGG{{71Qy9yy}bk7paR4YrlROX>}AOu3;EE5P-B*$r*1Vu^!;|8j%bC zW5lYZaA?rK8Cz1oz|hb_-zK*l>tt!xcv(M7Iegw@g)|5?IZlK#-Dv$|`YJDr+VQxs z9e6B0W%_cd?A&%^qb$80QDYUf5_0jJm3T~kf?gzfXxp6DhBwn)p2H4{1`g$u&NyOe z7-16jLu^dT8XI>^{Z7us?^#ZA%lc&IpJMY9oX~O>-krlEGs>ZUSvBGOD(3iJ??u-j zZ+dm}s>j~adAYL8-~Mc$J}PSE0E3y#taD0)&!%(@ujuSK%PE=n4^>6Iod4@+X)nb4 z_KGkm`dd1YIlaGXc%~`SVo!@FkNgAyjN@c=l5z4^MyJ|+u2wRW@RTaeP`*H8UTnCW 
zML>61c!NbIHW{C3W#=Fq(EQC2NB+m%!AV^L{8tFbhKwP62*P}dqsGe z<&SR}*K!mUC`;|woM#BNa{Qa0J$^*!JJF@yrXmMndCom>aF=OlvCaKBDBQofhJ9x5 z&seFqfT}S17b1I&OX0?kqLK6QSv&fp>QpOo^DsNswqhOK(NBPq!tnOY@h@vIOKa48 z!TN_l@F}<5JCkKQuf(*2R0S4;;k!sB=k4RC7Hs(5Mx;}kb2tDj8-@k#$ z<1F)Yab)f{j)Wn-Sm;f)%NbposA1Mt#Wup4Up%m>j6S?-v6&~;O<cRLaRf5&XU*2IACgYllf1H( z?bC4io~eAH9q`A^n4Hv-8Ck2q;xA$%Q?uu<__-dZAwEXOoW3u$aXglW&Jv6LY^7Gu z#nwnOYkfQ9GJ6a77TIRLYh;C&L?6!tiaP+#OG<=aX!S&`^H<++4;tdxl?55cjr6<~ zBf+(^TFWU_{kA^p+bvb!g4f$Nu+ zE5WtX{@gnq#aS=wpKUt-$|k&3pV{ksg^ff@LLWR{mlN~&?kRVf&YivQc2an)iu}_% zGn3~HQjUvC6r7y4)8*))jI69SY6G!8YYkotoSxlk-h=t~ekShwFQg#KWqriXQ%~-j z&ECvDb88uoEt}=06B!4Cvi~^i^(#xdf2mwiH_;0bvHWhP|6JCXlWx$-goI|1p?v7TR(mzCsNy9T$SA z`%_uE=OKI)VX#Y-W|=W_^w6?q7Bl`FQ&~PAM7nRh>O@`5!Z^ci&7poF)pfOaVo~c2 zZXWf!68!-kz+Nl&_KLp#hZO0PlX;C_$mGsL7jkX3Zq=AF#h;}eH+ z7LkEld5L=>eaOgl7Bepus-nypU&QEInzcy0~t1d{#?7RQMD7tA=t>-m2O zy9%f%^X`pnVSse?Y4Suzt!D5=1YjLkLrBGhA}AHjkfWPevTfoNRDqD z&~q#^>)d2O=NKzSv{N`*k5L%3?u?lGGlJOO?PB*MLOoY^U)5bknHYGnM6^BAReG8{ zNDRF2f|uEo1GdN&F05PoqPb`uY^S2)nx9|qgPQZ#oO~yKnE(OL|8z1wYc2kLuW-+y5anWI*uF!5uOYBT0vaI|`_141bP zpLJCo#fkB!rz@C%q5p+5+COYm;kFl+@v{>Ds>b`vlytgu?_U$c=1#Q*Zv4FB+|o@G zXDm9G@9>Q6pR=-ecm7wlKu&WkCyLWGa{O?kh9I@JJ_gtB%KS>D;EJ;u0~0IpVccAZ z=j(mE^}*V2^TvE3P7Bp7VS-kB4U*5B=GmLT6*2i{fs%f=1vp;z1FysFPeXb>%{;)3 z{7cu<`9e%lbG{Ji`#tsF+O+P4UtrA)bkloEe2r(H6mX<}?%NYfgekOg8N^W%Qvz90 zmn_vqSM^Xqg&!4EJzko-N51_J_y+ve18(&NT582sb;=}FDI-;W@Qq>d=YQBt4+RzrJVNV!V7Gpk zJoL-io&M%fpNLUAd5wN4=UE5!t7rQ06H+vSX;2Zbx#aMrWd5D4Fs9pqIpT*m9E&7w z)RhH7PNZ+v)AvYhjK}uFFFDbe2oaz@L&7hzEt+@%OD$Ntp4a(FKgs;T1^5dEIZAHl`NxGs!sQg#wjYNp zEbLJ}bDeqOZp{<}FUvI*YaGA+3*#8#k>DYC&T~gwWjj3=I1m>-A0}Zg-+DuuC$5gTZZYi0wD$3>DLm+05}o*$7sGQ6G&kOS`5x}ie`ee7imupKF-{)#X>cQFi%NNCyF7u`$<5>W z{Ns+=60@H734q#Y=N?s=vg;nu<5#Ej>*`&4={O9D$_DM%5#mIX( zF{HHK;>DOu+YaBywU&K>y?0N{+$L_z*K<*OqnV__fNf8Tz4+Pn_+c^bwIIYD;peH> zmuk_di=ARhAazfYaMNEW0t;D(u~W7b_W!29hEaP={eg6D6Nbdd+YMkL3i+-bLC`D`}k{lA2ahTk}EN3NBm_ zKQ3LX%?_6vXHeNIKwHv{d{;QgZ5r^XG&4?Q_~i 
zfuw*aHheF2bti{{JMtMClJJ6?3xd!cluHgnju|Cncg2jzS58^RG#Lr^1QZcMK&c#S zK*#sDJ~v=)xHAs3n)ngU#B|a4bXO)rmdj= z92qhVKNS_v95Sv%xw{<}J}C=OVjpW-sdKVwfrNchE5Ghke3`^&&URZ>2v&meFHV2s z4_i6^AL;xVakVT~Ajk_tpM6ZkA-toJQH;Na3puZD#O(cn9r?vqc<+!&VAfu@I_Zt^m(r_D@-1&sc9DNb6T5*23Qz&qot`C zHmCZc=vXw6`saO|7NZ~6{}F$vB51KSJoo{A>7B<%E@@nqbB^Q`m|jQfwUq1Oz4c$} zsg8!OPqpSg)Rdkt7Cdv$bN;s`Qh4Z#ribkK8Z_WAN=K~2K-#%M1zZ7P757Lah+Ak@y;xo$14c-+kj37Iqqo@rM%XX z4>i$iaSH=!Ng2oAxY{fGEL!a(n zFuZB7OR$+C%AK6CKV-6q6#%D(=Zlx}iI~FIPXsh*@7T+1{j?*&xCxQ2jLA1?0+*(b zwkgOCJjV8clj{o_cZwWe>-FG65##M0p^f&Qe~{9k%_r8>3kTj~QG$9+>zf>f4;x1< zN1YyIZ7L0ek)M`ljd6TqHW`w=kMLJbnKD?m8WJGVNi_9$8F353WHAyFJSjAry<7To zZ?ur=E~h(hoko*Nv4!E0mf<}nQh8HC3yq??hBwAPQo{!yk;kXHLa7q?c{TG@|C^qn*@Yx+L5YFuF5dXMv(@zpP0 z6Z(nQ8e{RD{D!s?ZQBPohr?M}mkaq(_mj1^3VLJWV39^n^HaENyU8ReRoB-}I0G9| zviH+C^pv_IF0L&KA2R=l>68@qkced3UsOiK0Y7yjRk3$qt@j`H{r50oSM}+WiQc~l zTwltN=p?{$LRMGv+_1`!mAZf92I(6@{-*Dq^7T+C#!}~_0$#e^|HCmNKJflggF+}k zsZW!1+8rRuug`z{dQ>wcS>Jh)bFR0t+UTaeM%-kiwsx!C(h02}?|2AedBDtf$b<%@ z1K!8`abZy$;wwXCF31(`-jmFWN5+AYknFF`bC;=-Q1vQiMkv=gh)fvOhr zW`-@dLK_MM!qI#Sam(F{3{l0Zxc1^*j?zYZX$Cc8IDS(qZ`IgqT3iQz+{fo}J*mgz z<>W}o^}f)LZT*1d}K%teYs*LVzyjzhyp6I95h>NB?@3sLZ9&G(;kUe4RFQBtQ{RX z_@oeA=_p5io$>kom=FJ1pMit-Iq|cC++oyvq`ZG>yf2fGoZ0^HmS@6fs+^8bz=7ZW-rwDl0shR=7ByI-RFwn-!(dhMMJYAX}Nt}jniGS zsLgfB;OnR*-_m=04%*Ivl32ncdoX* zJUWe(Ws#Go&a=Jowl8bpV8{12SI3YV`&OLtdgIi8)iBfnt446`KN{kb(^Mb8#>u`q zBCPwFJ=5|*gt^*}qD);ZdHW<0nsC|gVDGtn9nBdDs~>Ioz)>zl>D6s>d8p}FDVo`# zbF36zVm9nh9mKM(%Mh}4GjMY+nEe;=x zK|14T=NItpP zu*@#z{<*Ox?9jO!xHJCXx=C(6b7QuV?E=H?&5V&chFhD6 zn8yhH*pH)PaBW1ND`W>fn+TNczE^J&K{cwk7hClBAR+7hccKp@7B@Z+Ss#F$=%rqu zb4w}g|9MYw2b_Eq=czT#i7b&fmBh#*AvMzJol{LX?8|xOHQxe#?;uvMb8!qSdcT`SxELZZy)QS<Ne%KiJ#@!C7YUjjrr7_Gps0Bz>>B)?b_|-qXtY z#EdQ7BSN-EBr^S40fdM|_+eT!%xAwoX1NRRc>U6P-7uE_^wj6G56#1|jx^@V?kVSr znu-1NblN>Bixun7x9@6ckHF>{pTG4_PS4s7KqirgB)*(Acq7;DwXUa7Hwsq_BNsjr z5u8#3McyCfIdTw9pm&>KIlKR9(B}lm+wJzyO++^ZkacDN`vvVw#5EUl&?}C@g!BmI 
z%0EbYfe;lEkKNzNTHD@No^v6^)*4&;U?Z>*snM|U*xj{t=9f+lT|4h~3*%s5*KO{f z6ALT|if7}*@cQ*--DF7sZO*kNQTuPov4j)iq?MbC+4Q}w09-O!(Bge9$1l~8jS)GI zLicw7U9)LuOL2!H1oBEPVh?K$D3`#oq}w1Ze5< zrL@-YVynt{(mApAD%Qy)C#%Jd{!7v=B{)axo0~S}v*(vzpk7is&~25>?}QWT>@-5E zO5??0qka77#o0GUb`NkW6;?$wI4=5*VdT03a`g5%Vg7nn1v?UM>|VaX-DcguH|Yr7 zFsVMkicnL&rl&fRaY>XUE`2;LO~n3p9CB|&P))#D42jqbZ-S4KNY8A`>-NpVyr@YG z^{qPV>4$`h;d+)cgSEyq37PR{k~t)w`uEERD|;eAIe?Zg|Sg~4#K(%?*OpSkQ^VPyx4$~;%C<`5@rUy|yJLH4q%qJ(d`e5ayUENzt(>shy# zn2`>}@t%YE`3mGwu1PajU<+u5i}1!q;fGpmON!95WGD7;`Mdtcn{J5Nz7J zt7tprI|k9jAc^G3`~_f6$QCWPS3X?caI@bit_rzav?dpy?nJn{36Z<+5;MJb&JaMl z%uaR+q?sFjSUP!%wY-Sh`N8Rf@_Xq-3b1@C&bPOqZ8C&e89`Vt65)zG;MI8&uji2U z@=0Y!eD(ic<@&~{*#rM^A%XIv11036Y3395|1jE|Q0dD$vV*YDcsAlS2Ei^mly_a+CLuJLH_eeOuv=kA(;>b_DDq;LYxYy-%Eh!26swi}9)re|?Wxq&G^n$%f#gUDDqXD#`kaWfd*Jen=1pJLlBJI%51`$>=806Yr`c zIj2fDT`r?t)A?EzZC1VJ!;8|6sP1-l0_x>)7JQj0 zEX$5Sl@d5XYxz1R-xezm$Lr(-dYcv}V?H>itIY%H3e7%A_y?b^l@d!K2zgLAOh#M5 z2op%px|OTYr0vT3)d$jU-4hXFZ1Y}gZkRaq>Q+sqy#d#+EU0|E00q3InJ>R6ILG5x0ZuFc0uqgU~qoU2jv zma0gRs)6tQrcM))-9~;JG5c?-lD)?P0mN;`uG=WB?Hg$q)Z+;__luvD@Fm12r=uFS z&~{+oCSEC6dg@6wW4H7|Mta&)tXXvHwn5VHMDQ$ucj0yR?iaU`-*G5&FDD_khvbuw zW%Im@nYJi8fV6I9!!G6*%a^u_2`=f(aEYZf=exu(YcddZdbVjn!fDLH zSSv5D&DyC~HSQRSv0k%fBwW0=0FBn?L4t6)2+A;mETV=R(btm|C9W^k{AAbdR3j4W zZgB67KJ*j`5gAvjB7aRTm8{EL5BL(OH)*fm3Niv``rk;Q$lduG6?p(7FxoykB~e#p zRVTK0{-hs+emyQs#50`ahuVzi)@Ts14v_dZT6yI`tte31vq2T_la_ejApZ;n+ruH0A z0I@X&v#8CGx$jS?LJAcpq_Ts4M!cIhAnqt>i>(&v3TmMGjkki!Dzr^Sw1bzcZJFjG z`CvP3hvRh4pVHP&^U9_Yq-;&aqjNZMsNq7U{#>t`u4S z6{*ob0=abE)_idZSzQTiTFZygOe;>gX+$0``6aO9B>TS>=-CnxgifQXCQ%|Mqt=l1 z0G8+(U+??t4bL#r|FWR~$-`na@#ZdX(Is`5)zx1b{)Q-yI1P`cObRFblj!DLg-MB)u zVKhL2LW}b$a}7Kh#$4`T!oY1JYl`@0HVaC}rp4V;65y2A*WrNT(wWQ}Jx9o9 zK7B>_VuS!{h@$i)lu%QEBwbYyBYtC&9!}3{zPAa=>`NUq4%h{_Phjj(pLq;6&v3`x z65irn{Tll|2|_p%Bp&BfVkI1PiUsmEKFwU~wknBOZ_e+rWKoz7udcY9? 
z7W2S?n-IYEp<_u1_SxeW!eS%@{8g-X*)Oyu*`xHEqaq6JBfxi@q((x10upVp#>V`=N`$N+H7i$Y4 zFWR}SFHX!PObWPWM#}yyAw1>)v@&cpfp?_zdBb~}Ce-08RfeUG}k9nIxT5_2?EoD(9 z{5rDQ1u5gbU1@ESHHBcFvkf+Dljn2+eZvNqD2&DzvkY|#(wZK&qqz*jqW>e6` zrla(Lcs79qvoQ{3WFl?3j9-#)FzUig_eF;Q+%@(AskXwU8q^%he%R8ULZN9hV`t{a z**}@XNOuE@97!vjp&!HxZe%%?{jBo|UqL&ohXwhG{L67ArfTfQYJ{xs50df4i`1>Q zoHW@#2~!TlFzSZ)Pn-jwrcI}J#f9Dzq8(|J4yf+c2klk(5w{vX<@m!_oZ;RcZ6Lbu zF;zfB3AEvzB&_(OY)d{4vEcR1FY)P1<=RmKOJ?EHn(0QrJ7B(kVgv`iy%b4jT9s~{ z*uhp+|MesS_v#j#{k!3<+p$^tmF>LS7D4OZ00R=uQTfZQ6t8I-+xF2ZNY@sB-6%>f zNvbj}B>!7B>aalYvfmf?7et3a6n*(Ir2H6hQLUHc1CPF7$dvD(X(kKw*W3lqq3)8- zMU&N_aCup>_mw1HltK3u!33{4p5DrhwwFSp<6<#mGuLfi9e2YrSpQNI{!6ogVE$D4 zKLOr@3Bw5`d$ZG|h+)Kz?SENWHAi`%WS%CuedABLuV2%kB?GB#3`a_*;e9p>3}*aY z??xxR6bR2u`9S2;6|y|Q@xSPOuFw*zof8<*z4}*=(f<~y5ps83bv^55-HZ~=EgDUH zLCS4fU^oBhK?0uo!!B+HDCymgCj{=F1h+!zPbZtPMQh7@l${@8pYfO3`$$Zkt2e*Y zL+e=kdggkH;@wwzy7?&y0vA}o3F-{%ZPpV)@e>bz9tW398Q{LjbJ<_)+IZgX) z(&r)Wef1%;n~lKuN7?5ax*2EpNm@iEQCaknRmZIZFCV!J-?O+K_QZ=4lEU?`V^jRR z!vSdJND8WB#cSn<9Lt@SGNDr9UZs2^|S6h0#D~QPRc&?A1EzR}NG@ILD`$5j(D4B;ar{O$VOUyL45lm6F@d zDuU~s9rLQijztwAP9|TJX=%B9ng!w7ZQ;pSB9Fd8_y^boJkY(*SnJJMi&>aPb{c5Z zCUz^NtH}PY8#y=&iaJ~& z`oUy=wPSrt;D`qU1OY&u1e;&Gx@Oi0&K=78W=f4v!Qv+ypCo16bSBWjT%K$l2%)?W zeb~NU%!hE_j2P@AZF@Z{@%JW!jo>qP+3eyc22mb}iz2X{kCF(-0VqRlA%{!U08WQD zrl%>1VGo^Q1kOjp_*<>x7A57xaioEZfa1d8-FY2}#l_P!`t0Q=cgL%I;PmH}N^QZw z|9|6cXs)s!&-&*b=64NS4$MW;I^K?+ONjc-0+2#r1)XZpMl zQD0isGFAOndL^l!U*)yF*o0Z^wy`+97#sKDEWtjkXf;p|BS7J0@RCcG(FGdQOc?uQ zWzvsxp@l`IGr8NokALod#e}@_|4sQ(aPd&nWF_u3I@nI~= zakw{lh|Y}@-`BsuhqMk!8}%Llrg`S3lN%BL({bHm5Fw3%M^%_G=c;d=YU~@v^{ZAN zH`;U+7;Ck$AbRz{CiHz?=2$FqkNNYfPLlt`<&f9}qN3ApbmDvF3nv%Uqob@Nxv#9W zeg==akM7&@tHY>5y;(O^{Wx8GPLA|Ft`fG)!f3W0qHRp5CT4LYYD29WEotZ}$I(%Y zNrA)H*&X6juVl^bbGj)^Z1|uu$~fE|G2@-;yJmEcPhApqZ}XPP1v|xytb`?$M_8ke zxUq54{L5v}MIIc1xLA9=KaokmPMp@de1ug>fJUoO2*qdl=(^)@tMMp>TKGle?hEk) zsJDZFmCVyeH*imz$%M0XWf&{=u`)TJWAxH!Hw+C|)A|HdqMiL-uTFg5gE}gFMA)V$ 
z#VZZ{*sQ(GsnFZ*08aT*2m&@IOR_y)5BWXPOwtgE+_q!aoWHHUGp(NMUjDGUT<+X_ zb#R|)1i5o<9{$CUI9Vu5>bY)J!Apg+TP-%I8Kt?c4nG&9X|WS%%ec^jqoc{zn~@EC zYn5#qHp5Ft`qs3s;$QC*jNsT5Ji>~az28d_{uyWc%@Xdi6U~^?iPl%~Moi0&&LVNt4c{ulHyJ|4Ahz_CzIppJfQ6*;+LE)m~1kCUfh0 zDlN$su=Yy$OxXF!K_K{Ec!6$6{IZ9RqdyqhDF_X&oE*P9L=fP**ul3ef&DMc){?%* z+t%(7!%CgQtI&E+gsT5E-w2~nf7u_X@N zU~oU-+qD=wMBLRCrnIo`tZs$&jq)1RlMOt z+!^=>z0{V%r0p+yA>|0Q?$2ynw6`y*NnAvZ^dliyiPSBio3MvvA)b%5cc(iYEMvUjIs?;lrImDJd;mWNSFqicZE2FKx6yTw*%@ z3Y>zab;};QhfG%BpccAqTCD2@LJ7!~(6ue_)x*YdX*)|>(k8C$`CpeJSLJoTcB+3f}{sutxVWRasUq`oz<7V=I|s-ozi$mbbnc;h!V(+!NB%h$zP zeMz6lW6{MFW~qYd_GJK~o4&@h4}2V#bo;GlB+dCkRYc{J zafJXv`e_iID@hqR*L{40_C90XePl1P`ISYB1r7@%;m8lbuswG!!9)##yW!{Y@)lHk z`t52jFNXhq@ENz2@w{&(2R~2DL&pjfcyoQ z)9)&gHUl?eKfarb`h{6ftljFt3F|()Kc9`JGJ&}*F*ayY*fhjH=`E2qC-|vfP~J8w zA3!{(@l7VPlv5|K{_(rZ0z2m7eE%v7CVudWJ2_a+5kuw$d+DF)AJVEeIwr0NA(XBT zAv@=0-Z~kVKa|*c+AVE|V1H^dCvM?}v7pZcYA!*Q$g**Hg5Yfgk`2kD`z7v;B_Ci5 z>T)s+-qzDWfZ=#egC*XOA-T=E|;5a{D{Hz{)ccL3}bz^paA4N zWCR%(pB~IF91ZV^DMl@MSn>D+k)v~PjKBUa(1NN6NkcrOEq}17 zsPX_8t~HE_aniNKTA}0RDdknT={YmIis!}vlmLd5l(-Z(BSF`T`v9QgWqp&)nx+UxR6!XmR`NnO{%FQuq>-pZW}y2Rv%Y$^?-^LDI0>?k6$?9qFW6 z65t?!rSb@7U$s@mL3U-ef_E*hv?rd1|7kFJUCTH1yJbNl!d?1=2DmPY_>tqPzSKr9 z?Q)ls`%(6q3+<81md;Q&`Lz>xZ7B8mmgRJD-Z~JUg3kj~6W}2**Qmt2%q`OWET;3Y zY8X%Daij74+GO(t*lF)*6WbmZnVhVdoU|iXoTgY^ALKHd-PK`VEC5?IOj{^^cw~4T9T(;J0j2RA3F_;F7BYhv`z6);`SZJQLc_I7=AXZ$a(ADuv-O4wEV*O8*bi(xv`*q2B$MWceRMMUmEUnV z73$g}SL-EVCd9OZtMEpU5HcCze}E3e_L79z=jhfzBoK<)Se&tLD{)%Mq`=A-b8}(~ zS~$?W){16UjH8;lM-Ptk0nA3oGdH+LMCc;0_APHQr-@$Z`~DMz7UOLsI&qsc(N1{v z(Kt?3rxUdNy6Q~{FWSADAwUtE*uoP#_?Eol7Ao@kmW=o&D0&IQBX3-QiZ{u-7%&j` zlWA;Q$850@2EC2b9gD5}tR*e&wZzmChKxYN435-G6uGjeZT^B_k&9*`v5kd-W!AG@ zm5dg({EBI#cs5l}*n_?c2meJ)N+3@9B20i!3-_O&MfF)d=nFiOS|mw(FL&d!>|+pyU_2!M0y(ug3;tF?S^;_c32334Rj1Ej4)ZY5M}34-LDjcIv7H3S--2@4UQ* z2Z_mdFAU z!JJEAaTsf6r6h5@A`DI1(q81etdZTblh#zUG*koBj~4Y`q{<#vj`wU~F??VZYd1Sa zCe<(O(DM4sHEV?2|ISuFj;Z<@n1S>?Wfi*F58{_k6)nFaADDAlYd>fsvU$$&z{q|Y 
z&ZL~%aU29uymLp8!Th?owu3e!&80^kPE*U8s4R43JQfwvH^O|&#LPZvZ^O?lFuO%( z9e!9~Ewka0s`WhOKA|-%fy%3q@#&$%NkS6Uq9RERZW?>%4&HLYwKhKC2Ne*7c!oYJ z+|=f?0%NKfX8stg9LjTb(40&vuO+kHys4mRmk(EeP{{r(cXz7a6(Icn@gakh9Ukva zK(O+4rkzV(Qe7L`iVCYh-#Hqp6c!7B3pdDHHcuAB|NY3fOZquXX?mLhFfa7nK74!1G zl=4x_BT3=8?e<4Zj@-VBAxjYK*P~@fMiEJUTXZ z&iM@q35O_d@CN-C`WN=@)1U+HlV8RKDSw5+Cm8IYA9%IKD! zT)>+*Y9zNViKOvMG{7?=8&qgh8aNDn%V2^E5wiMpwQx z*$y+&96{b52KL`iZmB3owqyQekIrpUx||!dsKY_`1{C{7a)vTueRi{*C3s(z?Xh_IG3#j8!l2t>Jnz zl!QI1o7U&t*)vWRILwrcJ#(~wHIX=z+ct^bh3ur?7Ym(C&_jU#Qa*_3mC4BF!|u^&mLqK38nkD^Y1ZE9Sze*JF@g-(It*B$owHFTu@M0gG4LH zy<8L!=(}(X+$??H7@Ni2(UQj7-KodX`e1Tu>;$i68q^-4!JWd4@b$rd2<5~vM+~(d!YXY#!Lorn?}iB z@Y{)8M*XAlx7qnrQS|hjJbL!57%ul=Yc2sku5#XYIqaCGN_y>#Y8zwHM)Tn3tGSRazWO4X=BE-X6(ZIHwsV1v6rf88?vX;*O!oR z!{yBV$JnzV%LD2QWMpL5CKFN6egf)Vs`+ML+T1&K(VvwA?w5}z{PtAj{T?j6Tl#Uf zwM4P4(Pr%?RWD1;kf>Ci#QpCJ;swoj#$&_J`J7V}$bJ+i{Y6WU!A#YAqiigy|D?kn zw)3G_`s(452_1=JW(AY!^dNAv zaAsu6Sj}W{1sicEnqDlK>3JVZ(Vdm)57kXQ)x7!3UCUaXFP^!IdZ`l)U+`Dh9H)DB zyLUea$?M>fy9YtvH*7Hm23N!UP8eXBlS)cL657baV?ujH&CZ9_8NMh8ho4}5TVZ08 z(46LU*CcDC5W7*BGXDMK5WZsruJT?~I77;gz?lQgFVL}GlGc>kGTV9xtT3<#f?^m9 zCi2m)YkP5{pbZG)l3VTA>Y-@w3fT&1dy z1b4ao+T>lRtE_N`xl<}llO1(sA6)Sjxra2dN(m`N?^CL4atm{1S8v1GK8qYCjEQs+ zp))vAzo0&JCoAK#v!itd1NzW(mWGL8{=^bR!JxhxS9H-*dzQu@KJ~EoTZZ@gzl8@?RBu zMccE0_16=vU6qS)d`?!BGh-d8N_Q&O|9eyMtNGm)EW8)Zi;?9#&D!kM2vfyj3rXp+ z6jNIEXXoasW@uJ{$dH)0I90MYh-{{*qWU^J^DFEW6cpgNrKuDr-GiHK3G?k?RyHf) z2c}5Fbm-^LpZlC(Lo{SS{|@Az68;Uzum>}B~u??t>Jp?;|FFkRZSwS?b3p2B`m_P6t*r&JU4C<*P9)?)hm4M z6`>H3A0!aF2|Zqj02!zQ9TG-_q~N5%b-_<%*27d>&~`wNc!$6Da`4l)^WtEjWaFv{)OKk$rLO5mDO+z@wy(Qklk}x`CX2)iB*YCWxN3EX! 
z_+!;x+O&vMcDb{{{kfSJ#Lby8LtO^lfm%$I2WdE8CPE%ZLUQp8@uY!z7hKht(PV`b zyTwM)vd^F476)8X)^`s@+1uIerL*8sUJ^4k{b-SjdUCH#mxB?V38R%^zP$qG`pC6< z+dA`zg@>*6*`aRay-V$~k-_X%&1h8G1zY^L4Y%Vu&y_DF5Au8-ypKZNFLlwzcA!{kZ*MB?}d#bAN^;0!e+ugT%5^=f$qrIUl98SDYwAr zca*#!p(xJJ&p$Xg$RrVTI^`5Ej0(AU5!5w79=8wOv}omN+*kAfhh{lI@a}GFQyn}g4%P}L6$d6V>wkrV4=N-e3X-I9&0_4%tK%#Mpu;yh z3hp(3{UQHBfw_T}wcUH^1Xfmu234JGiidK7BP}sJ&4NiUO9{IoS7}kdgv|DalLyiu z@u;BJuV>qoOhBkJzj0sEM-4gBIryIq;3`9_G>AV#6hH~$EFT_Zg4h$)Jd30W5aLO^ z23E_Zor)~+VvQ-QWCtU<9n&`=J!ej&Z1_rl8I~z<@2aNiN-Avr3HeOgcQm3Is!EI1FE46Xiw^HHq`e(8y#t=$CRwzN`?`db;bk z76UXVV7f6JzcE%sbrZyFwr$&H*0wANp##`3P-HgFHrYY)?wV4gb-DNZ`oa$0h`Pci zTyrj}U^#g;5@I(n3zoYG_53?mn&THUGcv%i_YV#(AYRC*J0~xXn~iM_w+?CK0n=@@4(B9!FKi> zvfx?6M~}U(wzQq_2V?*5>3i}ZeHMW_)3GrkF6;q=tr6Kp5TXHLBJfoO4yzeSg;A1+ ze~A)5)YrETem}x0RqXtZT-{P}Ac%g?amON=T|Oc+HHn4zz2d;2|DS8MQ$54Ob=3s? z3U*<^H}gbtlP(9RLI)`Qx|DEBc5PnbPCFtq8BfhW=qTySW<783Y9Dd%sIWxZgo=i% z4$JN#3eu0KO}qE8)TGLh(cnwZJLGb;QEpyA*5_z_x+)WXV5Y-e>n4ww9#WznZ?zc%mf5 zyXyU$HM9M@eP|v>T7NQ1-RRygSi^yI%e_09F(-oq$yVsS7pn*R1p4UY)YL3)@KtXU zfwTHu?zP)twQ@jq}vn^3;>wEjprR?^n zR`s2vP${}w=S+pX#tLm5RWw{hB#?_p11Vxq5&HP?Bjk$wrUM{c_GJCo=Zl`N@BCa@ zk1EUo8oP8Qe4IC!|5$!~$92D-q5RJe7uD@562im7U%djfBRQ?405nG6e!G!_yqB;C zXpDj9PHq7c@c~&8O6DdRyJGj5IrVbb)qAf@^tkFc?aFYJ`{Uk7-L?onj2@va@H;}* zGbk6p5;4zB9Wfm*{vtT+TJ7xMGpy>sV}y3#f@5Vod~ToAX0w@S=nt40p^MCm&=oC#-~cN-TOhm>zXUT&L!woT*9*#UowQ@bHkdaTc% zcX2JDe@f;$0h)1PlxSm&fHaV`c(Exy(g}1mD9Opw?XkvsbD;gBTwo!icMq-uN2<N?DdI4coA`yL(36DN;x;2-2n8Diq}xR?nh)mA`rzdEg2yy9!-!!!UB66%?RA z7U3W@&#hPVBA(n9W3ShTOCSEa@E_o)tmi8Qaad-d<4}~2^*Q||II^-Yw>1=2MCGq# zJKZ3iYxcTAcVpgXcP5u!OlLG4DSf{EsHhyvmQOIS<(~*woVBMIjFgDk%z%Vqz6BB- zxes=ex@N|y+Jil?&oj0>cQy|+d5v!-2ly`0(6Py0QEfx>Q_5L6NH)Ig?drJEe*Ve^ zfmNf+6*8V6sSSYM9kT4&6P$7y6S_vUX@f2fOR3WM&gj2`cls+24ifl9lFvR!@--90 zD9>tJ-Q3Kyw9rz*9&c)Cgj=$-(X@SLK|8m=snqoKVw04g=uwtP#mjZ^17^i(1ou*# z%a+9UX!0ec+fUR>2uAOHi4nB4`1Z!_*~2^fKH5AMzAF06CK9YC`0wVXznL)d{%7X# zv`Zty3{}^2GFp-hd7AV;@KkN}9qCom4nr|@j+2hr@V4bI&qrJAgOt#X+-PPaDEqdC 
z#cV=9K9HL*$>Us@8cM3IFJW932NE;G!R$Jo8&%+cz!3Nd;1WPUiEp^d{UrG1uPGId z0dLo|KSWw(HQNf(m9H$3WmnJnwHfe6sA9v13j?SjsK5WHy?_|bi!>5&7CBMm0E#<9 zSOv;$tDub*9u`(qTnu*%5_alSr$Ee48NiqO_d6h&0R3i=mi{UibxIdD<&h&;iOfnT zzEJDzck6X|-u!{f-j_EsMNAUq{5Fgst_xfLeBq^2rn#}!Xj66OW|tuKLd*dKTID#v zRkC{b#tk=D*RlW>rM>(1fzJV9F+?2QX7)SCc~cxc3i{G`h%Z6i#4k}G==L=I*^oUy zLL7IH?yA&l(&uv{FQ%R0GIY8%7yB}t*J)mHVPug;m1{_;r?76(Cy7VSM%DQCm)NI; z6sbCRTpC@|_r|Y|*Engsu&nJj{uwkO)_uHUEJ;3@#&NL`nScW715IKOFZDTZ;z6g` zKOjJ0vEe!*HPEi=%;KB~II(2hJ88;wIj><^QPwJ zg%;y?;fpuWn`_Q6JVIJh(mx+CHbipu1FMv8oS+Ox;gFR@Q;w1r!>F+~>m+{YTk3mD zhc-a(@Pfg=giJzv3?2MRz>$M00vO45)t+MeT#(RAOG_gk09g)*8LqSf1{#vomP)fm z8#+H~tVT&^r6SP2pqhMlK89|x`fK8;BB@JD1){)^uH*4aLdoCpA_1t2B8&0Cj zBXLzk=UdPl04d_7BT@KilRbO(fKtv8MhOpZZzqsQ0@W9HatH+{8l#`fC*4drLHGVg z;uEuXq;k>l(d_ix$I`xS49A8wd>Fs+WXC^I4IkstvPzx|KG+)*2=QR^J`r(O(f2-) zZ?_kQ321&+wG&W_Z#?yFJ$510+FLm- z`9(fdHUQolSZv(49-TD7Z^L1xD%HDox7FjHYni~$$c&wY2V@+F?D^;aMsD?GUvWFt)RP zI9t+x55Za+9xKeW2Mfdkc!q;3PRnDF00JXTVjw>RLq-sBs<5z&2B!0B6|g}1rtdGa z^5`k~lizM(k*3%BOJCN?bGZVc48O5=4*v@*>(LEbxl zk;!}{fKtqwGN;`B{{7nyP#tP@6)h8Mg9LD-(^~gdXVx90Mwflr(XS*guLDT~)FFS0 z5v&5NIC}Ze;o2QPFhk)ojS!Vq<;^#br+g-(qCcPM%ExAQXU*Ii2}Z{*J#)AjHR;!x zlG<)Ro6z;|AtnR`baZr(TR%vD^dvdiU}*s0k*Uea?h;4);2OAdwSF7kjrw5x+M+V+ z1F9;mBk<>EI;!TVga_;R8$Ypj0V?|;=!#hG9VOfAlzbuCw=rAKFyvm)I{=mA{y;h|g za7gWHeO^pwvGD2^Uv={as4&#ry#RAj75iN#I*35;|0)KK z`F1isBv-sCB2q2qd#nNZqy3QdwcZ<(&tTxGG6*Wm5A7u4+X=C%YFRHH5IxByo`Z%D zY-4W?MrRhrdzVHND_akce1abm&PEWZ1gWWq@y&pMO#@&7I-k-18|}-zZ`AP=v%k5d zT4I`v@k!oT|6em#WQI*;$44i9qvfN_XBFL4v-mg@o|NSAmj*0zT?@LtZB^1J92lq0qU7Y}!&f5& zi|IHrvXe!(vl27S^z^qYQo~H{xkY$#tjO`J8*0%3QuAv;tKy@|m{WyL zonySY2e8=*s(Rcu90tB|eXgR%>}4TOA$b?9s}GuyEr6v!h2Z+Wx*4?H`Occg*j#BK zOj)z*lqJe=E(Zi?JISm}U*yeo$(XtUc3p&xFhVEk{2JQ$ti2`E+roiQZ8_xO98h}Lh?;**hzW8$Ws9^ zg9kEf7qpcENt%C##C}I)U&l+UmoJXh;oZn#11eA5*8RX-XdOOIEAKzvAZeGFQp8sL zXz^C>=Zb=o9zWkh6T~I?xi;^XgFf`6CF^Kpfe!XZW-vdmYan!NzbTq+X=0ka5&$uE zMHi30#q3zfLlop0UY3r?lU+$1J&y;0Lok1j0S5+QW6&w}dwkGNEkiFwokGJGGQk&R7t67x+s5d1 
zX?VHJDb%}U)Obr8kjd>~iIR8rpjknTf15KpBg>@h@3b|I5rf5G$7!5^XGk1R98W2aXm zEF*+cpp2Ml_oc6|1jNGh`H%_TA^C(1E5%P^B7uCsWvvLTv+Cnha>aK;9JBf|MPmaD z=mU@z@lT7MjUYJ%xy^KM;Z+8P*!1)}iWa7(wlF5}%$YMFRoDa;+Q7dnCq zFnfGfmR#HbJm`@2NLypq%!uY&*g$7Ff7waVqY50#Yx zC!Gb|K|t&7*6oG2|L|v9V)Ws%&gjxudx~MQF-5DqMii173iVDni*16iCZeLN(XPRO zxF?cqHXh5YoI|mkL4sr!gUh!4Uzo%Ho`ERtu75Z2<0Ny~a)q*mgog53PP723{hY>b z3~E4$D$-I?GicN%jGw$Hix6(PG~H98TMI?0i(O*_{bTc2chj=0TLqj#BjEX`JIY>- z9v&Wmrhs@Vgoj?Vf&z4O{eXoFvZC6ZZgK4M*H9mPf?LLl0g!74K$o798=<7z#h@Ls5g;HVM zY%mUP{QLU)>Vhh0(t+MNL_M`ZY-JGK7sP^fykF%!^QQ?)>PAYW)uJaho0{sb?;0^3 zHySyWdbO&Q{p;FC1OS&ap^)6zIFiPmY^|r!9R5rmRXgjzPe`a;o+T{#ISC7YhHZ)n z4nALek&voTL4gwT2&HU&t==CZFD-i`PQo@G^|(K&B`Y%%xOpU94xG` zQ-qM}vfWNfNL@*E=MC94veMHVy1NT;v7i+kAFl%w_l}IcL>_yn^V4_BMwtV_Jh(`f z1jf97F`vThD5QQHe3hW~b*J64F~lT0^LuS|{67=ztdPz^Ef^|6ut#S9i~bwQRaB;JzK$_uee86q>JY zleXJwYC9CCw7&`o8A}`oi~-a(kv&tDC68?fSu#BrWp8nt%j@euhMF=g>6n_;WS_+9 zj=~W7kV!fOKSpHPVAI=_h)GFFEI45_+;H!d{E{A7vdi=`(XR5d5%1l*6d=%*8#hh4 zCB)}P-ISU-A+-8Pg>u7Syr-%zF4&Nib13?XH;z|`e7)5zF7*h<+HCZ7ggU)lW@Km! zJVnZvJh`#;Tu7%7**G>F&3)_k(u3w)^6fIH2YW!f=auFIx7-Jz)L8ST2FQ9uA zkqd#!K6~lX$_pt5^AY6nY_)W*0N9zwxq>7lKS$v2=2I|OCqZ_?{a{7iuc z$w99^S`-BO?88GNm4Tpz3fYdyKzd2jxj#IN=$G%FRA$WyUsQx#oM=8}bQQ`s(LZ$m z4KfWw`T6ThA2za6Go+6Lp64=>I?uEue+T!*;`zP4$!$nsY3AH9X<~r3wm`8!CSs`>71B;IusKJtkfo@U7tU|627^xUt295@!?wo`+SwYOY!3)QE#e zzD3#1D;bd+-~OLa^A8rcrnW}#-iSZ(d-LK-w3wg0KE_Yssk=){5`G}_8>VTtv5@P! 
zp0Y8F%lgr-A>QnB0}7CbG&m%Q?u5F9RCEVopZQZU_cIqVNb=Lf$f=cIP4sE8gedf}>oTFY#?Fk}sj}6zHyvmHOp4c+AB<740AzeZN>;c4YQ+0%ZC2&9~ zz{KH~VOZ0nOe1GuB<#KXV%!9LkbI%LlVVeDu7wVoYQ$`hpKGRhX!%A+S69?_(qEIr zI|MW#Y0L8ylrbsz^d)_L{YQ@;wQ>RZCScwbi$(Ufj~CJY5_$?>(cPn~u7X!x2;Cx; z(&kElJN*&-v^t~*bQeHf4?dQCU*K0!`xH#|D5w5~OT(RH$n^eqA0riGQn7|>3O*NG z91(p#D)~@;eMIjRac9MqR`$OWZ->y%*9_sSPzn9wXe4J|qx;^Uuf}1Pi0qb6O5t>2 z0&{W2t`?s&`psP(&7L@T9ybvl5W@kkntOZv%II0Ie<}{`t`3p4L*6(;T7(M=Pu7s1 ztoZT;(kZ8?sJ8Q~SFbvA&AUNlL!@S8neQOvz&n=zus+5!^(#spYo6djOVrE8PBq3CRXAh(r5GY}2s_>@Nojb1&zXfZ8zw`a%y#Q@jZ~y2Ts@1Fh zgfPaXEx?MOu)hG_L(*W5lA%MuQf6&0Bkz|r#TCcGP0#o^md-8f3k7ReQxiB0&TTXBe`MJy%Bq*e0?(&mV(tGq7tkxgA4IUhf zqHGZmGG5cycAtyJ*3X_GNCv!VikZ6fwEtXFlsCsb!wJ=+{YmSwhudsMNmT7_>TNOb3QON)WA$#Gi&lmQf|30Nioo3a?b@C!gl=nvuN zr_9`~@#7P*=Gw)s87HWao!^mh^)nsDV0N^JzsI3Fm&)V-LvNZNA5jjY`;n|i@t%VG zm7_xhrwdo<$`B}bYxwEq7A5(Ew7iKBDuHo>+j`s6wS~dK0@r-YMG3(T%(+lMfba0D za*PbZMx!2ST0J1O-K^B{=PYsSBDm+n=qc*7HGWyKKdZZaj`6#=Mg{kZ^`p|lF#)}_K z`gQc?Ca~QY3HD@%09w&#%lxklh}L&biAPkS4q*vipMq0wSt?hm;W~Z4&a(Tlj3fVr z{6DF9NgYK?bu0X&hPdLH7H2DUg2H4B#Fjvwgjb8&WTNGkW$r8Pu57Yo@a#BLG=tvrQb{vK10p~oH@Q>I<{v-SB@BZzLMaWyF ztB5c<%*0XdA@J6usY>kgZ(81J>P}OMBR>{lShDamWrP)n$zMM?Vs}$vAAR=%zyTvy zT__YgPoZA6^Zb!U9w>4vjj??9rzYSKL(D#L?`y$s7OD{IgqJFQzMqTDxkmv32TNsGc;xGeLK{ zXY2+$c^=R0vQ!!ju;)N5UAb}v{AhuyVk-j=!EI5cMR)$gPZ1^FJJGy6ad0T>&yOmB zs=41e0WBZUAVV6-Vjjk)sgRcq(n4KOO=yZ?K>T>R>fB2V0 zMHaH}SDWz~u0TQ5c_|FyK%xCK+!jB|g(!|WU1DWrZ3>vP^K`vi(xe~Rxh??^lgw_A z?d$imMNRU*4kD%J?Lbho0$F6i6RHVJFsv9Nbm-`CdgZ%sI8V2wfg!6aUY72A4UJMR ztEn?E2m4bKb($W7k$KS-H#W|ICXN=>z6RCN(eB=Eh+l)iJd(q}>s@s9zuJa*rcWcD z@zf6^ZH?tBYa7#Uk1i>PuS$SKwcc~GI#U8LI#6MdionNz!fj`sF`P9|R~ZL>(0xmR z@TT>l<2N&EPmh9Iz0Dqdy!a#S4wBJvKf?d`4_Au6Q)MtWhw3sBufk^J1PwUI~T9)dUvP_>bWj2bHDM$8TjRI;+N z0-i@lCpsb`;_>4OaF!tcKl>@=S5+_R5Y*S848Uhemu0#3HhRV=VbNXu$^0~&o z)@}HM?}4whGrA{D!Z3y-FaRgArquI3a9F^L5Yha#-*mII4j3tj44-^de-Dm%t_plh z2CK%Kn@^A2((;P5b*bY`t%9tk9&y0 z1}V5NFCIy_ks@{p`>m(l)Lq6b!MyXzbP1E4Z(C#MzMU=aD|m{HBpm0f_WM8lt4jX* 
zl7D%^bs=YOy;QaLsAjQ_^ZheR=ROIvJW&y!{@tE(P&-ZWK_-sDrq zT6bINZaf|wisfbpZ7Na&t+?^xb>=W4lKQAsX@o6+Lh=jvc`_NnCA#pHIqbsYvv>DyQdJju>NV8Q{Fvy14n*^k>AKk*p*nvxl&Jrd8n{SMsYPNDjgn$<0$?`R| zHt{h9V_$)-A$$ZsyvOEZOy+SGE~M|la||<^HOIQz&O*P|(p)*ZM)!(Nyh4PI+6)%s zjXw{r_3HSCROMYPKZ8q`3)R`wyxurSAXs;)0+1J4KOr<)>n^EcTq_vP;`n zBZKw^aEo>q=6x+uqAVWALF1J9m)}S!<-CeXqFq@PO0)hCw2SF+*6*jsU#)43vI7Ce zUh?28%W(~wUlPYH7a5TF1jAgVKoc=Cg3(ebtcy!4Hq`O7M6iV6~}VfLlG8d|u2E_;3d50}o{hG$wtfY!K*Jcf zTrix0BMB}ddBqlZ965|AOtQe^G2#h}r(&GEDpbPbGVYdE_04}0EwkiOvU9t6gLQMYZsv&2y)`!TB;P4}a(uC0)NOE0E2stT&uXO`3;TdntdlCe4) ztuPOl(cGA>tgJ-rSI`Xu-pNy2pXz|+D}Yty(AH*GpMMjGBk2bfD9rK2yGmo z-k@@q!DHDPJHRb#O-yd_(`_f^|y1~>h2c)`kklp?JRHK zSO)`gVeeH(An8pQbJQCj$9Jfxmcave-YC=qD*?xDNiFsESy(Vi>EoJGi~m?5=W(RNbjlBUY6uWsP~ow_ zWF*ALhZp14;RB2$Nk7=4O6(*P{JS2MCvL_hKMF1jXyGUxxO6UY-D{~HjA@7RYx&mA z=-WZS^*j!ZI;Qa(TN}&fNpmbh+=n;7T_S5w&+P!YfQ%_LQsGYE00a^vr4tzHT z5?iQ~+}!<3u!kz)Qb!@YDvFn{e%CHF*?kPQrw2{u(`e}U z3!(zRu#u7Gb8!}_wPzQt&YRY zO@DZNxz^zdfq$lEh9-=-WT8cDsygWnqe#wQwBYIkX*GxkS*mOF(z{Q2_%9FCEa zvL!+c?HuRP>PRDGLB z-asRFwpxP_Bii4so_WQ^a77m&tT$I5pEz*>(3i$`-z5qq81_x?gmbXjQ@I7&F{^*z z+rnRd?z5%2G#E)-)@iRjTpCBfs_Q{W+!A(&Re^#;T}9xxu5(lD4S+(^?HO3d`FoH( zW6LLgYV!Q5{3?}PW!-+(CoB1kD}t(PrP_ zIx6*ZWY!$CgqK(wcm#xOaztticuD3yz`vUqUPu~lC^|sKd3vO(aiM*r`AD-%o02fdDu+GjFviX8UFdCD!?A>aZ z4iM@y%TY}dl$A*mCE_|U?stRFw2Q1|*jp^#ze?_$c25B776`v+|nm(AFc`?JucR2xZJifDk zPx{P!x*VOH>`X9H(O;dx^;ePDhAajjdFyH!bALSi%NGHQ))?d;&<-JCP3@1BGmPfAG_+e(1B= zp5r^!yF3oAZfjV18W*+hQ$Ts;84PAhB7ibZ^~@1)u7LUp+^CPg|Di*Na#fvd2FYmP zrS?|C-kEDn`0DL`y@()%iucQ1v*&S2)=ohdZ*mvBbJ@81XZye*1Jv&^?S!y(PI8j> zHbVo4Yz?h}9Gbai(C#rF<8f&3-lzWlV4>;}UG8C4Qn$e%;pz2d=z!V?O%SiQd@9yA zU!0-z>T_8D1L2%qa9i*Nz3klb#8}hLLFjhb`5+DYT#6?yHKbg00qv>R_%=Nw_xg!= zWYPv=9gMyD_>g7ks)4gJ5unQQ1STL|J&Wa!KQ8|llF&C$)(bKE^d1fP%lX{8PTqwg z6&#Lzm8rgpM-sd=X}Y=>f9Osl+kaMD{;~$Dl&(m!BkZDwbmE_3|75^>7JABx8XCkk z92PubI4CNBlg(5?BFB37e>Br%+DnW{+eHl_K9@NhchZ)36LMX)y*B*L;S(q0NN|T+ zF5E9B4Q8ZIy3iV|`JR2YB_tSV%;k`@;^3^Kb(j_+fe?5YEmf 
zky|gj{NC$5C+#!ZE0m7P7ik-BOm=Ver^U3IrUrm8!RTlnyeojLqqS3}T`)tk#|IOc zWi90LIp{P5b0jq3X{4}q@|DnIfMASO`?+mIMV9;xrjUjsO$=4(SYiS5`@ zK*+L5>}-wHWCtedLUqsVLk~xTSf&X5zmxD8$ zwwim98e#B~Tf3;C{blOeadxl6;4QwM_{jdIgf_orpV{=$JG9^Jm(aVdt5h{H{BFE& z@)$T_zm>e~BXGUn-$n1!9IJwkmBsKeJLp7_}4vmGVIUia``q?Dz;qcVB3*HbS!tE0Vs8og6SIGc0(d=e$P{GA(tQ| ze|yRxa}xw@4ZtFbe6cAm2))9Bf}m&A_V??K?bH8=D5;(-lRdSmd;87IC%+MFugF0w zZotkS!zdce7>^-s60eJLpLvH4?~YRZgFgU%bhn7B?U5Ulc%%XBU?C#Q@T!qEfG;Q|`n zUssn7%d-ICmuz(2fA4yr8-IO_xv-z9E8bDG=v>tcf)_V(nbGe@OI%zW~oci8&PZiRk z2*BX6&?Y}{emW)2K!km*tYkfZKGnGwo=UI6AAW+7jwIA;_a))z44=}yRP|ypD>usN z+Ao&nY-z&$Y3X7j;`iWZo6c92&I(Ac^b_1-G&R-0xApp7yh}QAWTD*{E|4@!dztzM z?2Qn>JU9In+%(??=)Fnmjj1spiXx zcXtFK3H%mScaN(vfKQC1-%;^Yof6`p##iNIQ+MIk*<8;5#UCM0o7pP0`SN_lH-sV? z@x)>C*^Ja=XZF=~@{l4D7R9{c?`}sWH?Nvmow_Q${iiQ@$v1}V_>_1xY^s_0@Y(=^dlg`0L8TlZx z6#n|vbN{CH+klv8qq0jv9U?w02yYLs!AwmlLK3BYJR1jK{qZz1rUupGPlNFbAagD} zceABDJVPc@j_n7fCOw|eBJ0>7`#`_mY|hHfn4oZ|z1olLdH<<%00^}3u&{HY0pN3c zclWBDUH0KqY;4k-3&^qGm1JI&c)CpZkJU*vsojLc9Q|-a)ezq^=nm?323$M?xmrjp*hm#GACj3IcTdeK*X+z zP+hp*Z!cizu-^q!aSKi?6-H|E;FrAB$J;dzZUn(Bw^-;Hn5|bxwggz)I;xF zkfMtpdi0<{uM@HtT%Zb=6Ae9|F1Ql4J@er1S5Z}Uc5%^+yB_vy?aa)K`46{$E!;qN zqIp|zYMsUlYcUpZRn)j8)t*G30p%Nhwq!e>Fbm&OVDrK2@aD}MV5Gr75qyH;uvWAdV!A}3 z0{NrS(1O6xlJW5HJw2oQ&l7+Pz9`q#Wo##x*r*+|b*!iA8Q|D7dY|WC42NG*)f=Y4 zyaaLxMwG;bBsv$pMV3==dk#TXg8bdiC@Jjv9e}5~>510VvkV-yVqmMse9x!1gA4o` zp)VARD@~Ko3_dBW-<-b9X>S`)$V#m>1yc zx(*P{1KjqT8*A|O;0->|`u084(TL=bJbBVRePX^QRE%gy`e?r8+ovn?u7Z>cKRt3! 
zS*yi}&3>7GFl`m-$7acj(C5IFlj-nTWW7PRgVGR2$ZSIO>5R1f!?22L5)vCgC;^Lx z*oZbf2(x0GQ$qbDl1skbNy^atTl2r|$h)O>Aw?zhdXssHQBjAoNYo{!%!C7YgCdOx z`>H~HbCU?{zU~J{Si9t2=ge5JsmJ@An+<1oyv4?6SMuAn-+z~+zT@uUY7&k+K*jT+ zW_&T)smHRXa4@32rnr0jpnP7t6IRC(>Bm8RIP$76^T6Wo)r{?iGuPaBc(JA8bt ziqJ7R+WCzZ$J38M%NOwq9D+c-8h26SB4`9m}pudTkQ=HE4b zL0lZ#q1^7D-g)okZ`{U=H1=$tMLv$DG=EuDuvTN#qNBRl(_x{JFW|8HF+eMuF|>|4 zM^~*wx0bTG&|^66iFz#^F`p)&EaN5RlN;avl8tozqehs{fn1?3iD`762U&411qGw1 zb2j@jXdoz~c2lKUHxK7$eeKw#sz+CFVmWxbqz#@ZhAYA1YNHD22C$|Km~l>Icu;vo z#bYlok+}~$AaNA}Qn-6}5Ge@YUh&OP?0Q@gp|gJL{7H4xP|vhXk4nh9zt~?k|AvY|CD9_#S9r$o=ZHbac=ydrM8t z;adlHQp#Hf@OCKv!7{_l%z8Kw7If`vqQ}nF-D79tM9`ClVnV&?i@JUr19@x7Y{bpn z@MkxS(-&qq!QxRzJkL=V!k2XxV` zLy0+CFwVeq&DwfC+qfB=IX;zvXN!#3XRpEVR=CT!ZtM{8;oh@-vfb!0<&NhXpN9kz zG~4+2?6cYZ<(YFz!z-N$=g7v^{v^ocv9YIld6ko%?PH3ED0}yl*QZb7R5Lr=%J%I2 zzFb>7cVr*U*xokXg0)!}W)A!q&F9@Ud|b74dozsG+UPy50-iAlh+c{@P**Sr;13Zo zT)e+Pm}1CqeCba~^JiG>WtlE1N%(pRcIwn{O*TkviTEn$p-WB?ox$qCgQG&L&p)LjR7#U=S*ND4GgOLx@WmNr$dP>743f1lW2&l3$Y$h;{%Gmi ztk2V@>R9&@y1a3=Y-B##>;BH{rtXg7`q` zfoL-j1uGl=>J_3wp|QA(G~C3*Veyp}SC6kP_b(P0X{Jk(*VN*95_{HcRKIhf-N)#j zq`Md?YH#>niG?|RwsYqVRYzDN77~ZEq{vrXJ`&=QJa+UbzgfqeCOa4>Ky--0Kl34Y z>3~6>ylb!TD}#X21nTLR^~6hyZx1pbxLU{8KAD@^<6M}%HXi41d}}@YZqk_Nn>Hn%6vi?bqVUDILDM^_N&F3T~e41D`;7f~XEgp;|WkU!ZjR_fZ-rc>(`sO#( zfVq}N+4MpgKmimgp@WLMD8o$1-L8d)l3MjRwQ8-22BV(P>(sLB+5V2~()aiM4&Q9) zqer3UgtY6ZA8AoDF_BjUUhc-9Vc7w`yq`LgFbuCDTua<;y+JY#2@a_$q2rRMG3jgY zak$y1Gc#bUL^Y@edvj|ok!5*A4neOo*JFb!%Fpg&(&1p&dUsZBW+wlko0D=uc#;DW z>^*g?-rgj^P(ZK-g5$+zKBVVQ#3zTc+F|(Et4oF+|BeQuUHPo1<@Q44#y5qvvoq+p z2zgj%h7WNPB9G;KAyFKn<3G>BG6r39Dym=#Nqfxb-XY-vaT!&7D?a@jL72tQ#ljc# z$JDQJNx|&c8N&kbA7p43scLJ})6h&3Ws!$hoU*5H z?&eI*o0uANzG>^0FtbYV;e}e(lL6loM_P;_iGZ2}qUt4R&+rrION=}U(<90#JXSu# z#Ha&w{59H|GxB6f#gm{vRkgXfdSp5YV}koCc&LuLHiE8iGB7K=t=^>Xj;Mu^+vVH{{Mq}gP?g0cI&fgyM%_@tYXksbB=N|gVxoL550A89b@di`4ciUoNv ztF^K_h5mdCJU3UP?OjkgmOiQ>)?|QX0C9$eRCL}|SDj07Ot(AReWEom}jRt5RxSDjPY22Gv6^^olsgCUVW( 
zv}jIOorkkB@FaRa2HM@7M}yUZaNXVAq36{vnsvO3jg3`GkSlO^IV1?SI*z;^o8?{y z?A)jQd40|)4uln+UxCvNb9d!P*s({vna0DX?KD+|#OzxF0V&;Y*0N-MsSzq|eqv2W zeDvo79XI}oT>ad>o#+}992^XlMMwyX*L7K0`wha$i_o3#M<)Ob4z8zQab_8!uJ+F98uhP~82f5NnyIz|J&)qL8(7S{t zEuzJrbaZqS%@&%1zYqAglic^NnVXwiS-mUDoq7(EEF$kvQp!Y&6hu^PVXA<;uY?L$ zY9|8^N!XL##mD;-#03mR49QXQ3hY;zKR7O=vQg(}t_j>n2K{M_0g}=UkO%m8fcnDa zJ@eozixzRR0oGdl35>af;rj~I8^|%c^T89ol_nPuFn9ont3b?3`q2i5hj&drW)O4D zPfxc13LRSMr%Kt7>dNsYL_OHTJ}I^VXr;zG`7dLF-lm$)ESynehex=O8s3^MR&IA$ z{i$&;_Pucl^QeNUR`c&D&(Mj2;7$1Jb@lZyS&uXvfEKgtD+aP@9}^6^X>M#ha`b3Z zfF?JsA1f;>cI6A#=G5(BnAlx8bO@1VVyRyv!;@Fm)$oUBcyWDsC}8!S08PGTd68=I2fC|B7u?$xp+X&|_<;ZU%lFWXxTCXLh)a7N(^wA4UYH^aIE zc{MdP6=6|WnQ7B3E0B|u14pL!LAQBT6--^*MZs|L$dNacRt_^=_%727JdObGjPlD$ zcQkmiS5rQe6EZ%ra(}{?M;v=AxgTCt>5U2}nj*#h+=ZyTrWml8^6$>S>%qszC;p_N z0Y~DITP-alu2r8yoNTb88(|U&I19ic0%K5&Y-}>=2=T1G&!3YkCrInt7;xgI%pzu} z@A^%8;Y-&T43C2D>{|KZ}WpArH+635JG9onpuNh0$s_yHy29b%zpv^37}HZSpe-38ycu zhTbJ4tq0-@-;M|*m$346#xR&C^%TuF(aJ#-9(W|g<%=YD0kvbFyFgDo?`33WmYvEg zAh4ea#vz(T7ta<>M>-ou$+Zvj08>ICy7WA*r*7G`zXHeTJi*ku{9*b2$p8A;JzDDA zS4vZG9CwZHqg~}C`_?y}K6YPhYOE)zII#?8p?gJ$k30A77jwk9`F*RkCpp}etCgD` zckrrHWqDk>LFg8jp$;)Q#HQYS+vYX6P_I2H+D#v>LSrrMVEO5)hw7onbxEC_!we6+ z>Q21LVr(iW9dhljU0xT6rtEjX7u76xtw#IaU)6a_Dz}Xl8dnT2dN={wv;aSP6NV-} z+)Ysj{YE!#D5yeLWN28}ZAHbSR8;NL@`DR!V96YdVPE108l{I@v)>jMstz}@2Xo0r zmz?Ln(%OrssNj;GnyO4WV@i87dKxL=$gyM%DdARq73h%3OfZ$q?SWSISyQm?Lt2;y z1D1jMD#)|m2WT$d{Yy$azSE(Ng#Mq=38b?AXMb3+#lt^c8(KTGdq;guiEWkM@G*OF zvy@9hlU!K#q>_QEnWC$GJ#%7hXLf(dn6v{_J9cp6{S~9%JchTxBRMs%suE$x)- z1-ft7!#6nGR-waC|DtN_zZrs~*2Nc+-*E*Tg4;(|){jbw`6z<0yCE@3g8||}2>i&0 zPl<>Wzkd%=$VdDM%)iUc&4mp{WnLYfk%60N#H_`!O}+RR(2pJP4h#9hqRi>~!0YbU zI8#|`#*zg%B-$HKO(qR!lNOk(Lo@S@J`$lGTA4=!>rljbc~7HaL1-uD<&dSw_ae2D zRg_8+zTqmggR#a{mX(#klHZ3B&M+bA(4IYeD8@-gkUaU7<16|Cz_HaE=*333>Xc;i zZ8x@8P2~+CzK6}pB?ljhzxM+q`m9w8IkIA6^+IyfXEA5E?X3)YzM+%{>uXZ?I>>&hm%Z5TOwqh65xa4812%s 
zc?0>-6I^UiZ{6jk1uv=)jDCz$W@Ful6eFHFa0nJ^-F7Yn{lT#R#W3qN0;+~Q+tUDQbJe5G#ay4#M@_FfXI3cEZZj*o|rMf4MCRlU_7(l(2wkJhs5?z}qZd*c_hk$~RE!X#D zR*Z}}Yll8+ME)7%{~R90HltPG-V2Hd6?xd{-zAI`5!v?|J2Zcd?~l8Ln@&{0o8|u% zF>Sw9!n+Rp2|ycrFghCT;SQucGEM8CK+iza!Ghi7YV`7Q(2+O^mE%6o79y z5@vEDJfUmt3$3>Am$t+j^=qLK5@tQ?Q0Z9|YQj60e0orh0bWnI&(4x+qh1Kuh1J%d zT)hvKyQGp2GgtNj4!k2FVVhTgkC8A{3b~)+oUikqPO0mTdy&fgACx~(7k1HJpI^7l z=i%@twezpKixY;mYOhwJ~e|a@#n_GIGij5QQCbDv>4bxF3>=KF`QdKWoF%r=C2FaP#A z8GcgVL};)tv$1ohtqJg_Eh@R(e0|0+;KWq(e*7d~sowd3PHYHLHn(ln z??zyY^gh^=h?EE)+JJ}TNGf?})V~&|#(BVbIq93%PRQLYeL-XA5~Kj;25{5gQa#n} zuisLivQ+Zs7B*kxj&~Q`@2C&m_JVr6oSd`mqSiRoMtaHX*U5D~tjIPHG!l$oc|IsL zyV_cu_|zb46-Xb!7y}jB3`oixnK7cYE!IwQ&4mf338!hZh$~|U*zBK&cm&_M@|{S! zKyDMeJRbVBo2$#Ng3CwtHl^)NSWq+}nlv~c8Yo8ZFiv>tkJS>2Qe~Qoazi0 zFd}y(ja($&aH=EbjaE7p|3ER(eKDQ zG%m|^fVVxVM5>;tH{+$lDKg9Yl_3z$YM%>SR|F@-<2}RK@bebvFsrxc(hy=J)MS~% z-L`O4D!N^(|9C}x2%FTk;7#YNE`FJpy6jdwIQyMX&&~Gk2-POgn=gv-pQN#Y2>i`y zdwd`Y)jK`>yg^30vmcGN+2Wdj##&Fzckhj2xzs0(8wfN%coyBUY6A-aRf2g=oI83& z!DZN(^p5ok+W8Zyuu|@z;0#_BKrvXF`wyv48wshYg(((ICvZ@SDY-m^1WvgR!ypc`PIUf4D5!t7Y9gHy zHoK%g;295B*M(0PMzn0BEUKmB{kwpNGT(?h&$+q$cJ!leX}7C9o-Zb2T4tQCTUf&+ zgTZ~MU2WFaay6w(Glb$tbV4?)qfqTYd3FNlmoKyo+du8N)MB6_1a8vT2)a!|x8?r% z=RSR;51v6gb|?;MZVF*m59;gtzQKq7uamxAXCJZg*ba}C{&b_~36l({0nuW+78Zoq z1uAvQ?%E{0_#rv^nPD%bw8{9ciGhv=2NrsBHkX2u$zsks*QFK(`(G#Kvg>=y*LR3o zsiO}G#=$K#4YaV3m&k(M2U!vPgi!!d$MC>OF&?XxnqZGrJ4xsol_jN{^BuG_4d8?B zjCiVCGi{X%R&liz)*b*uMDLX+KK>Wg`SPz8ZhQN_=Fjo|g@TwYcWc3iq?PjLXth=U zob%h&Gn_+agaRz{ zf?6M_B+*S7iHCz#GY&Efb*e-$T1u<|yb|E@AMys*g^oTLTZ{u0Za7s;t(GxlRVh-AuXbX(2*S#Hamxh=U-EZtAH5ARdG!yw-@ z0vMQ}SZPJ(ZM{foYfoSHfiDWN zkR!d$4L+&qm&Wy2${dH&$0DXo76>Xky;VBf$N56BCm06K*1W~U>Ae{Y^bwTrTyEE- zohov=ib9Ea{;T9a^^i2 zxiI+haZZjQCu6tgTO_r9+am=gFPpm?E$E@HmlwST=x4wLX8^62CLB zlpED0w*($3Ovk(2^w6*_p~9D?jr8W0Z(3e{izFU@EC7I7K~N%AN41+Hp~ zuuHZ$2&N#ZyDeA$1JuG^X$kIhwa+mc{MMpCHQ(a9??8%K6^0|&g^NsQiU6=*h)dPn zG|Eb6jiwEJ{Ohag6U=dk2%mWDvvGw{l7%`?@K=!1_Ru-BJ`f-u0v#b|MadarC!$CQ 
zxrm}2X(4B>u`@2y;^Ay>xgZe|W3MViiH+4NNzupb?rIHGyV=IO4fW}f@p}_|0-@iE zm&?xSBUAo9@Z1kOd;f3ZW&PE&U7v%yabY91`nO!%xTK-Z3UuFWRF|FBlOfelDvTi4qNIewdtT)zh@EBHlM6_d4O)0s8LY*8c;gKv^)IACB_xvvX!% zk4zEJb{Qbv;F61O&L~>E-&?a|W~qGjnx5)g?&`$-C&a01_|#k_B_p04d|$n*oO7A( zWb-hnDGj1XR?il@PL|>3?nrCrBqrZeQ0RN|=SST(H9%zb4Az*gwU?R+j}#=#rf07W zk3At|1bv#=LSpQ0mgicyI4Qy!TT{*j|Auk~A)A3mQuKG`(|<$r@7O6R;OMFH$b6E3 zgJfUBU4FFfNFdPdbg~>9Ye(+Ko}&nxSDkZ-_$x1Jna)yOnK&Jj9`(|wM$z7(?DX!x zvlaPlqvQ!AWx~?94f_t%=Yp}qH%|3f9i3Q$zlf?IE{x(~+o79$p}YE8IoE?F81Fta zU3J{D3OKty9+_$Fjl8)N_1eSfyCUbgdm55Ch>?VB9~C81~rHb>0mDDvXxqt$wS>V1C}IPSoX(AEb6lG9qydMZmtifllssFTj5S|9Z7MxAm0}Le7O- zYCLxVGC2P~u!c8W1LSgcT2oT5qyCqbva@IV-lRGRy`_YB!`w70viex|7>ez=&WNeEq$mlgwj|;rHXHnes#Mx>876oK@Y`o|aco{lJ6Av7?rbaR zWczu|@i~GrdY#|>_*xU zO+g(kXOD>?t%iH@sh3k z<|oRxwC|JjGev#c%`G@Ynl(e>EZH)Inve#`i~t8WsG`wIpB<+gLpwoR^H5KJOFH9+y*O(B>|bFuE?6 z(8mFyUc#sv$}{2Z|8bycLt}z$n$Kj_wkRw~0F$b7a_0Qc@$H*YfgN7AUW;gTegmI5 zU@5X?x=}hu-m#atdf)CJn@E&wS@B=JU^MeidC{|R(M}9-MZssv_J#TKn@0|kZuhlW zDb9%ghRXhOQ_6h%LP&IGw2<=}eP9@O7w)8)P2R~%=7QL)$(3nyd%;OiW}<#WDNC}K{70<PvkqY`#c&zyN>i^hVBtn6bkfoiJx2hjC3SC0n2$o4&>4gRIQ1I1hW&&pyarg$fk-mBm^`_Lm* zAClkTDGbiuXV`evoi|ol$m&aQEa%BCIn@{zY*ZsOisp+QHp6f#?|CE8&i+MdZF~m( z2;#{A|BoOQMOE1ntnUA!)NX0fuVSCn&W$@Vx2^*Z{RioCf{uUsQrv}PC{?2iX1-k0 z3sImt?{y)}G=0U_m;d+{I5b|2%~C`E4@LC3>Erl+p_Q3wZVW_n1!r8w=UvobggIF2 zvWZ_fO(Oj__bQ&%#=Zue7E)F>jVPlbJd}nPci9iJGnGYswsbVFDyb_ydBQ)GCCa=orZwALbh!%?@M$FE)}4hq6*zzbL)g~NyF2uIMi#|K4HC<&tJvFKi%9m`~4lGn@UR!gYHH@ zzZ*SQnM)#8cyN_$jxo6}=Rxq8=fEbXYL*g>v3=((Emk7Kqc-psgsg3pyKFZT1X$I> z?U@584nKuLPvX5@k5AN9UZ-&W&?$Ibx#@%?#y`s>^CYN z+@`h70wWZlGi%dugsoz+Vvwc+M%C)e`yIEPfw#yHj`EX!8r>7NFvRhOS(bhVyLzvJ zORPTWqRz~`iguLvBRa~dX5Br4`Jd+;G4=K#5~G{M?^Vq#3PqyenmL!SMFaAK0qI-D z8`9X+sMI0XhC79X+SV>9%g9`Vrl`8o9mE$8o39s5W%r@9!2kztUKfz+1qB7S+Jhdmi%d8qSd5CkXZp?ck-|vPr zb=&O2N@`U*42qAh_r(vGk8hM~o`B-UvY3)P#pz+PrmX-+#qT>2p*ivs*;R#l9%e(i z#C^o$HF4?X4#t(TS0QrsmGaWB7>_3q+6K$q?0jrbYVSIE1&<2%OLEytHNS1MmPmD#lqGe>uQKI1*$}-|rG3~@`!mU11@ODI 
z?})P=R4{jw45kq;mC|Q)NFHD0x6K^z-?@l{?nCv}+!U_bPB2NJmbD#|$cyIGes|Wv zq+U2irEzZ`jOtdg3c{H>wNf+CsC!B9`*I(->}QGeH+l8 zblR&VrJWYx^jcJo`Jj2mbzXxrv{^wPF@A-&n7sP5{$G3V8P-(VwGGE{)EOH(iiis6 zfCXtHO$ZPb1rY%O1*uU{0cp}pfG}dGDP3Bmi%5}9AVft$h=A145$PR-P!dAkmBcc0 z-+DjCa~#k6eeZjGU;fM}xU=`Q*R|HU);h0sqMvF4q-#gj!o2oBn3 z24m&qeN~ey0;^h0?Hvd;wMmoA@wtc(&w=$YaMG@-POCF4R9ZzPZnN|n`C?haXHXE) z;yt7^oEa$1DdbzlYEhkks>yZPDO0AQH)W;w3FyXjAsKI({fL(a$r)z=LxeR%g^%$# z9JD%>erY z3pDh>OWbZFjanSOsV@KGEszGH#&ohoxZ}-P9fk?d&c~< zFk$5Hg=2KVdRWjk>(#An4BTs)KPv4o)kB9or|xO!)R4!|pmzJp;!$@>r&8K?0HIk3T}2YX z=IT$b`&I=r1Y!exj3fCt%}lnY?BZ#%)~~G763G!fD~BO_q|B4~6c$guBddM`a9Li1 zHi~#I+(uxrB=PjKy};3w=Z3@S)(Xb>i<%ScGC36QqdjFiGQ}EBDSfGwKi65-QIe~X z9yxw{le+^!TRsoVEVpyTu|NRgNjA%ZcNS)Ksa=@J6x&deX6%8%5&{?ImZ@k-Mb0*p zxH;YxPcoVP{~s^VCFTM>1727vALl=?$6Izz#+!lN4Yr8&-1LRW6n&gkuSCE!;GI3P zawE6@#m?BZQ#Ps$dse*C+vM}No{YDmc5*;~ET?p0a~2RwJLM}Iwa(98EoXUw6wQOZ zD*k_uy%)Gq2EEASih0Kc&@MJId^1x|-TkG)QVgxte=krgUpm*ji+G@tS_!x!DuCFf zi|rsE!sk6ZTa;WmuajHP?o<}*nX z`|eks1SooU$x-E93~ur68~tqRJVP95_V<{{zEAr0)R2VLW_T;wKk<=F(jcZ@4&VJukf+`yehM|9pyAbIPvm59{fc!`W*t2C}+l2=8A9RNfLEg~>VJFjpGQ zdyFf|qfTwibNSU}bat7FaG2?{Y!6wPm`u8RzH>LiBk*SthL0OirB&&zO~JN1QN7a( z1Ga4i)(X5O)7ZoNyyBUQK)>!$;lbWKTq_6iMrOk(apU_(*6Lhec6OK{W6wJI3~lfV zg#xt3v<_w^bEhEmThY%U2Ou5N+;sW%60I$;N3q2^ZSJ6dHCw9gintz}7NLSf} zRRQg<6@qJ4C;C|l#aw^yFU@mqTeWpHLoRnO^<^3S{nk;P8mcG8C74cEo8wbyQ1JHZ zOG=P_%xdZ)GNz0#%^HT5r&*kIb4o8y&^w5Zl*i~>-F#N;vrg7NeJR(nh`~KY9LOKY zjPyxRUo*b{VcU~KYnf*szGW8(y(~e$gV0A)7pfu(3S_@>v}W^H(sGf7nlhYc8v4ty z^jSufvxuIc-`Yzyvqhq9j#=M3ib1eNMbXD=f7Hr?bYLU64j0O+Ji9$*{^QBFF%IE; zOHbkWj$jhqHzKzj8vlzv(k+_1{jHE;TByk$>KZVFH2g8g02YV%OF&0-^!zygK6VwB zspPYd@#Myu=bw2j_1FKsclc~#qxaxUeSkfiOZ1Xc&N``J+TE=#7(EuT;B!go7fS2z zl~8Mlc9%{3;8}}FS*Z%a8Jczpq8EORMzi{F5|6QxuIF6j zj(I*8`Ki_o^J9;3pEyQZagMck1Vamgdg;<<*bX}+$7))JQxilWzA zaW_A9JU8B~d$k*l6;~lo1@;3CrKu}ne=n1X+_x2#C&SvbQ`&P1p8>Cbs^;K2e_r@t zl_gL2?CTPa>@7j(+jw%*Sk|sb9o;Z4RTV#c+G0H8vDd5K^}S(`ndxEz5dt5-g*xxm 
z;8ld1>c8y!r%(9&eqURuabZ&*-_yB?CyKPyi}`ngtNSyhLW#VGBwrAw{c61rUD|d{ z$JnbP{~ugq2FoM;@uO4W7gvo-i(oOe zo$yber+5aJNZw%VOh)q%c6d~_WXQYC_Hm&ZdxvM{uVpOGFbs$*-Tz{nUrE8CXE_OT za4r-+F%du9>ohN&Em!n-5A~*hoR?l@i=33{%HICZOn}Fmw-7oC-+kLNhpYe#07lxF zOLe95Yu2n^AotGz#~NIJD>06KTVqOq)q0dP2S!m{#MQ=C>-MZ4h!j%fPf&k*r{(HO z({S2`fZ=I{6;EsaVTc>-6SEh8;sFL+H#huSFr5lvG(HT-`37)?MtGnBY4EaNZHW5^ z%||A^lJiHtp%bNfMtcZzrt=IxJu|3M&A=gc-8A(wN0MaUYL0_e z2Yfz8?`FJ&G%#kc^o&qdyVG*o*cQfnjZ>u}m;KskHlQ-Pfx~0_cf+K_b9u4JY630Tm&`M;wmUV22xbm2El+`DDMF|G9kcdw z$=4jR(ZASdMIPVctpnw~UUtp<=;^9X^`CaRQnyN%3dLz``*XkKlRsZ50PW4lt95IA zU6<>4{8NK;88ayQNDQP;%-#+`t_Xw~EgY}@o%P1VpY`{$MqUNdo#z@oO>b|)b-P*3 z1QD81d-n+**$ye;RK`e9RiH35b=ru!-wXItdfPm231;;706}GiJ(prF(sC~se zw_Pb1&8|zCpG0f8XbA2=CYg`1VO7|*L1C|kxuFA3vIGMU4jzR&+;*AbLZ|56*|B7p zc8k5|17^uCZSQP}?<{nV>U?izzMMJ0fBIk3WGEYs43Q-3Kg>N~HOJMs>C#nZF6E-_G`I>M~`@t$31>=#A>q6s_yz9{@;U@Wuv+^;u1$4^5 zMjazk1stLLfaY*r4@|cooXKHwa1yUVx*q7Sx%Iew(Yz1q9PUQK4RSMp>GrR6BnprsyL#ILp6>e10)L{vy%$m(h@f}eCvpLn`s9G-1!Y_78vkL z^MAm>y6CQsw9{$c&EH#q-h*1#{Ms^+N1Srg!pm}Gk^bknQP=;mXKzzyp`Qm`^$8Sd zSqH!9h{zEAWXG=Ky)0uDGaeYi5l~sVNLDc*M61}!5Mn|1Hm%+9&}V#X|$r+Z+1XmfvVLBIOb|k^CE=or%MacDbU(D z{e^B+&y1w=EYu*>*kx+#s;xY(c;d!aE|)a357j_P>NLqP>Ei8T9ozwe}}trkY- z!`i4)icMrI((|xs*Y!QRSdjR=`t%hs?*VOe#fIr`nG@oIPIAD$6MmbITdAH3;NJCY zF5u(7fZB;M5|c(6MM7I1#M0Sr`AbbE)9Aw$#zsI(7zO{WQvy)A{L*FhKI|mvH1_es zon}i3l$#~EKY$&)Z*6b9|Hwy_7H+cYMguSDRtcTr-S~5q##$wS8#8-v!5M%B?!XEe zY(uH_==4W_MzHwwmV!yLS}N%0$;foX+!X~rw%Dy*9hY27>IUUV@%3oN4f@I#>BXJ- zTg+G#{7OMKYY*YdPxB3hG8?p~kDpnHiI|4t5;D9(OM`7TgaOC zzp;u@C!-#OzI_rva)@m1>|Qt*r)Yeys~bJ(NpzVxtv84&+v~s+W#naRxQ*4Z0f{@(ILaT&0-oreT!YNKuBAyO?Uu>M4(*Q zuDx${$hh6U{WZ`*GzhzfVx2b66w^inX*-k)bU6=_B`L00h1*)L(*Gej&>k{nGAjR- z(?aQPo>K6Ny0z)AM>$Gfc>cbc+9jn=!oH`mbT%jO*+hU}A7ou|Ky!u9Q4W(R`$<}Z z&TQ%O7kWFp+9|(np1jhg_Zv7vSRJWf$M|vdznD;w?*ja`A8yHZs=E6L2Zbw4)=>d- zE&(rTn-JR2h;a8HA>1vihan786C3!MemwS1H%_n`C024 zpl)2Z>Dtd6?}1zARbT7jh&LW2?x4N0oj+gK)}gl~?Uc#xVgSt!h>hs?brd|a9h2vi 
zqF#Aw+c#ng;$!YlIx16rn%+uON1AVA3Bz~AydmFipgD8HK{Ysk z`7;y|bkp|>{S;0IL!uy4yIt{OhMR@6hU>}V?1=|~bElT1DJRQInoy?~_lXX_jktH| zk~{jU-Sr#&+Qe=`^Cnuw2d*yq^l+&js*=ei*~(_MAU>de$Ups6?2qlT$V2wdhGmsA ztmZVf__}mdMo9ivPfGFtkmTdF2Q^aY+}hDRv{N#9q`oo@pCz{XJ?-+#l53q0GrImt z#+cuQI^h?v^U`9D>w}?Km*_s8qJGEeAc+7zp1h2-Up!NY%P0z&VWy&%J{^VP)+AWk zSe|QMdBOp}MNU+6%p7qMRv8u>4Frp4)Ac^Vu`ZWq#|yf>hT`rvcCB*%Itb(|?~Zk{ z$i_Fdp(?poMTJmusLECc-7^2WeVuittJ}H1rlVYLYd+EZ=w$807(^bRj=FblZ`h+W zmJRhy`s#|=h0W31n-ptl=RdK5CPMr2$N|ata+fpEr}3>7<8E^98@OXFQ@~3wBJXot zK7J2i%WsQvsrB=Bj?*vqZ^Nv8GWAES^NspraHq_Z4u~fi6M-Icu8xf)O}}_PCkK23 z)ye|cg;ANFhl=`r{!wuBj8jyAuvF3Az)Aa6m`ySUL8~KjYtsc;C31c}E8zApdjV{+ z-@@e@piBa6!B=z6b)Y#odK2bWe?WD4(X8_dykj@BN7Q`pS_8%5rt@W1DK+;J9FZ#; zkb_b@yZF4JPUDOTpu)d*E0ptjNKbL|?BEgOUI3KBfcNsZeW%jC63!4G6~ht|G_P=$ zoGZ44GQ0>Xn>ZXh;9lr&?=8uqh;UjT)ZO-t;aRZeR8trKhuXh|iIMEba#em>vs@Ks zF3vMobQVV;Hy!@v&TlzG=0&qkq$yzCFuKsSep*LvvdI|;*Bw!*3BWCdNE(~juy0;t z|J`KIVBuea*&G*@T)l%i+i9*?LN9J&0rQLJe*pPGH^WT7pajx=C20Lg83d@xF}ZeW z`4|3~F+QCoql}Sn?XPah^+E4Vxxs$H2r2!5fNQt~v$^|Nn=fD?`Qu}&waFW$bI5E` z6Y!;rh`hYz-&#G0CxVddvne}zw)nZ1lwgx+d6jXVS2IpJ+s6m4ElyHLiFj6T#-j>* z{Zxhi=rUEKzj}kqRK7cne#pNuiY4d=7FHV`^U&zr`4>+QWEN82XU*eC8mD3vJ}KwP ztZK*c0!_IGdnqxB*_zd!%> z^qw+fqLl0A3bo*k8vN&}qfjxc`dFuI4005<8PFOtdv90`7IhPRP^>QB^JmYlTUmj6 z$;7fa>pUckIs-Bi9A0#!Cj5YpGlVqDw9u|fV0Xxt8STY03kOLOwjVWjue)^r-U?2u z6Mj3jlb(hgzSlUiaJtU!p+VYl{=)&Z3g-xo3b|(I=i_H9sFz9Se9c#J7p%=okH4(K zuao!b7iH_vy$A2zdbdMA=wS&qx>RuAo=xkcX^Mk1`cy@wXa4BasD+JK?4bKIop6im z6e>l;Gm<3v$ZhWW7O{kDel~C#eqpL7+}K$kzfN@}zd-!%&{8@{VZHa_dQqawkk(D6nmElzU?F z0!`nkXS01iaHr^z?Zw+8!?Dhl(Gpcy`Y5^^hTXu_96XG=#kr59=JSo6W1dR6nH@Fwn%rm|Rrv8$ zzm#Qnn!d~U3w{QrP^X>Z*dj5m5L%kFrnFq+-x%-jTK>D1*HdQq;gc*N!4M6akZKH65kV)t;Y-qc5~3RLMQ%`IH=Q+-vaTGzoq31tsT-%1zp z+|7zf8!e=qkpm+(6-ET>{$4(!8>TE@D)sHSRA`h*(IwBtlebrj3d=9WmwbvtTIS~) zH6r?T)|!_Og^3uJ2lAnu$68=ck3$6o#`E&QkS~}-k^1Zzb+Hz!2-90&7<56a1I%1; zE!!Zo%Y7)0mfz>Seb44q;PCQUd_wy4hb3CWKie5A3L^UNWJ1Zq%>kmc>)B$I7~ 
zyGl!xx`fs(9?VlapyF3LQ-g%*Afxq3q|}tywy}(zwk6`KK@T20;8k#MY-;k~E`Axt z)tBAnAi`jgV1c5cc;5g;>d>7#cVLJE7Nl`t$OFue;DagmE7t7F3NC;XKi(g6M^7x^ z`p*fd*27`?(FTX+Hw=+lxA_TfF@l_9eM`6hr_InVOUpz zXG49XyQz}#_}yBQqMi(OSpt_ccR?1Zuv%83!C07EC`2nt3{K{=M{2+zq(V^hmUS4A z>~sPJZxg4c^NcxLMi|v@;QeD5f(Fh3h8XDt3BbS?E@MY6n0!6b1%tMlo13?Ba>DrU z=PH3Q8gbIh1ESu3cJoLvb)%`5T<(1CrwU>@yzD&iq&;clt@0o4SX&kd7u%BR^*0#m zxlDMqj7X{F_dC*#r08{1WrSulj9U6T!q?SUFRtbAD5{;%Jkpt`}TSnGS;u5=%=<10Y&&xadO|bQ0I3@{FyD6s<&I8cXDzH5`dXt>O!gQ=g${YIDTm@ z`|Up4!`zJc>UY-S{Nf)z5~mBN#JA!ER5fHN4l9W+7lH&BhdsL<3VB5u_?=WP54j{K zp8)$cX+_}nzW<@FY_NoeGe$bguX^on50|of;h2HrkG{MN@cd+R*pBaJM>9-o=5^D7b zmLmsf@4F&8i+U|>3>OzFY`aW|J=Hax8`F)AOC=}Q5q5MjvSzgUZ-VVhkH-6FzemRXY92aN}^ZgwreP-Gf|7#ebR?gjF|p#Mj5yYAO^yx6^tR zF|KNI{e_OU%Tj)KSN_1u>7AL(U@6I|cIGBBp`yM0bcS7K&-Ta9ESnAd&3|BU8~0es5tqlUJvm@b2(h+X(xy9~a7~{b z#|~t=nl}{96!#_4K3pNE&Jy7{BArfxp9O7Va)z;?zJ8Gxor+%i#D&WiViqG>%i|KR zWF^mx>J9MH$W7(tcqiXxv53NfNBq(RXVdAq{$b%qyH|l8^J5TVg*h{EFnCGLWzS!S zkN>&$2k+>fg-S8X$&qi1&cf^30|ifKA5EYN^XEr0RM1_iYLm&7@;mr84jjS0tk5=t zhT<-2Q$y5_+Sh1ON0mGaEMxa-EH zviv;5u+D<3!_Uj9kpij@)*(nUmEFLzK4KR>vu-tE{-S~VzlOp$aIPIa&jCciUC zwfsPY4P_jTa%idVQXo4Q_YsxzJR(vPPNX!sJ)!Ijw=#C_{!GT|8kwaOTU{5$ z=ZgC$v+&_nU$@YO1|L+vpVmrMWCxh>)yd5XFo~yHHy6f;BHpcL5PN0K-4E|`8X7vk zzf;2Wjh>j|r$oo_)jVQsU)GiT`Ew9ePqTwM^@?IE)!CG}>@OtaNBh1@8ZV~-nk#xr zE0W0_Baff2gVCus{6!Lr<2B#@*r)DJ&X-c>iJ7k9)ASlMQCfpQ?1Y`7pD)?2F{0=;fw_@@d}J5 zkJ>G}#v7Ad;7*&HAuagX`=0hyM1dB0x#-;h(r`$)ftA%$ zq*uGGt)at5yje}V!QxrGWk;T0V5%vb7UA*w*lh(~q-Q#+DP_BKrBhuxe-(9&#he?HcXMl_nZ#(mh>{h+b0k`zARyAnJx;r6xBht)F?5X%^qqd=o6Ryr4 zu+3p7@)ccmzuI+QHE1@|u*H$TX%C&cGi9u%-6C{C9FA1RvW~S&S`_CjwzYAHdQB|H zFNtP)i@|K&s))lI1EhqVA1m z_)o!fq{z;M0Fh2Oag}YUeCh-nj=tIY6{Id)f&cY%_rXv+DU+qL84Npre}7c!0X}-; zEJuOWNK3&corP@q1oGON(v%@C@|+3%Ob;q!a`ygVQtxx!;5?OyqiESq^z^QcS(=RP8@%XPf(P0#VXn>lEq zBS=-RX_6_fQ+x4jXJp?v#R(QhGYFM#q!KxPpqmL88fSO`BEZxKSmC8br%t8kH z`f`ol*ASVm)tp>36_~lu@MaaqE&>|I871?6`*c$61E9V_!W)>X6v@>){Wl>-(t*Y%(8gZ_CthU6^L+i7Qh}cl# 
z`O}a_$1NtCp3>o|Po{iNqy)vHLj=g<{m)#|8mIEc$a!rC#}8cdyY}0UGs2q@avh=m zK7f&C-;`}JDVcq?V*lKGg@Wa0-1A#STEh%%sSO$AtvU-=a@EuwPH}PiwZ`@(ZA0#L zs<5SvKf~Dbx_;!#@R)s@xtwv3vfs(JA+Wl+RYsDzVRyHWx7NiZoqXG$kQwdz=7WIh z#6rUimi8tl4M~kZvnc^qQ-lcHoxYIXuH&O( zY5yQtK+vk=tjUqok{jA2y#>jJQxYG~QNx%CGq5(9qXjTe`i}q>P{dZ5%_HZ$(aU8H zE;R{H4vZ0Df5wkzcz24;3?xHJq^O?|^g8JV&Ew)eMmq67%se5AC!O%MYkFDq= z&EYCLvZwP>b}!M@Vfe2#^P*$X21~EK_)J1EqmBL*OYsZI`nXK1$yc%IHT$Is9Oq?O zMJDsRVJlU!VLElWu2&D6Pvg(`;$rC96xy0j2H`P3(b>J5zd>uCqhEeh=s4k)=P{zh zI%*8)u2xo6FnXu(gN$r;l9#`VsJSEGiTlQ-9&cZ8Y{;>dSq}Pg*Be-UzM%t1Y2~O6 z?dkU+WX7OG=Nu}3@UQuRr8eG@UrwYDW{ytE4y1~xO{N*n9Jk4=mA0ZkeNme1FkhBW z7M^VEfBCvh>>$MQl$$2jB?C6R8+K3{wb;A!-5FlV>q|ZR{lplPSEcy+I}{eX9-(() z7k(cWU0a{rwC`H0Vaoo>VPoHB+=ZE``-XfIU4`@~6RMQ3@%DV1g@sVY1g!#__8vKX zOLVZ*=!Oj`MWu6M{VnbU?$?QdUlHZs^OD`kl5$yD6~Sgk)Ib%_J*q*-<=LffMZ3Wf zk*Ip(t(bRu7L%>f4LKz@>eFepQTg3vtJe6p#r8o2C-fw2Hk>H6uKuf6#>9?krXmt6 zE!fsnw@{{OUa}CjLSf^AdHX^g#1&HX_FK`KK9|H5`RBcD563=4W*L^XbtL99%=UZG2oh;X zzI@XB18O3T(DsH@zkE?cLxc2oY$**kg16@|4ow|Tm?S(B@HY!zPn}jZ)%F!FJ&|uQ zxi?g!WiD+CxCCtH>vN`(sKtdRT}f$V;nt7_%%yOryV)t9-3LKq-Rd<=IsUb1#qjh`SmwACerp)S4#w92xVm)Q{tidT(r|&!5hO zM3`VoY-M=8*crP}mwbs4igtdTk4|cPQoWp5Jm=g}LC23- zvtELQUB~hoF}XopxyL}no$@=jVjfn<7qKbDf8JBdAxZW-Qb(^w?#NG9HBJ^xU!}Yc zqnT7&!@=aB_go-cZCLos=qe%1VfFN+O+3`_q`y2)1Tyyn8|Xg7cfM*mn(YI7%tlV0lqL2JSfH>M!d~l zyy+mMn~{3*xkEELfPOrz7me@k6&p`GEb*7lu*5A@zOSqFrC{^5cC(Xuw+t)yLb&sM z*1FQuJZWVq*ln{&hii4YmYB?mm*n@Z0W9XoTx!_eoA`qYk`~Q}I_bd^7Ma34c^0fN zeoM)8tMfk6&IC-}{XteC4Y7OF9Ku4d;@6Gw_d?;#h$l)52jMox2_MRpzJF z!Y!dZZ?Z*9jpoDBL_=&k!iBy}_Ir5mYqjxfvha@@WU8@!gYnU}zP>(5L%a1b)(?S5 zR3mwHc6k<$jjfhr(-(jFa(~_diQe^}o2gdkV>(9hs!AFUFMjH)ARU@9V(sAU<>g^E@%eFOYrf zOO5`k4gJfm`R|zc|1D6cDBxeW{?e@zKMu@a|MmxNAMtAqoAC9& z!!Q4T*k}bYI}>F8bHRu0G>?yok3+>~!o-vEP?jAkU)N=$3-mAvKl7nKFKql@;D`Tz zgZ&=>5C2Da+5f@d{QtO7WY7A2evb@QoKe(L{)3&ve1&Q3&MF%@-hS_Ek?9rRL72tU zS|X(^p|u!+?@ZN6Dcgt??&O351|%3&56g$H!jRJU61AWXD5V+%Vol4?$klK;$^7}z z!o!K;2FAvo4Ej9Ii#p8itRwCny<_B&3Zuje3y*X%g-8(yyWb}6)j``5q)7VVVjXsN 
zFw`45)|1&+NvBVruBxhnp`OHRwa_k%RP=NY64<+UFX*KnS@`HPZ{5Va!qGFo$X#|M zg^Bc51o9!B$3Xd^sFx{B6^NWD6x2yRcm6zmX%NMCh}R}??AAGKH+Am&ugZ3faVW0-E5$xhqw;4M{aCJ#KG zzkfLBKft3aCTB2|Ew}C843+!u)Rdoo19Q+DL;GE3-|+{6aP|w8Kwj0LzW#n=Xzn@; z)`$oR8N$H$puK9^<8?ftAt9jP?8Mj1I)r>;Ur71T@B5=~kd0k9w{Dd*MCV;UfBt;a z!#zrox-d*$8>GCnv|d0nheS=;0(Lu>fHUYx>w_9D?)6>({ zg9PBm7JFr6WK@HAG{aD{ACy3k9Stg&HGp`$7>!fyMKR5Z`Jza1D^4I0{_ki`Cja9^Mv-M+Ue7uXJ&+igftZw`x;M! zL}QQuoi@*q6pOk<9%N-7ZF@s}#m3JZJND{BoDzsecIO@jHbkrfX5$XY1L;gvVFY6H zI)ZqlKqZJpnAOJ8N$6$!>PRVOW6Fj=X%@;TjE9#noUxAitM|@pnC}n8IW_Bph=s$z z`&PSYrrmVmLiF6b=JHZkW!Bi@J+L%&FW2wFn@Bi zBc5G^K{$r|_SH!Ue}(toG6k8=RUcL`C*r%!48WU(*B=d4I09xKwqS1mTP}1o$Qvew zn;c956!J9M^S?r$0shCe*X-4RSU zaincw+;nEsJ?X>QZogGbT)8xpB@wXQza2Cah;wvI)K;9$XS2XhQ)zgDDG4QwPwEFPU5P`}DO;ikZ8b;!K4Lmjr6GO*N;7@hA4aPu! z)d^&gU<-g`BXW5M9yAyRtD_1)p5PlFB|b5+FYYeyX-h)uQ-64j*LBKg)W`PLr+xjy z+#fyWy#;~QO1s}wbIUA;yVZ=F`_(W)v2JwtgIU_MP4mTSk*-<*9N#E zAWOvuBn`YH*KwibGmnFCxJlxbUkaf23eSKau7Ib_Kob?yIj6aH%GiLD+Cn0cK;94- zGfedW5KmBWpFI^MASO2XEG;7v=`vBlhmB4?oujvtyH)L0f`5cI++xO0Z;`ps);kg& zCue8dzw}-)oN-gP1jry{BZq1~ZV8~{)Elo$j{pIR#X(#*2$yWwDgE52=x}HM2mCtG z!i!)KwJ3(i2vQ2Ho+Mf<+(XM(WDDhsmKpL1`qD ziJz;4XeH%V1v!2O+2N-W@=deDe>?W|e#iFSY?@`-!UgNGO;)_G>x9=u-8F{l#{u}J zC5c*jhqs)1iN%`?#6b%1_bvOP?aH}_MQ`acH#aCd!23&ywm%KpgM04ovVXH#CDHl) zul=~?VH^ZE<$zso%uvBAU-q!TK2TaLt3{@nxh!1OR~BdMs8p@>XDL_j87|P`54oDa2cRjsk-dR zwHa^A1A-gqPZ8$YEJsI1U?Z&FxXTrU9FdpyOB&`|S(uv-wdGlY)`hxyHN;-f;(Is| zZ>|P{Ff7iHx)3-v)gTBlQt3{0a;!W0wT5*^E3VHmSLf)L)j15S(-{Z#F5OHM&o+w` zctKX+fQB<}4{Q@La0Hc|YH+hI8W#mbMn*@W!QXRd=koF-Ls2!Ti-6ILk;}v5-ItdZ zheiE0BMx199Slw!w3ZYpjXEF{3m?;RKM?!H;72c~Xb+A-7?Mo4>MDWo3*tm6d7=iF z#5Nzs+@ESyd-K}lPe~Ch4b~~ypXX+^(h-Ae;EbZ6a);o#2);Ze#<}g03NPq!S><%C zbR>xsfH4H_lo6$vj^K_$1XKWY&`qCk8O{h0aB|lG4ILnRMCnrJ#-n!B;+pXbw*Wh! 
zmZr*>>OCM{8zdk*@-*QZTp9>%5D(`;uEZUmmf!@a1&CLmEwmr!VShadq-j)xK-o5> zSW@wo$)7i1qvnL)yGb}$Tm#))bE+LUMVU@V9q`Ogj``FZaA8Bk!(~8ntb!L~wX`su8VcJ+K=sTfj?*I- z|KedpNjQBxI^rUK>EM9_+d)BNX>lg1)CipDWNS_eW99k>gcA$kP0YkLuL7SwHpaBK zBW!y!HPdgAA4`rdFE4i_zl;T;#?E(-_?Z%U?bZe3M{Zqx_izt023cB+HG;+k2q+n8 zY1x5_RC4jQy6ow8AP zW3CHaTenTvT&fwoOLDQK^GF`qM2+Xf!-?bOHqFoV;d@j)@59C|0_Ax}y%P>tY6A{* z#+m>kH?Cg>U?XBua-{R*^zUG(5NjPGMKogMq@=`z;g0JQRUuwkJPRlfW8G8}EADK{ zjf9&rHzfTNDl!2(&BZH#e0Hh)S_DRo4 zj79op1fuQJJvg}5W(~Vs55Vj!D5GoQyk@Fpa1hqP1_^9RPN3GS(V3B!M(g3g zMl@eW9=+xPwPF}@(b5tzu>)||d+hQc){EBO4ZoyDFN9DFk&_O;eWBRd8gzXF0zE$6 z-!ED`mJK^KCAB`_5jXA^gf|bf?BK0w2}?=*cyk?7{Sb5=^U*{v3A38#85zsvGNseN zgfj+!Wl7E;00}8r;4ye3)%`KK@Fh$hYtQT3;8RzwUVmh=WJW`7IPsd;5rBT+g#cgN z0Jh~MVcx{TVps+sW39)4ggVECyz5Os!i4;z8Z_!a3vL=Xk2dZwow*KnQOcRe*%01$ z+7$MHnw>`umu+tm5c8PGMiXodqJ`2%*}iO&C~eFP)jpD9zNid9Lr6q~05%`hH3YhN zUO+>I%V?iHn?+upSb=C!(tsI;PgX8X2V%!Ss*dHN+&)+C=X2|O)F?{fhtPU!@xhX1sEo7S`G;xrLZ>u1K$NX)aQxf z$>a&+QjbN*EkK!uDf0$9K`TKS5SFL9W;D1iFa!{JKLYxE@YqtA{(Kjq z6sEj*&uy+d8+q`M1nQzrl4RNn?Ro)q7ua@p&6*xw8;nO2I}U&@~s7Ho@0I;su`dasefkh_IA4N9OL{W|b^AC`L3OdP7EG!-c4+5?3}MAVrixZ%#? z`3P~-Shs=40;>78-OKIW%X5u7n(5D<8!=ZcNeyJzmU`~;L1raM`3S2#{#*!88Vm~w zk#w?xO{isHfWQ3qLCpDO~;W& z3jl-as4lw-9-t5JI&|?RBzN`YIcwLhg}2NU2!-&7&&_oNQgpbHy=q$ESVKx}01CcR z9v(l{W(;x-TS%1JZHm)}yMQbRK04WkX?RG?ys;vzaw$?#QAVbqwA9Niv-z{XC}Fl9 z;L~&coMe~DE|6)IgM9@aKR*YwIUkxZZ|up+GZBZN$=!G?#u^D>m&xh_wC~R?G4KEq zGQh}&o)-YX1fU?RxeLfkHHclF0E=w)tRGaIFKXm>aB!j8GH?S$B6%KDkZ*tkl?CD? z9FZl&$6p2jTghCJEO^A%*! 
z7^qsPqe=ls`mEk?I;O{~?~zIk2+VUl=;$4s7}Jr|IeYf3uC6oq8x(j#h~vf432Q-^ zhMDSR-8J!;hK$W)y44Iv7|kfI0bKd%(=DOO(gPH5NhKu)CF@abNW>~^lrje?uoO1*ai2CCY9K02@&}bB(35aYl zfI*<5&Smd4b+8q?rWxF~nGI@yl-}jOK?M0?h%xE)T*}uA?JcD5UYkWKoNL7U=VzDl+64nIdixIT*2H(6VtETQkY3# z=JV$sApV_i-987~15!{KF|o}4GF`|4(CBUmJ0MUv7nZxG)_P9Rd4QSYdwSBhXv)`h z1tAbE;YQ3bL9l_#b-!5CHdZ0Mb?a8<*#uuKZiX9b~LHyJD~GeeHlwX30SHq{fMi;}>uKF9-sjP5=M^ literal 0 HcmV?d00001 diff --git a/bloomfilter/src/Data/BloomFilter/Blocked/Calc.hs b/bloomfilter/src/Data/BloomFilter/Blocked/Calc.hs index aac49c6ef..4e2720013 100644 --- a/bloomfilter/src/Data/BloomFilter/Blocked/Calc.hs +++ b/bloomfilter/src/Data/BloomFilter/Blocked/Calc.hs @@ -43,6 +43,7 @@ policyForFPR fpr = {- Regression, FPR indepedent, bits depedent: Fit {fitParams = V3 8.035531421107756e-2 1.653017726702572 0.5343568065075601, fitErrors = V3 7.602655075308541e-4 8.422591688796256e-3 2.0396917012822195e-2, fitNDF = 996, fitWSSR = 18.362899348627252} +Fit {fitParams = V3 8.079418894776325e-2 1.6462569292513933 0.5550062950289885, fitErrors = V3 7.713375250014809e-4 8.542261871094414e-3 2.0678969159415226e-2, fitNDF = 996, fitWSSR = 19.00125036371992} -} policyForBits :: BitsPerEntry -> BloomPolicy @@ -73,6 +74,7 @@ policyFPR BloomPolicy { {- Regression, bits indepedent, FPR depedent: Fit {fitParams = V3 (-4.990533525011442e-3) 0.5236326626983274 (-9.08567744857578e-2), fitErrors = V3 3.2672398863476205e-5 8.69874829861453e-4 4.98365450607998e-3, fitNDF = 996, fitWSSR = 1.4326826384055948} +Fit {fitParams = V3 (-5.03623760876204e-3) 0.5251544487138062 (-0.10110451821280719), fitErrors = V3 3.344945010267228e-5 8.905631581753235e-4 5.102181306816477e-3, fitNDF = 996, fitWSSR = 1.5016403117905384} -} sizeForFPR :: FPR -> NumEntries -> BloomSize diff --git a/bloomfilter/src/Data/BloomFilter/Hash.hs b/bloomfilter/src/Data/BloomFilter/Hash.hs index dd362305b..f778b4799 100644 --- a/bloomfilter/src/Data/BloomFilter/Hash.hs +++ b/bloomfilter/src/Data/BloomFilter/Hash.hs @@ -73,6 
+73,13 @@ instance Hashable Word64 where instance Hashable Word32 where hashSalt64 salt w = XXH3.xxh3_64bit_withSeed_w32 w salt +instance Hashable Word where + hashSalt64 salt n = hashSalt64 salt (fromIntegral n :: Word64) + --32bit support would need some CPP here to select based on word size + +instance Hashable Int where + hashSalt64 salt n = hashSalt64 salt (fromIntegral n :: Word) + {- Note [Tree hashing] We recursively hash inductive types (instead e.g. just serially hashing diff --git a/bloomfilter/tests/fpr-calc.hs b/bloomfilter/tests/fpr-calc.hs new file mode 100644 index 000000000..8d17db755 --- /dev/null +++ b/bloomfilter/tests/fpr-calc.hs @@ -0,0 +1,184 @@ +{-# LANGUAGE ParallelListComp #-} +module Main (main) where + +import qualified Data.BloomFilter as B (BitsPerEntry, BloomPolicy, BloomSize, + FPR, Hashable) +import qualified Data.BloomFilter.Blocked as B.Blocked +import qualified Data.BloomFilter.Classic as B.Classic + +import Control.Parallel.Strategies +import Data.IntSet (IntSet) +import qualified Data.IntSet as IntSet +import Data.List (unfoldr) +import Math.Regression.Simple +import System.IO +import System.Random + +import Prelude hiding (elem) + +-- | Write out data files used by gnuplot fpr.plot +main :: IO () +main = do + hSetBuffering stdout NoBuffering --for progress reporting + + withFile "bloomfilter/fpr.classic.gnuplot.data" WriteMode $ \h -> do + hSetBuffering h LineBuffering --for incremental output + mapM_ (\l -> hPutStrLn h l >> putChar '.') $ + [ unwords [show bitsperkey, show y1, show y2] + | (bitsperkey, _) <- xs_classic + | y1 <- ys_classic_calc + | y2 <- ys_classic_actual + ] + putStrLn "Wrote bloomfilter/fpr.classic.gnuplot.data" + + withFile "bloomfilter/fpr.blocked.gnuplot.data" WriteMode $ \h -> do + hSetBuffering h LineBuffering --for incremental output + mapM_ (\l -> hPutStrLn h l >> putChar '.') $ + [ unwords [show bitsperkey, show y1, show y2] + | (bitsperkey, _) <- xs_blocked + | y1 <- ys_blocked_calc + | y2 <- 
ys_blocked_actual + ] + putStrLn "Wrote bloomfilter/fpr.blocked.gnuplot.data" + + let regressionData :: [(Double, Double)] + regressionData = zip (map fst xs_blocked) + (map (negate . log) ys_blocked_actual) + regressionBitsToFPR = quadraticFit (\(x,y)->(x,y)) regressionData + regressionFPRToBits = quadraticFit (\(x,y)->(y,x)) regressionData + putStrLn "" + putStrLn "Blocked bloom filter quadratic regressions:" + putStrLn "bits indepedent, FPR depedent:" + print regressionBitsToFPR + putStrLn "" + putStrLn "FPR indepedent, bits depedent:" + print regressionFPRToBits + where + -- x axis values + xs_classic = + [ (bitsperkey, g) + | bitsperkey <- [2,2.3..20] + , g <- mkStdGen <$> [1..3] + ] + -- We use fewer points for classic, as it's slower and there's less need. + + xs_blocked = + [ (bitsperkey, g) + | bitsperkey <- [2,2.2..24] + , g <- mkStdGen <$> [1..9] + ] + + ys_classic_calc, ys_classic_actual, + ys_blocked_calc, ys_blocked_actual :: [Double] + + ys_classic_calc = ys_calc classicBloomImpl xs_classic + ys_blocked_calc = ys_calc blockedBloomImpl xs_blocked + + ys_classic_actual = ys_actual classicBloomImpl xs_classic + ys_blocked_actual = ys_actual blockedBloomImpl xs_blocked + + ys_calc :: BloomImpl b -> [(Double, StdGen)] -> [Double] + ys_calc BloomImpl{..} xs = + [ fpr + | (bitsperkey, _) <- xs + , let policy = policyForBits bitsperkey + fpr = policyFPR policy + ] + + ys_actual :: BloomImpl b -> [(Double, StdGen)] -> [Double] + ys_actual impl@BloomImpl{..} xs = + withStrategy (parList rseq) -- eval in parallel + [ fpr + | (bitsperkey, g) <- xs + , let policy = policyForBits bitsperkey + fpr_est = policyFPR policy + nentries = round (1000 * recip fpr_est) + fpr = actualFalsePositiveRate impl policy nentries g + ] +{- + -- fpr values in the range 1e-1 .. 1e-6 + ys = [ exp (-log_fpr) + | log_fpr <- [2.3,2.4 .. 
13.8] ] + + xs_classic_calc = xs_calc classicBloomImpl + xs_blocked_calc = xs_calc blockedBloomImpl + + xs_calc BloomImpl{..} = + [ bits + | fpr <- ys + , let policy = policyForFPR fpr + bits = policyBits policy + ] +-} + +actualFalsePositiveRate :: BloomImpl bloom + -> B.BloomPolicy -> Int -> StdGen -> Double +actualFalsePositiveRate bloomimpl policy n g0 = + fromIntegral (countFalsePositives bloomimpl policy n g0) + / fromIntegral n + +countFalsePositives :: forall bloom. BloomImpl bloom + -> B.BloomPolicy -> Int -> StdGen -> Int +countFalsePositives BloomImpl{..} policy n g0 = + let (!g0', !g0'') = split g0 + + -- create a bloom filter from n elements from g0 + size = sizeForPolicy policy n + + xs_b :: bloom Int + !xs_b = unfold size nextElement (g0', 0) + + -- and a set, so we can make sure we don't count true positives + xs_s :: IntSet + !xs_s = IntSet.fromList (unfoldr nextElement (g0', 0)) + + -- now for a different random sequence (that will mostly not overlap) + -- count the number of false positives + in length + [ () + | y <- unfoldr nextElement (g0'', 0) + , y `elem` xs_b -- Bloom filter reports positive + , not (y `IntSet.member` xs_s) -- but it is not a true positive + ] + where + nextElement :: (StdGen, Int) -> Maybe (Int, (StdGen, Int)) + nextElement (!g, !i) + | i >= n = Nothing + | otherwise = Just (x, (g', i+1)) + where + (!x, !g') = uniform g + +data BloomImpl bloom = BloomImpl { + policyForBits :: B.BitsPerEntry -> B.BloomPolicy, + policyForFPR :: B.FPR -> B.BloomPolicy, + policyBits :: B.BloomPolicy -> B.BitsPerEntry, + policyFPR :: B.BloomPolicy -> B.FPR, + sizeForPolicy :: B.BloomPolicy -> Int -> B.BloomSize, + unfold :: forall a b. B.Hashable a + => B.BloomSize -> (b -> Maybe (a, b)) -> b -> bloom a, + elem :: forall a. 
B.Hashable a => a -> bloom a -> Bool + } + +classicBloomImpl :: BloomImpl B.Classic.Bloom +classicBloomImpl = + BloomImpl { + policyForBits = B.Classic.policyForBits, + policyForFPR = B.Classic.policyForFPR, + policyBits = B.Classic.policyBits, + policyFPR = B.Classic.policyFPR, + sizeForPolicy = B.Classic.sizeForPolicy, + unfold = B.Classic.unfold, + elem = B.Classic.elem + } + +blockedBloomImpl :: BloomImpl B.Blocked.Bloom +blockedBloomImpl = + BloomImpl { + policyForBits = B.Blocked.policyForBits, + policyForFPR = B.Blocked.policyForFPR, + policyBits = B.Blocked.policyBits, + policyFPR = B.Blocked.policyFPR, + sizeForPolicy = B.Blocked.sizeForPolicy, + unfold = B.Blocked.unfold, + elem = B.Blocked.elem + } diff --git a/cabal.project.release b/cabal.project.release index 99c81e0a7..895b82d2e 100644 --- a/cabal.project.release +++ b/cabal.project.release @@ -14,6 +14,9 @@ benchmarks: True -- Luckily, bloomfilter is not commonly used package, so this is good enough. constraints: bloomfilter <0 +-- regression-simple is used by the bloomfilter-fpr-calc test executable +allow-newer: regression-simple:base + -- comment me if you don't have liburing installed -- -- TODO: it is slightly unfortunate that one has to manually remove this file diff --git a/lsm-tree.cabal b/lsm-tree.cabal index 051c71858..a4ef261f0 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -456,6 +456,19 @@ benchmark bloomfilter-bench , lsm-tree:bloomfilter , random +executable bloomfilter-fpr-calc + import: language, warnings + scope: private + hs-source-dirs: bloomfilter/tests + main-is: fpr-calc.hs + build-depends: + , base + , containers + , lsm-tree:bloomfilter + , parallel + , random + , regression-simple + executable bloomfilter-spell import: language scope: private From 52cdac3ec065829b8a306a9e2cc6a97c4f34c3f7 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Mon, 14 Apr 2025 19:47:06 +0100 Subject: [PATCH 30/43] bloomfilter: add operation (?) 
= flip elem and make the example program a tad more elegant --- bloomfilter/examples/spell.hs | 6 +++--- bloomfilter/src/Data/BloomFilter/Blocked.hs | 12 ++++++++++++ bloomfilter/src/Data/BloomFilter/Classic.hs | 16 ++++++++++++++-- 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/bloomfilter/examples/spell.hs b/bloomfilter/examples/spell.hs index ae2407a6d..a1cf356a4 100644 --- a/bloomfilter/examples/spell.hs +++ b/bloomfilter/examples/spell.hs @@ -11,6 +11,6 @@ main = do files <- getArgs dictionary <- readFile "/usr/share/dict/words" let !bloom = B.fromList (B.policyForFPR 0.01) (words dictionary) - forM_ files $ \file -> do - ws <- words <$> readFile file - forM_ ws $ \w -> when (w `B.notElem` bloom) $ putStrLn w + forM_ files $ \file -> + putStrLn . unlines . filter (`B.notElem` bloom) . words + =<< readFile file diff --git a/bloomfilter/src/Data/BloomFilter/Blocked.hs b/bloomfilter/src/Data/BloomFilter/Blocked.hs index ffcc0c558..71d1c07e5 100644 --- a/bloomfilter/src/Data/BloomFilter/Blocked.hs +++ b/bloomfilter/src/Data/BloomFilter/Blocked.hs @@ -44,6 +44,7 @@ module Data.BloomFilter.Blocked ( size, elem, notElem, + (?), serialise, -- * Mutable Bloom filters @@ -113,6 +114,17 @@ insert = \ !mb !x -> insertHashes mb (hashes x) elem :: Hashable a => a -> Bloom a -> Bool elem = \ !x !b -> elemHashes b (hashes x) +-- | Same as 'elem' but with the opposite argument order: +-- +-- > x `elem` bfilter +-- +-- versus +-- +-- > bfilter ? x +-- +(?) :: Hashable a => Bloom a -> a -> Bool +(?) = flip elem + {-# INLINE notElem #-} -- | Query an immutable Bloom filter for non-membership. If the value -- /is/ present, return @False@. 
If the value is not present, there diff --git a/bloomfilter/src/Data/BloomFilter/Classic.hs b/bloomfilter/src/Data/BloomFilter/Classic.hs index 36ea5c7d3..bdfbfdc74 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic.hs @@ -55,6 +55,7 @@ module Data.BloomFilter.Classic ( size, elem, notElem, + (?), serialise, -- * Mutable Bloom filters @@ -117,13 +118,24 @@ insert !mb !x = insertHashes mb (hashes x) -- present, return @True@. If the value is not present, there is -- /still/ some possibility that @True@ will be returned. elem :: Hashable a => a -> Bloom a -> Bool -elem !x !b = elemHashes b (hashes x) +elem = \ !x !b -> elemHashes b (hashes x) + +-- | Same as 'elem' but with the opposite argument order: +-- +-- > x `elem` bfilter +-- +-- versus +-- +-- > bfilter ? x +-- +(?) :: Hashable a => Bloom a -> a -> Bool +(?) = flip elem -- | Query an immutable Bloom filter for non-membership. If the value -- /is/ present, return @False@. If the value is not present, there -- is /still/ some possibility that @False@ will be returned. notElem :: Hashable a => a -> Bloom a -> Bool -notElem = \elt ub -> not (elt `elem` ub) +notElem = \ x b -> not (x `elem` b) -- | Build an immutable Bloom filter from a seed value. The seeding -- function populates the filter as follows. 
From d808621ce9a34e703067a1587ed28a821fceb65e Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Sun, 13 Apr 2025 19:43:11 +0100 Subject: [PATCH 31/43] bloomfilter: export a formatVersion number --- bloomfilter/src/Data/BloomFilter/Blocked.hs | 5 ++- .../src/Data/BloomFilter/Blocked/Internal.hs | 34 ++++++++++++++++--- bloomfilter/src/Data/BloomFilter/Classic.hs | 5 ++- .../src/Data/BloomFilter/Classic/Internal.hs | 34 ++++++++++++++++--- 4 files changed, 68 insertions(+), 10 deletions(-) diff --git a/bloomfilter/src/Data/BloomFilter/Blocked.hs b/bloomfilter/src/Data/BloomFilter/Blocked.hs index 71d1c07e5..359efd6b2 100644 --- a/bloomfilter/src/Data/BloomFilter/Blocked.hs +++ b/bloomfilter/src/Data/BloomFilter/Blocked.hs @@ -25,6 +25,10 @@ module Data.BloomFilter.Blocked ( create, unfold, fromList, + + -- ** (De)Serialisation + formatVersion, + serialise, deserialise, -- ** Sizes @@ -45,7 +49,6 @@ module Data.BloomFilter.Blocked ( elem, notElem, (?), - serialise, -- * Mutable Bloom filters MBloom, diff --git a/bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs b/bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs index 07acc456b..b1a3cc839 100644 --- a/bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs +++ b/bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs @@ -25,11 +25,14 @@ module Data.BloomFilter.Blocked.Internal ( prefetchElem, -- * Conversion - serialise, - deserialise, freeze, unsafeFreeze, thaw, + + -- * (De)Serialisation + formatVersion, + serialise, + deserialise, ) where import Control.DeepSeq (NFData (..)) @@ -48,6 +51,18 @@ import qualified Data.BloomFilter.Blocked.BitArray as BitArray import Data.BloomFilter.Classic.Calc import Data.BloomFilter.Hash +-- | The version of the format used by 'serialise' and 'deserialise'. The +-- format number will change when there is an incompatible change in the +-- library, such that deserialising and using the filter will not work. 
+-- This can include more than just changes to the serialised format, for +-- example changes to hash functions or how the hash is mapped to bits. +-- +-- Note that the format produced does not include this version. Version +-- checking is the responsibility of the user of the library. +-- +formatVersion :: Int +formatVersion = 1000 + ------------------------------------------------------------------------------- -- Mutable Bloom filters -- @@ -111,8 +126,13 @@ prefetchInsert MBloom { mbNumBlocks, mbBitArray } !h = blockIx :: BlockIx (!blockIx, _) = blockIxAndBitGen h mbNumBlocks --- | Modify the filter's bit array. The callback is expected to read (exactly) --- the given number of bytes into the given byte array buffer. +-- | Overwrite the filter's bit array. Use 'new' to create a filter of the +-- expected size and then use this function to fill in the bit data. +-- +-- The callback is expected to read (exactly) the given number of bytes into +-- the given byte array buffer. +-- +-- See also 'formatVersion' for compatibility advice. -- deserialise :: PrimMonad m => MBloom (PrimState m) a @@ -184,6 +204,12 @@ prefetchElem Bloom { numBlocks, bitArray } !h = blockIx :: BlockIx (!blockIx, _) = blockIxAndBitGen h numBlocks +-- | Serialise the bloom filter to a 'BloomSize' (which is needed to +-- deserialise) and a 'ByteArray' along with the offset and length containing +-- the filter's bit data. +-- +-- See also 'formatVersion' for compatibility advice. 
+-- serialise :: Bloom a -> (BloomSize, ByteArray, Int, Int) serialise b@Bloom{bitArray} = (size b, ba, off, len) diff --git a/bloomfilter/src/Data/BloomFilter/Classic.hs b/bloomfilter/src/Data/BloomFilter/Classic.hs index bdfbfdc74..000cf204e 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic.hs @@ -36,6 +36,10 @@ module Data.BloomFilter.Classic ( create, unfold, fromList, + + -- ** (De)Serialisation + formatVersion, + serialise, deserialise, -- ** Sizes @@ -56,7 +60,6 @@ module Data.BloomFilter.Classic ( elem, notElem, (?), - serialise, -- * Mutable Bloom filters MBloom, diff --git a/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs b/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs index 8c293394b..a5ad58b8c 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs @@ -22,11 +22,14 @@ module Data.BloomFilter.Classic.Internal ( elemHashes, -- * Conversion - serialise, - deserialise, freeze, unsafeFreeze, thaw, + + -- * (De)Serialisation + formatVersion, + serialise, + deserialise, ) where import Control.DeepSeq (NFData (..)) @@ -53,6 +56,18 @@ import qualified Data.BloomFilter.Classic.BitArray as BitArray import Data.BloomFilter.Classic.Calc import Data.BloomFilter.Hash +-- | The version of the format used by 'serialise' and 'deserialise'. The +-- format number will change when there is an incompatible change in the +-- library, such that deserialising and using the filter will not work. +-- This can include more than just changes to the serialised format, for +-- example changes to hash functions or how the hash is mapped to bits. +-- +-- Note that the format produced does not include this version. Version +-- checking is the responsibility of the user of the library. 
+-- +formatVersion :: Int +formatVersion = 0 + ------------------------------------------------------------------------------- -- Mutable Bloom filters -- @@ -100,8 +115,13 @@ insertHashes MBloom { mbNumBits = m, mbNumHashes = k, mbBitArray = a } !ch = -- and Int because it's less than the filter size. BitArray.unsafeSet a idx >> go (i + 1) --- | Modify the filter's bit array. The callback is expected to read (exactly) --- the given number of bytes into the given byte array buffer. +-- | Overwrite the filter's bit array. Use 'new' to create a filter of the +-- expected size and then use this function to fill in the bit data. +-- +-- The callback is expected to read (exactly) the given number of bytes into +-- the given byte array buffer. +-- +-- See also 'formatVersion' for compatibility advice. -- deserialise :: PrimMonad m => MBloom (PrimState m) a @@ -166,6 +186,12 @@ elemHashes Bloom { numBits, numHashes, bitArray } !ch = then go (i + 1) else False +-- | Serialise the bloom filter to a 'BloomSize' (which is needed to +-- deserialise) and a 'ByteArray' along with the offset and length containing +-- the filter's bit data. +-- +-- See also 'formatVersion' for compatibility advice. 
+-- serialise :: Bloom a -> (BloomSize, ByteArray, Int, Int) serialise b@Bloom{bitArray} = (size b, ba, off, len) From 685e3d2f7f53ecd4c414b48669344ef18ff1ade7 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Sun, 13 Apr 2025 19:45:26 +0100 Subject: [PATCH 32/43] Use Bloom.filterVersion number in the lsm-tree serialisation code --- src/Database/LSMTree/Internal/BloomFilter.hs | 8 +++++++- test/Test/Database/LSMTree/Internal/BloomFilter.hs | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Database/LSMTree/Internal/BloomFilter.hs b/src/Database/LSMTree/Internal/BloomFilter.hs index 2a24ca569..ab506ab91 100644 --- a/src/Database/LSMTree/Internal/BloomFilter.hs +++ b/src/Database/LSMTree/Internal/BloomFilter.hs @@ -1,6 +1,7 @@ {-# OPTIONS_HADDOCK not-home #-} module Database.LSMTree.Internal.BloomFilter ( + bloomFilterVersion, bloomFilterToLBS, bloomFilterFromFile, ) where @@ -25,8 +26,13 @@ import Database.LSMTree.Internal.CRC32C (FileCorruptedError (..), -- | By writing out the version in host endianness, we also indicate endianness. -- During deserialisation, we would discover an endianness mismatch. +-- +-- We base our version number on the 'BF.formatVersion' from the @bloomfilter@ +-- library, plus our own version here. This accounts both for changes in the +-- format code here, and changes in the library. 
+-- bloomFilterVersion :: Word32 -bloomFilterVersion = 1 +bloomFilterVersion = 1 + fromIntegral BF.formatVersion bloomFilterToLBS :: BF.Bloom a -> LBS.ByteString bloomFilterToLBS bf = diff --git a/test/Test/Database/LSMTree/Internal/BloomFilter.hs b/test/Test/Database/LSMTree/Internal/BloomFilter.hs index 062905113..b099b63db 100644 --- a/test/Test/Database/LSMTree/Internal/BloomFilter.hs +++ b/test/Test/Database/LSMTree/Internal/BloomFilter.hs @@ -98,7 +98,7 @@ prop_total_deserialisation_whitebox hsn (Small nword64s) = prop_total_deserialisation (prefix <> BS.pack bytes) where prefix = LBS.toStrict $ BS.Builder.toLazyByteString $ - BS.Builder.word32Host 1 {- version -} + BS.Builder.word32Host bloomFilterVersion <> BS.Builder.word32Host hsn <> BS.Builder.word64Host (fromIntegral nword64s) From 018153982347962a1d1fac132faa7598e8c08d13 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Sun, 13 Apr 2025 21:38:04 +0100 Subject: [PATCH 33/43] bloomfilter: switch range reduction from division to multiplication In the Classic implementation. The Blocked one uses this already. The classic algorithm is the remainder after division. But division is slow, especially for 64bit numbers. There's a faster method based on multiplication. This changes the format version, since it changes the mapping from element hash to the bit selected. 
--- .../src/Data/BloomFilter/Classic/Internal.hs | 74 +++++++++++-------- 1 file changed, 45 insertions(+), 29 deletions(-) diff --git a/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs b/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs index a5ad58b8c..049d59612 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs @@ -43,12 +43,9 @@ import Data.Primitive.PrimArray import Data.Primitive.Types (Prim (..)) import Data.Word (Word64) -#if MIN_VERSION_base(4,17,0) -import GHC.Exts (remWord64#) -#else -import GHC.Exts (remWord#) -#endif -import GHC.Exts (Int#, uncheckedIShiftL#, (+#)) +import GHC.Exts (Int (I#), Int#, int2Word#, timesWord2#, + uncheckedIShiftL#, word2Int#, (+#)) +import qualified GHC.Exts as Exts import GHC.Word (Word64 (W64#)) import Data.BloomFilter.Classic.BitArray (BitArray, MBitArray) @@ -65,8 +62,15 @@ import Data.BloomFilter.Hash -- Note that the format produced does not include this version. Version -- checking is the responsibility of the user of the library. -- +-- History: +-- +-- * Version 0: original +-- +-- * Version 1: changed range reduction (of hash to bit index) from remainder +-- to method based on multiplication. 
+-- formatVersion :: Int -formatVersion = 0 +formatVersion = 1 ------------------------------------------------------------------------------- -- Mutable Bloom filters @@ -102,18 +106,14 @@ new BloomSize { sizeBits, sizeHashes } = do } insertHashes :: MBloom s a -> Hashes a -> ST s () -insertHashes MBloom { mbNumBits = m, mbNumHashes = k, mbBitArray = a } !ch = +insertHashes MBloom { mbNumBits, mbNumHashes, mbBitArray } !ch = go 0 where - go !i | i >= k = return () - go !i = let idx' :: Word64 - !idx' = evalHashes ch i in - let idx :: Int - !idx = fromIntegral (idx' `unsafeRemWord64` fromIntegral m) in - -- While the idx' can cover the full Word64 range, - -- after taking the remainder, it now must fit in - -- and Int because it's less than the filter size. - BitArray.unsafeSet a idx >> go (i + 1) + go !i | i >= mbNumHashes = return () + go !i = do + let idx = reduceRange64 (evalHashes ch i) mbNumBits + BitArray.unsafeSet mbBitArray idx + go (i + 1) -- | Overwrite the filter's bit array. Use 'new' to create a filter of the -- expected size and then use this function to fill in the bit data. @@ -175,14 +175,9 @@ elemHashes Bloom { numBits, numHashes, bitArray } !ch = go :: Int -> Bool go !i | i >= numHashes = True - go !i = let idx' :: Word64 - !idx' = evalHashes ch i in - let idx :: Int - !idx = fromIntegral (idx' `unsafeRemWord64` fromIntegral numBits) in - -- While the idx' can cover the full Word64 range, - -- after taking the remainder, it now must fit in - -- and Int because it's less than the filter size. - if BitArray.unsafeIndex bitArray idx + go !i = + let idx = reduceRange64 (evalHashes ch i) numBits + in if BitArray.unsafeIndex bitArray idx then go (i + 1) else False @@ -244,12 +239,33 @@ thaw Bloom { numBits, numHashes, bitArray } = do -- Low level utils -- --- | Like 'rem' but does not check for division by 0. 
-unsafeRemWord64 :: Word64 -> Word64 -> Word64 +-- | Given a word sampled uniformly from the full 'Word64' range, such as a +-- hash, reduce it fairly to a value in the range @[0,n)@. +-- +-- See Daniel Lemire, "A fast alternative to the modulo reduction" (2016): <https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/> +-- +{-# INLINE reduceRange64 #-} +reduceRange64 :: Word64 -- ^ Sample from 0..2^64-1 + -> Int -- ^ upper bound of range [0,n) + -> Int -- ^ result within range +reduceRange64 (W64# x) (I# n) = + -- Note that we use widening multiplication of two 64bit numbers, with a + -- 128bit result. GHC provides a primop which returns the 128bit result as + -- a pair of 64bit words. There are (as of 2025) no high level wrappers in + -- the base or primitive packages, so we use the primops directly. + case timesWord2# (word64ToWordShim# x) (int2Word# n) of + (# high, _low #) -> I# (word2Int# high) + -- Note that while x can cover the full Word64 range, since the result is + -- less than n, and since n was an Int then the result fits an Int too. + +{-# INLINE word64ToWordShim# #-} + #if MIN_VERSION_base(4,17,0) -unsafeRemWord64 (W64# x#) (W64# y#) = W64# (x# `remWord64#` y#) +word64ToWordShim# :: Exts.Word64# -> Exts.Word# +word64ToWordShim# = Exts.word64ToWord# #else -unsafeRemWord64 (W64# x#) (W64# y#) = W64# (x# `remWord#` y#) +word64ToWordShim# :: Exts.Word# -> Exts.Word# +word64ToWordShim# x# = x# #endif ------------------------------------------------------------------------------- From cca509b503d4fc48718317d4b155859aa2cd205b Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Thu, 10 Apr 2025 22:20:32 +0100 Subject: [PATCH 34/43] Re-export (M)Bloom via D.LSMTree.I.BloomFilter to reduce coupling So fewer places depend on the exact BloomFilter implementation. In particular several places just need to talk about the type without using any operations. This will make switching implementation easier. 
--- lsm-tree.cabal | 1 - src-extras/Database/LSMTree/Extras/NoThunks.hs | 2 +- src/Database/LSMTree/Internal/BloomFilter.hs | 2 ++ src/Database/LSMTree/Internal/ChecksumHandle.hs | 3 +-- src/Database/LSMTree/Internal/Lookup.hs | 2 +- src/Database/LSMTree/Internal/MergeSchedule.hs | 2 +- src/Database/LSMTree/Internal/Run.hs | 4 ++-- src/Database/LSMTree/Internal/RunBuilder.hs | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lsm-tree.cabal b/lsm-tree.cabal index a4ef261f0..6e4c810ff 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -508,7 +508,6 @@ library extras , io-classes:strict-stm , lsm-tree , lsm-tree:blockio-api - , lsm-tree:bloomfilter , lsm-tree:control , lsm-tree:kmerge , lsm-tree:prototypes diff --git a/src-extras/Database/LSMTree/Extras/NoThunks.hs b/src-extras/Database/LSMTree/Extras/NoThunks.hs index 5ac6ddbf4..8d257a54f 100644 --- a/src-extras/Database/LSMTree/Extras/NoThunks.hs +++ b/src-extras/Database/LSMTree/Extras/NoThunks.hs @@ -21,7 +21,6 @@ import Control.Monad.ST.Unsafe (unsafeIOToST, unsafeSTToIO) import Control.RefCount import Control.Tracer import Data.Bit -import Data.BloomFilter (Bloom, MBloom) import Data.Map.Strict import Data.Primitive import Data.Primitive.PrimVar @@ -35,6 +34,7 @@ import Data.Word import Database.LSMTree.Internal.Arena import Database.LSMTree.Internal.BlobFile import Database.LSMTree.Internal.BlobRef +import Database.LSMTree.Internal.BloomFilter (Bloom, MBloom) import Database.LSMTree.Internal.ChecksumHandle import Database.LSMTree.Internal.Chunk import Database.LSMTree.Internal.Config diff --git a/src/Database/LSMTree/Internal/BloomFilter.hs b/src/Database/LSMTree/Internal/BloomFilter.hs index ab506ab91..e44e26899 100644 --- a/src/Database/LSMTree/Internal/BloomFilter.hs +++ b/src/Database/LSMTree/Internal/BloomFilter.hs @@ -1,6 +1,8 @@ {-# OPTIONS_HADDOCK not-home #-} module Database.LSMTree.Internal.BloomFilter ( + BF.Bloom, + BF.MBloom, bloomFilterVersion, bloomFilterToLBS, 
bloomFilterFromFile, diff --git a/src/Database/LSMTree/Internal/ChecksumHandle.hs b/src/Database/LSMTree/Internal/ChecksumHandle.hs index 6aea6ae01..69897e7e0 100644 --- a/src/Database/LSMTree/Internal/ChecksumHandle.hs +++ b/src/Database/LSMTree/Internal/ChecksumHandle.hs @@ -24,13 +24,12 @@ module Database.LSMTree.Internal.ChecksumHandle import Control.Monad.Class.MonadSTM (MonadSTM (..)) import Control.Monad.Class.MonadThrow (MonadThrow) import Control.Monad.Primitive -import Data.BloomFilter (Bloom) import qualified Data.ByteString.Lazy as BSL import Data.Primitive.PrimVar import Data.Word (Word64) import Database.LSMTree.Internal.BlobRef (BlobSpan (..), RawBlobRef) import qualified Database.LSMTree.Internal.BlobRef as BlobRef -import Database.LSMTree.Internal.BloomFilter (bloomFilterToLBS) +import Database.LSMTree.Internal.BloomFilter (Bloom, bloomFilterToLBS) import Database.LSMTree.Internal.Chunk (Chunk) import qualified Database.LSMTree.Internal.Chunk as Chunk (toByteString) import Database.LSMTree.Internal.CRC32C (CRC32C) diff --git a/src/Database/LSMTree/Internal/Lookup.hs b/src/Database/LSMTree/Internal/Lookup.hs index eaab22597..07f43a55c 100644 --- a/src/Database/LSMTree/Internal/Lookup.hs +++ b/src/Database/LSMTree/Internal/Lookup.hs @@ -19,7 +19,6 @@ module Database.LSMTree.Internal.Lookup ( ) where import Data.Bifunctor -import Data.BloomFilter (Bloom) import Data.Primitive.ByteArray import qualified Data.Vector as V import qualified Data.Vector.Mutable as VM @@ -37,6 +36,7 @@ import Control.Monad.ST.Strict import Control.RefCount import Database.LSMTree.Internal.BlobRef (WeakBlobRef (..)) +import Database.LSMTree.Internal.BloomFilter (Bloom) import Database.LSMTree.Internal.Entry import Database.LSMTree.Internal.Index (Index) import qualified Database.LSMTree.Internal.Index as Index (search) diff --git a/src/Database/LSMTree/Internal/MergeSchedule.hs b/src/Database/LSMTree/Internal/MergeSchedule.hs index 829cd7974..863658d42 100644 --- 
a/src/Database/LSMTree/Internal/MergeSchedule.hs +++ b/src/Database/LSMTree/Internal/MergeSchedule.hs @@ -53,10 +53,10 @@ import Control.Monad.Class.MonadThrow (MonadMask, MonadThrow (..)) import Control.Monad.Primitive import Control.RefCount import Control.Tracer -import Data.BloomFilter (Bloom) import Data.Foldable (fold, traverse_) import qualified Data.Vector as V import Database.LSMTree.Internal.Assertions (assert) +import Database.LSMTree.Internal.BloomFilter (Bloom) import Database.LSMTree.Internal.Config import Database.LSMTree.Internal.Entry (Entry, NumEntries (..), unNumEntries) diff --git a/src/Database/LSMTree/Internal/Run.hs b/src/Database/LSMTree/Internal/Run.hs index c076058df..44811b24c 100644 --- a/src/Database/LSMTree/Internal/Run.hs +++ b/src/Database/LSMTree/Internal/Run.hs @@ -36,14 +36,14 @@ import Control.Monad.Class.MonadSTM (MonadSTM (..)) import Control.Monad.Class.MonadThrow import Control.Monad.Primitive import Control.RefCount -import Data.BloomFilter (Bloom) import qualified Data.ByteString.Short as SBS import Data.Foldable (for_) import Database.LSMTree.Internal.BlobFile import Database.LSMTree.Internal.BlobRef hiding (mkRawBlobRef, mkWeakBlobRef) import qualified Database.LSMTree.Internal.BlobRef as BlobRef -import Database.LSMTree.Internal.BloomFilter (bloomFilterFromFile) +import Database.LSMTree.Internal.BloomFilter (Bloom, + bloomFilterFromFile) import qualified Database.LSMTree.Internal.CRC32C as CRC import Database.LSMTree.Internal.Entry (NumEntries (..)) import Database.LSMTree.Internal.Index (Index, IndexType (..)) diff --git a/src/Database/LSMTree/Internal/RunBuilder.hs b/src/Database/LSMTree/Internal/RunBuilder.hs index 7583dde4a..b440c52fc 100644 --- a/src/Database/LSMTree/Internal/RunBuilder.hs +++ b/src/Database/LSMTree/Internal/RunBuilder.hs @@ -22,11 +22,11 @@ import qualified Control.Monad.Class.MonadST as ST import Control.Monad.Class.MonadSTM (MonadSTM (..)) import Control.Monad.Class.MonadThrow (MonadThrow) import 
Control.Monad.Primitive -import Data.BloomFilter (Bloom) import Data.Foldable (for_, traverse_) import Data.Primitive.PrimVar import Data.Word (Word64) import Database.LSMTree.Internal.BlobRef (RawBlobRef) +import Database.LSMTree.Internal.BloomFilter (Bloom) import Database.LSMTree.Internal.ChecksumHandle import qualified Database.LSMTree.Internal.CRC32C as CRC import Database.LSMTree.Internal.Entry From b977a02ae9ede897dc1aaa7b25dd364aad9d234c Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Tue, 15 Apr 2025 16:23:51 +0100 Subject: [PATCH 35/43] Switch lsm-tree to use the Blocked bloom filter implementation --- bench/macro/lsm-tree-bench-bloomfilter.hs | 4 ++-- bench/macro/lsm-tree-bench-lookups.hs | 4 ++-- src/Database/LSMTree/Internal/BloomFilter.hs | 2 +- src/Database/LSMTree/Internal/BloomFilterQuery1.hs | 4 ++-- src/Database/LSMTree/Internal/RunAcc.hs | 4 ++-- test/Test/Database/LSMTree/Internal/BloomFilter.hs | 2 +- test/Test/Database/LSMTree/Internal/Lookup.hs | 4 ++-- test/Test/Database/LSMTree/Internal/Merge.hs | 2 +- test/Test/Database/LSMTree/Internal/RunAcc.hs | 2 +- 9 files changed, 14 insertions(+), 14 deletions(-) diff --git a/bench/macro/lsm-tree-bench-bloomfilter.hs b/bench/macro/lsm-tree-bench-bloomfilter.hs index 0339ab6e8..368c19d4d 100644 --- a/bench/macro/lsm-tree-bench-bloomfilter.hs +++ b/bench/macro/lsm-tree-bench-bloomfilter.hs @@ -9,8 +9,8 @@ import Control.Monad import Control.Monad.ST import Control.Monad.ST.Unsafe import Data.Bits ((.&.)) -import Data.BloomFilter (Bloom, BloomSize) -import qualified Data.BloomFilter as Bloom +import Data.BloomFilter.Blocked (Bloom, BloomSize) +import qualified Data.BloomFilter.Blocked as Bloom import Data.Time import Data.Vector (Vector) import qualified Data.Vector as V diff --git a/bench/macro/lsm-tree-bench-lookups.hs b/bench/macro/lsm-tree-bench-lookups.hs index 0ad4c30ed..457d5246b 100644 --- a/bench/macro/lsm-tree-bench-lookups.hs +++ b/bench/macro/lsm-tree-bench-lookups.hs @@ -9,8 +9,8 @@ 
import Control.Monad.Primitive import Control.Monad.ST.Strict (ST, runST) import Control.RefCount import Data.Bits ((.&.)) -import Data.BloomFilter (Bloom) -import qualified Data.BloomFilter as Bloom +import Data.BloomFilter.Blocked (Bloom) +import qualified Data.BloomFilter.Blocked as Bloom import Data.Time import qualified Data.Vector as V import Data.Vector.Algorithms.Merge as Merge diff --git a/src/Database/LSMTree/Internal/BloomFilter.hs b/src/Database/LSMTree/Internal/BloomFilter.hs index e44e26899..a514f0fe7 100644 --- a/src/Database/LSMTree/Internal/BloomFilter.hs +++ b/src/Database/LSMTree/Internal/BloomFilter.hs @@ -18,7 +18,7 @@ import qualified Data.Primitive.ByteArray as P import Data.Word (Word32, Word64, byteSwap32) import System.FS.API -import qualified Data.BloomFilter as BF +import qualified Data.BloomFilter.Blocked as BF import Database.LSMTree.Internal.ByteString (byteArrayToByteString) import Database.LSMTree.Internal.CRC32C (FileCorruptedError (..), FileFormat (..)) diff --git a/src/Database/LSMTree/Internal/BloomFilterQuery1.hs b/src/Database/LSMTree/Internal/BloomFilterQuery1.hs index 35b249bb1..b442a257a 100644 --- a/src/Database/LSMTree/Internal/BloomFilterQuery1.hs +++ b/src/Database/LSMTree/Internal/BloomFilterQuery1.hs @@ -20,8 +20,8 @@ import Data.Word (Word32) import Control.Exception (assert) import Control.Monad.ST (ST) -import Data.BloomFilter (Bloom) -import qualified Data.BloomFilter as Bloom +import Data.BloomFilter.Blocked (Bloom) +import qualified Data.BloomFilter.Blocked as Bloom import Database.LSMTree.Internal.Serialise (SerialisedKey) diff --git a/src/Database/LSMTree/Internal/RunAcc.hs b/src/Database/LSMTree/Internal/RunAcc.hs index cfe0d39fe..9ad2ed52f 100644 --- a/src/Database/LSMTree/Internal/RunAcc.hs +++ b/src/Database/LSMTree/Internal/RunAcc.hs @@ -34,8 +34,8 @@ module Database.LSMTree.Internal.RunAcc ( import Control.DeepSeq (NFData (..)) import Control.Exception (assert) import Control.Monad.ST.Strict -import 
Data.BloomFilter (Bloom, MBloom) -import qualified Data.BloomFilter as Bloom +import Data.BloomFilter.Blocked (Bloom, MBloom) +import qualified Data.BloomFilter.Blocked as Bloom import Data.Primitive.PrimVar (PrimVar, modifyPrimVar, newPrimVar, readPrimVar) import Data.Word (Word64) diff --git a/test/Test/Database/LSMTree/Internal/BloomFilter.hs b/test/Test/Database/LSMTree/Internal/BloomFilter.hs index b099b63db..1cdbfb9ba 100644 --- a/test/Test/Database/LSMTree/Internal/BloomFilter.hs +++ b/test/Test/Database/LSMTree/Internal/BloomFilter.hs @@ -25,7 +25,7 @@ import Test.QuickCheck.Instances () import Test.Tasty (TestTree, testGroup) import Test.Tasty.QuickCheck hiding ((.&.)) -import qualified Data.BloomFilter as BF +import qualified Data.BloomFilter.Blocked as BF import Database.LSMTree.Internal.BloomFilter import qualified Database.LSMTree.Internal.BloomFilterQuery1 as Bloom1 import Database.LSMTree.Internal.Serialise (SerialisedKey, diff --git a/test/Test/Database/LSMTree/Internal/Lookup.hs b/test/Test/Database/LSMTree/Internal/Lookup.hs index 1df8cf339..26d39c13a 100644 --- a/test/Test/Database/LSMTree/Internal/Lookup.hs +++ b/test/Test/Database/LSMTree/Internal/Lookup.hs @@ -24,8 +24,8 @@ import Control.Exception import Control.Monad.ST.Strict import Control.RefCount import Data.Bifunctor -import Data.BloomFilter (Bloom) -import qualified Data.BloomFilter as Bloom +import Data.BloomFilter.Blocked (Bloom) +import qualified Data.BloomFilter.Blocked as Bloom import Data.Coerce (coerce) import Data.Either (rights) import qualified Data.Foldable as F diff --git a/test/Test/Database/LSMTree/Internal/Merge.hs b/test/Test/Database/LSMTree/Internal/Merge.hs index 22c83e07b..29de6629e 100644 --- a/test/Test/Database/LSMTree/Internal/Merge.hs +++ b/test/Test/Database/LSMTree/Internal/Merge.hs @@ -3,7 +3,7 @@ module Test.Database.LSMTree.Internal.Merge (tests) where import Control.Exception (evaluate) import Control.RefCount import Data.Bifoldable (bifoldMap) -import 
qualified Data.BloomFilter as Bloom +import qualified Data.BloomFilter.Blocked as Bloom import Data.Foldable (traverse_) import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map diff --git a/test/Test/Database/LSMTree/Internal/RunAcc.hs b/test/Test/Database/LSMTree/Internal/RunAcc.hs index 9172fcbd5..4be8fc1b0 100644 --- a/test/Test/Database/LSMTree/Internal/RunAcc.hs +++ b/test/Test/Database/LSMTree/Internal/RunAcc.hs @@ -8,7 +8,7 @@ module Test.Database.LSMTree.Internal.RunAcc (tests) where import Control.Exception (assert) import Control.Monad.ST import Data.Bifunctor (Bifunctor (..)) -import qualified Data.BloomFilter as Bloom +import qualified Data.BloomFilter.Blocked as Bloom import qualified Data.ByteString as BS import qualified Data.ByteString.Short as SBS import Data.Maybe From 4ce0308704ecfc7c3be044e48d0e4352035ad4f4 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Tue, 22 Apr 2025 17:45:21 +0100 Subject: [PATCH 36/43] bloomfilter: enable the same warnings as other packages --- bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs | 2 +- bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs | 6 +++--- bloomfilter/src/Data/BloomFilter/Classic/BitArray.hs | 2 +- bloomfilter/src/Data/BloomFilter/Classic/Calc.hs | 4 ++-- bloomfilter/src/Data/BloomFilter/Classic/Internal.hs | 3 +-- lsm-tree.cabal | 4 ++-- 6 files changed, 10 insertions(+), 11 deletions(-) diff --git a/bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs b/bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs index 833ff1c30..65a4911be 100644 --- a/bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs +++ b/bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs @@ -45,7 +45,7 @@ import GHC.ST (ST (ST)) -- so that multiple accesses within a single block will use only one cache line. -- newtype BitArray = BitArray (PrimArray Word64) - deriving (Eq, Show) + deriving stock (Eq, Show) -- | The number of 512-bit blocks for the given number of bits. 
This rounds -- up to the nearest multiple of 512. diff --git a/bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs b/bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs index b1a3cc839..ed12968a2 100644 --- a/bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs +++ b/bloomfilter/src/Data/BloomFilter/Blocked/Internal.hs @@ -153,17 +153,17 @@ data Bloom a = Bloom { , numHashes :: {-# UNPACK #-} !Int , bitArray :: {-# UNPACK #-} !BitArray } - deriving Eq + deriving stock Eq type role Bloom nominal bloomInvariant :: Bloom a -> Bool bloomInvariant Bloom { numBlocks, bitArray = BitArray.BitArray pa } = - fromIntegral numBlocks * 8 == sizeofPrimArray pa + numBlocks * 8 == sizeofPrimArray pa instance Show (Bloom a) where show mb = "Bloom { " ++ show numBits ++ " bits } " where - numBits = blocksToBits (fromIntegral (numBlocks mb)) + numBits = blocksToBits (numBlocks mb) instance NFData (Bloom a) where rnf !_ = () diff --git a/bloomfilter/src/Data/BloomFilter/Classic/BitArray.hs b/bloomfilter/src/Data/BloomFilter/Classic/BitArray.hs index 5e53b3fe6..3b31a173a 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic/BitArray.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic/BitArray.hs @@ -31,7 +31,7 @@ import GHC.ST (ST (ST)) -- -- This vector's offset and length are multiples of 64 newtype BitArray = BitArray (PrimArray Word64) - deriving (Eq, Show) + deriving stock (Eq, Show) {-# INLINE unsafeIndex #-} unsafeIndex :: BitArray -> Int -> Bool diff --git a/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs b/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs index 2a9079004..1f9f0c840 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs @@ -43,7 +43,7 @@ data BloomPolicy = BloomPolicy { policyBits :: !Double, policyHashes :: !Int } - deriving Show + deriving stock Show policyForFPR :: FPR -> BloomPolicy policyForFPR fpr | fpr <= 0 || fpr >= 1 = @@ -96,7 +96,7 @@ data BloomSize = BloomSize { -- | The number of hash 
functions to use. sizeHashes :: !Int } - deriving Show + deriving stock Show sizeForFPR :: FPR -> NumEntries -> BloomSize sizeForFPR = sizeForPolicy . policyForFPR diff --git a/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs b/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs index 049d59612..b6eeec50a 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic/Internal.hs @@ -142,7 +142,7 @@ data Bloom a = Bloom { , numHashes :: {-# UNPACK #-} !Int , bitArray :: {-# UNPACK #-} !BitArray } - deriving Eq + deriving stock Eq type role Bloom nominal bloomInvariant :: Bloom a -> Bool @@ -276,7 +276,6 @@ word64ToWordShim# x# = x# -- -- See 'evalHashes'. data Hashes a = Hashes !Hash !Hash - deriving Show type role Hashes nominal instance Prim (Hashes a) where diff --git a/lsm-tree.cabal b/lsm-tree.cabal index 6e4c810ff..333b9d846 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -406,7 +406,7 @@ test-suite xxhash-tests -- this fork doesn't work on 32bit systems library bloomfilter - import: language + import: language, warnings visibility: private hs-source-dirs: bloomfilter/src build-depends: @@ -430,7 +430,7 @@ library bloomfilter Data.BloomFilter.Classic.Calc Data.BloomFilter.Classic.Internal - ghc-options: -O2 -Wall + ghc-options: -O2 test-suite bloomfilter-tests import: language, warnings From b4231aec61571dec9045c5d4b6b9dec18b534019 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Tue, 29 Apr 2025 14:30:53 +0100 Subject: [PATCH 37/43] Update bloomfilter/src/Data/BloomFilter/Classic/BitArray.hs Co-authored-by: Joris Dral --- bloomfilter/src/Data/BloomFilter/Classic/BitArray.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bloomfilter/src/Data/BloomFilter/Classic/BitArray.hs b/bloomfilter/src/Data/BloomFilter/Classic/BitArray.hs index 3b31a173a..9f53d952d 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic/BitArray.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic/BitArray.hs @@ 
-72,7 +72,7 @@ new s = do -- this may overflow, but so be it (1^64 bits is a lot) roundUpTo64 :: Int -> Int - roundUpTo64 i = unsafeShiftR (i + 63) 6 + roundUpTo64 i = unsafeShiftR (i + 63) 6 -- `div` 64, rounded up serialise :: BitArray -> (ByteArray, Int, Int) serialise bitArray = From dbd10c84d11f9edd66281d1272d22da1d97e35aa Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Tue, 29 Apr 2025 14:31:48 +0100 Subject: [PATCH 38/43] Update bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs Co-authored-by: Joris Dral --- bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs b/bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs index 65a4911be..b406c75ab 100644 --- a/bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs +++ b/bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs @@ -168,8 +168,8 @@ wordAndBitIndex (BlockIx blockIx) (BitIx blockBitIx) = -- and the bit index. -- * There are 8 Word64s in each 64byte block. -- * Use 3 bits (bits 6..8) to select the Word64 within the block - wordIx = fromIntegral blockIx `shiftL` 3 - + (blockBitIx `shiftR` 6) .&. 7 + wordIx = fromIntegral blockIx `shiftL` 3 -- * 8 + + (blockBitIx `shiftR` 6) .&. 7 -- `div` 64, `mod` 8 -- Bits 0..5 of blockBitIx select the bit within Word64 wordBitIx = blockBitIx .&. 
63 From 1f445acbdd79ed46060f289403462f2bd49a5af1 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Tue, 29 Apr 2025 14:58:44 +0100 Subject: [PATCH 39/43] Apply suggestions from code review Co-authored-by: Joris Dral --- src/Database/LSMTree/Internal/BloomFilter.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Database/LSMTree/Internal/BloomFilter.hs b/src/Database/LSMTree/Internal/BloomFilter.hs index a514f0fe7..99c6b52e6 100644 --- a/src/Database/LSMTree/Internal/BloomFilter.hs +++ b/src/Database/LSMTree/Internal/BloomFilter.hs @@ -122,7 +122,7 @@ hGetByteArrayExactly :: -> Int -> m P.ByteArray hGetByteArrayExactly hfs h len = do - buf <- P.newByteArray 16 + buf <- P.newByteArray (fromIntegral len) _ <- hGetBufExactly hfs h buf 0 (fromIntegral len) P.unsafeFreezeByteArray buf From 7965a1fcb611366083610c584efadc75e449db9e Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Tue, 29 Apr 2025 14:59:06 +0100 Subject: [PATCH 40/43] Apply suggestions from code review Co-authored-by: Joris Dral --- bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs b/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs index c3087a783..00cafd08f 100644 --- a/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs +++ b/bench/micro/Bench/Database/LSMTree/Internal/BloomFilter.hs @@ -38,10 +38,10 @@ benchmarks = bgroup "Bench.Database.LSMTree.Internal.BloomFilter" [ ] , env (constructionEnv 2_500_000) $ \ m -> bgroup "construction" [ - bench "easyList 0.1" $ + bench "FPR = 0.1" $ whnf (constructBloom 0.1) m - , bench "easyList 0.9" $ + , bench "FPR = 0.9" $ whnf (constructBloom 0.9) m ] ] From c8c1f91a8924932b2eb16212e230b8af037048e6 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Tue, 29 Apr 2025 14:59:42 +0100 Subject: [PATCH 41/43] Apply suggestions from code review Co-authored-by: Joris Dral --- 
bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs b/bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs index b406c75ab..d9c4e58ec 100644 --- a/bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs +++ b/bloomfilter/src/Data/BloomFilter/Blocked/BitArray.hs @@ -172,7 +172,7 @@ wordAndBitIndex (BlockIx blockIx) (BitIx blockBitIx) = + (blockBitIx `shiftR` 6) .&. 7 -- `div` 64, `mod` 8 -- Bits 0..5 of blockBitIx select the bit within Word64 - wordBitIx = blockBitIx .&. 63 + wordBitIx = blockBitIx .&. 63 -- `mod` 64 {-# INLINE unsafeTestBit #-} -- like testBit but using unsafeShiftL instead of shiftL From 91e9bbc68973c5cc45ebf0f69cfe0b4317f5eb53 Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Tue, 29 Apr 2025 15:10:52 +0100 Subject: [PATCH 42/43] Apply suggestions from code review Co-authored-by: Joris Dral --- bloomfilter/src/Data/BloomFilter/Classic/Calc.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs b/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs index 1f9f0c840..bdac5c0b1 100644 --- a/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs +++ b/bloomfilter/src/Data/BloomFilter/Classic/Calc.hs @@ -36,7 +36,7 @@ type NumEntries = Int -- size of the bloom filter in bits. In general the bits per entry can be -- fractional. The final bloom filter size in will be rounded to a whole -- number of bits. --- 2. The number of hashes 'policyNumHashes'. +-- 2. The number of hashes 'policyHashes'. -- 3. The expected FPR for the policy using 'policyFPR'. 
-- data BloomPolicy = BloomPolicy { From 9714096bc160bf4cbe2527faffe0ae0edeadc36c Mon Sep 17 00:00:00 2001 From: Duncan Coutts Date: Tue, 29 Apr 2025 15:11:28 +0100 Subject: [PATCH 43/43] Apply suggestions from code review Co-authored-by: Joris Dral --- bloomfilter/tests/bloomfilter-tests.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bloomfilter/tests/bloomfilter-tests.hs b/bloomfilter/tests/bloomfilter-tests.hs index 52227d28e..d8223fb69 100644 --- a/bloomfilter/tests/bloomfilter-tests.hs +++ b/bloomfilter/tests/bloomfilter-tests.hs @@ -128,6 +128,7 @@ prop_calc_policy_bits proxy (BitsPerEntry lb, BitsPerEntry ub) t where (~~~) = withinTolerance t +-- | Compare @sizeHashes . sizeForBits@ against @numHashFunctions@ prop_calc_size_hashes_bits :: BloomFilter bloom => Proxy bloom -> BitsPerEntry -> NumEntries -> Property prop_calc_size_hashes_bits proxy (BitsPerEntry c) (NumEntries numEntries) = @@ -136,6 +137,7 @@ prop_calc_size_hashes_bits proxy (BitsPerEntry c) (NumEntries numEntries) = (fromIntegral numEntries) === fromIntegral (B.sizeHashes bsize) +-- | Compare @sizeForFPR@ against @falsePositiveRate@ with some tolerance for deviations prop_calc_size_fpr_fpr :: BloomFilter bloom => Proxy bloom -> FPR -> NumEntries -> Property prop_calc_size_fpr_fpr proxy (FPR fpr) (NumEntries numEntries) = @@ -147,6 +149,7 @@ prop_calc_size_fpr_fpr proxy (FPR fpr) (NumEntries numEntries) = where (~~~) = withinTolerance 1e-6 +-- | Compare @sizeForBits@ against @falsePositiveRate@ with some tolerance for deviations prop_calc_size_fpr_bits :: BloomFilter bloom => Proxy bloom -> BitsPerEntry -> NumEntries -> Property prop_calc_size_fpr_bits proxy (BitsPerEntry c) (NumEntries numEntries) =